From 022441273e7ae85b0e0dbf009c3757991977236d Mon Sep 17 00:00:00 2001 From: Cathal Mullan Date: Wed, 6 May 2026 16:47:15 +0100 Subject: [PATCH 1/4] Implement aarch64 AES LLVM intrinsics --- example/neon.rs | 53 +++++++++++++++++++++++++++++ src/intrinsics/llvm_aarch64.rs | 61 ++++++++++++++++++++++++++++++++++ 2 files changed, 114 insertions(+) diff --git a/example/neon.rs b/example/neon.rs index b0bcf3573c..baf560c591 100644 --- a/example/neon.rs +++ b/example/neon.rs @@ -259,6 +259,54 @@ unsafe fn test_vrndnq_f32() { assert_eq!(r, e); } +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "aes")] +unsafe fn test_vaeseq_u8() { + // AArch64 llvm intrinsic: llvm.aarch64.crypto.aese + let a = u8x16::from([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]); + let b = u8x16::from([16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]); + let e = u8x16::from([ + 0xca, 0xca, 0xca, 0xca, 0xca, 0xca, 0xca, 0xca, 0xca, 0xca, 0xca, 0xca, 0xca, 0xca, 0xca, + 0xca, + ]); + let r: u8x16 = unsafe { transmute(vaeseq_u8(transmute(a), transmute(b))) }; + assert_eq!(r, e); +} + +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "aes")] +unsafe fn test_vaesdq_u8() { + // AArch64 llvm intrinsic: llvm.aarch64.crypto.aesd + let a = u8x16::from([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]); + let b = u8x16::from([16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]); + let e = u8x16::from([ + 0x7c, 0x7c, 0x7c, 0x7c, 0x7c, 0x7c, 0x7c, 0x7c, 0x7c, 0x7c, 0x7c, 0x7c, 0x7c, 0x7c, 0x7c, + 0x7c, + ]); + let r: u8x16 = unsafe { transmute(vaesdq_u8(transmute(a), transmute(b))) }; + assert_eq!(r, e); +} + +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "aes")] +unsafe fn test_vaesmcq_u8() { + // AArch64 llvm intrinsic: llvm.aarch64.crypto.aesmc + let a = u8x16::from([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]); + let e = u8x16::from([2, 7, 0, 5, 6, 3, 4, 1, 10, 15, 8, 13, 14, 11, 12, 9]); + let r: u8x16 = unsafe { transmute(vaesmcq_u8(transmute(a))) }; + assert_eq!(r, e); +} + +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "aes")] +unsafe fn test_vaesimcq_u8() { + // AArch64 llvm intrinsic: llvm.aarch64.crypto.aesimc + let a = u8x16::from([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]); + let e = u8x16::from([10, 15, 8, 13, 14, 11, 12, 9, 2, 7, 0, 5, 6, 3, 4, 1]); + let r: u8x16 = unsafe { transmute(vaesimcq_u8(transmute(a))) }; + assert_eq!(r, e); +} + #[cfg(target_arch = "aarch64")] fn main() { unsafe { @@ -293,6 +341,11 @@ fn main() { test_vrndnq_f32(); test_crc32(); + + test_vaeseq_u8(); + test_vaesdq_u8(); + test_vaesmcq_u8(); + test_vaesimcq_u8(); } } diff --git a/src/intrinsics/llvm_aarch64.rs b/src/intrinsics/llvm_aarch64.rs index b4abc25e48..7be5ead6d0 100644 --- a/src/intrinsics/llvm_aarch64.rs +++ b/src/intrinsics/llvm_aarch64.rs @@ -544,6 +544,67 @@ pub(super) fn codegen_aarch64_llvm_intrinsic_call<'tcx>( ); } + "llvm.aarch64.crypto.aese" | "llvm.aarch64.crypto.aesd" => { + intrinsic_args!(fx, args => (a, b); intrinsic); + + let a = a.load_scalar(fx); + let b = b.load_scalar(fx); + + let asm = match intrinsic { + "llvm.aarch64.crypto.aese" => "aese v0.16b, v1.16b", + "llvm.aarch64.crypto.aesd" => "aesd v0.16b, v1.16b", + _ => unreachable!(), + }; + + codegen_inline_asm_inner( + fx, + &[InlineAsmTemplatePiece::String(asm.into())], + &[ + CInlineAsmOperand::InOut { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::AArch64( + AArch64InlineAsmReg::v0, + )), + _late: true, + in_value: a, + out_place: Some(ret), + }, + CInlineAsmOperand::In { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::AArch64( + AArch64InlineAsmReg::v1, + )), + value: b, + }, + ], + InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM, + ); + } + + "llvm.aarch64.crypto.aesmc" | "llvm.aarch64.crypto.aesimc" => { + intrinsic_args!(fx, args => (a); intrinsic); + + let a = a.load_scalar(fx); + + let asm = match intrinsic { + "llvm.aarch64.crypto.aesmc" => "aesmc v0.16b, v0.16b", + "llvm.aarch64.crypto.aesimc" => "aesimc v0.16b, v0.16b", + _ => unreachable!(), + }; + + codegen_inline_asm_inner( + fx, + &[InlineAsmTemplatePiece::String(asm.into())], + &[CInlineAsmOperand::InOut { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::AArch64( + AArch64InlineAsmReg::v0, + )), + _late: true, + in_value: a, + out_place: Some(ret), + }], + InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM, + ); + } + _ => { fx.tcx.dcx().warn(format!( "unsupported AArch64 llvm intrinsic {}; replacing with trap", From ac09e49c679459e52f85b8c06cceccbb6dac1d9c Mon Sep 17 00:00:00 2001 From: Cathal Mullan Date: Thu, 14 May 2026 13:55:07 +0100 Subject: [PATCH 2/4] Implement aarch64 SHA-256 LLVM intrinsics --- example/neon.rs | 52 ++++++++++++++++ src/intrinsics/llvm_aarch64.rs | 107 +++++++++++++++++++++++++++++++++ 2 files changed, 159 insertions(+) diff --git a/example/neon.rs b/example/neon.rs index baf560c591..1a1826341d 100644 --- a/example/neon.rs +++ b/example/neon.rs @@ -307,6 +307,53 @@ unsafe fn test_vaesimcq_u8() { assert_eq!(r, e); } +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "sha2")] +unsafe fn test_vsha256hq_u32() { + // AArch64 llvm intrinsic: llvm.aarch64.crypto.sha256h + let a = u32x4::from([0, 1, 2, 3]); + let b = u32x4::from([4, 5, 6, 7]); + let c = u32x4::from([8, 9, 10, 11]); + let e = u32x4::from([0x27bb4ae0, 0xd8f61f7c, 0xb7c1ecdc, 0x10800215]); + let r: u32x4 = unsafe { transmute(vsha256hq_u32(transmute(a), transmute(b), transmute(c))) }; + assert_eq!(r, e); +} + +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "sha2")] +unsafe fn test_vsha256h2q_u32() { + // AArch64 llvm intrinsic: llvm.aarch64.crypto.sha256h2 + let a = u32x4::from([0, 1, 2, 3]); + let b = u32x4::from([4, 5, 6, 7]); + let c = u32x4::from([8, 9, 10, 11]); + let e = u32x4::from([0x6989ee0d, 0x4b055920, 0x52800a12, 0x00000014]); + let r: u32x4 = unsafe { transmute(vsha256h2q_u32(transmute(a), transmute(b), transmute(c))) }; + assert_eq!(r, e); +} + +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "sha2")] +unsafe fn test_vsha256su0q_u32() { + // AArch64 llvm intrinsic: llvm.aarch64.crypto.sha256su0 + let a = u32x4::from([0, 1, 2, 3]); + let b = u32x4::from([4, 5, 6, 7]); + let e = u32x4::from([0x02004000, 0x04008001, 0x0600c002, 0x08010003]); + let r: u32x4 = unsafe { transmute(vsha256su0q_u32(transmute(a), transmute(b))) }; + assert_eq!(r, e); +} + +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "sha2")] +unsafe fn test_vsha256su1q_u32() { + // AArch64 llvm intrinsic: llvm.aarch64.crypto.sha256su1 + let a = u32x4::from([0, 1, 2, 3]); + let b = u32x4::from([4, 5, 6, 7]); + let c = u32x4::from([8, 9, 10, 11]); + let e = u32x4::from([0x00044005, 0x0004e007, 0xa802211b, 0xec036145]); + let r: u32x4 = unsafe { transmute(vsha256su1q_u32(transmute(a), transmute(b), transmute(c))) }; + assert_eq!(r, e); +} + #[cfg(target_arch = "aarch64")] fn main() { unsafe { @@ -346,6 +393,11 @@ fn main() { test_vaesdq_u8(); test_vaesmcq_u8(); test_vaesimcq_u8(); + + test_vsha256hq_u32(); + test_vsha256h2q_u32(); + test_vsha256su0q_u32(); + test_vsha256su1q_u32(); } } diff --git a/src/intrinsics/llvm_aarch64.rs b/src/intrinsics/llvm_aarch64.rs index 7be5ead6d0..83f26e1349 100644 --- a/src/intrinsics/llvm_aarch64.rs +++ b/src/intrinsics/llvm_aarch64.rs @@ -605,6 +605,113 @@ pub(super) fn codegen_aarch64_llvm_intrinsic_call<'tcx>( ); } + "llvm.aarch64.crypto.sha256h" | "llvm.aarch64.crypto.sha256h2" => { + intrinsic_args!(fx, args => (a, b, c); intrinsic); + + let a = a.load_scalar(fx); + let b = b.load_scalar(fx); + let c = c.load_scalar(fx); + + let asm = match intrinsic { + "llvm.aarch64.crypto.sha256h" => "sha256h q0, q1, v2.4s", + "llvm.aarch64.crypto.sha256h2" => "sha256h2 q0, q1, v2.4s", + _ => unreachable!(), + }; + + codegen_inline_asm_inner( + fx, + &[InlineAsmTemplatePiece::String(asm.into())], + &[ + CInlineAsmOperand::InOut { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::AArch64( + AArch64InlineAsmReg::v0, + )), + _late: true, + in_value: a, + out_place: Some(ret), + }, + CInlineAsmOperand::In { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::AArch64( + AArch64InlineAsmReg::v1, + )), + value: b, + }, + CInlineAsmOperand::In { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::AArch64( + AArch64InlineAsmReg::v2, + )), + value: c, + }, + ], + InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM, + ); + } + + "llvm.aarch64.crypto.sha256su0" => { + intrinsic_args!(fx, args => (a, b); intrinsic); + + let a = a.load_scalar(fx); + let b = b.load_scalar(fx); + + codegen_inline_asm_inner( + fx, + &[InlineAsmTemplatePiece::String("sha256su0 v0.4s, v1.4s".into())], + &[ + CInlineAsmOperand::InOut { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::AArch64( + AArch64InlineAsmReg::v0, + )), + _late: true, + in_value: a, + out_place: Some(ret), + }, + CInlineAsmOperand::In { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::AArch64( + AArch64InlineAsmReg::v1, + )), + value: b, + }, + ], + InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM, + ); + } + + "llvm.aarch64.crypto.sha256su1" => { + intrinsic_args!(fx, args => (a, b, c); intrinsic); + + let a = a.load_scalar(fx); + let b = b.load_scalar(fx); + let c = c.load_scalar(fx); + + codegen_inline_asm_inner( + fx, + &[InlineAsmTemplatePiece::String("sha256su1 v0.4s, v1.4s, v2.4s".into())], + &[ + CInlineAsmOperand::InOut { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::AArch64( + AArch64InlineAsmReg::v0, + )), + _late: true, + in_value: a, + out_place: Some(ret), + }, + CInlineAsmOperand::In { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::AArch64( + AArch64InlineAsmReg::v1, + )), + value: b, + }, + CInlineAsmOperand::In { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::AArch64( + AArch64InlineAsmReg::v2, + )), + value: c, + }, + ], + InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM, + ); + } + _ => { fx.tcx.dcx().warn(format!( "unsupported AArch64 llvm intrinsic {}; replacing with trap", From cd848fb353c2cafb174ac2de77adba0ff58a2775 Mon Sep 17 00:00:00 2001 From: Cathal Mullan Date: Thu, 14 May 2026 14:06:49 +0100 Subject: [PATCH 3/4] Implement aarch64 PMULL LLVM intrinsics --- example/neon.rs | 24 +++++++++++ src/intrinsics/llvm_aarch64.rs | 75 ++++++++++++++++++++++++++++++++++ 2 files changed, 99 insertions(+) diff --git a/example/neon.rs b/example/neon.rs index 1a1826341d..4c2a0d9871 100644 --- a/example/neon.rs +++ b/example/neon.rs @@ -354,6 +354,27 @@ unsafe fn test_vsha256su1q_u32() { assert_eq!(r, e); } +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "aes")] +fn test_vmull_p64() { + // AArch64 llvm intrinsic: llvm.aarch64.neon.pmull64 + let a: u64 = 3; + let b: u64 = 6; + let e: u128 = 10; + let r: u128 = vmull_p64(a, b); + assert_eq!(r, e); +} + +#[cfg(target_arch = "aarch64")] +unsafe fn test_vmull_p8() { + // AArch64 llvm intrinsic: llvm.aarch64.neon.pmull.v8i16 + let a = u8x8::from([0, 1, 2, 3, 4, 5, 6, 7]); + let b = u8x8::from([8, 9, 10, 11, 12, 13, 14, 15]); + let e = u16x8::from([0x0000, 0x0009, 0x0014, 0x001d, 0x0030, 0x0039, 0x0024, 0x002d]); + let r: u16x8 = unsafe { transmute(vmull_p8(transmute(a), transmute(b))) }; + assert_eq!(r, e); +} + #[cfg(target_arch = "aarch64")] fn main() { unsafe { @@ -398,6 +419,9 @@ fn main() { test_vsha256h2q_u32(); test_vsha256su0q_u32(); test_vsha256su1q_u32(); + + test_vmull_p64(); + test_vmull_p8(); } } diff --git a/src/intrinsics/llvm_aarch64.rs b/src/intrinsics/llvm_aarch64.rs index 83f26e1349..6f430542fc 100644 --- a/src/intrinsics/llvm_aarch64.rs +++ b/src/intrinsics/llvm_aarch64.rs @@ -712,6 +712,81 @@ pub(super) fn codegen_aarch64_llvm_intrinsic_call<'tcx>( ); } + "llvm.aarch64.neon.pmull64" => { + intrinsic_args!(fx, args => (a, b); intrinsic); + + let a = a.load_scalar(fx); + let b = b.load_scalar(fx); + + codegen_inline_asm_inner( + fx, + &[InlineAsmTemplatePiece::String( + "fmov d0, x0 + fmov d1, x1 + pmull v0.1q, v0.1d, v1.1d" + .into(), + )], + &[ + CInlineAsmOperand::Out { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::AArch64( + AArch64InlineAsmReg::v0, + )), + late: true, + place: Some(ret), + }, + CInlineAsmOperand::In { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::AArch64( + AArch64InlineAsmReg::x0, + )), + value: a, + }, + CInlineAsmOperand::In { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::AArch64( + AArch64InlineAsmReg::x1, + )), + value: b, + }, + CInlineAsmOperand::Out { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::AArch64( + AArch64InlineAsmReg::v1, + )), + late: true, + place: None, + }, + ], + InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM, + ); + } + + "llvm.aarch64.neon.pmull.v8i16" => { + intrinsic_args!(fx, args => (a, b); intrinsic); + + let a = a.load_scalar(fx); + let b = b.load_scalar(fx); + + codegen_inline_asm_inner( + fx, + &[InlineAsmTemplatePiece::String("pmull v0.8h, v0.8b, v1.8b".into())], + &[ + CInlineAsmOperand::InOut { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::AArch64( + AArch64InlineAsmReg::v0, + )), + _late: true, + in_value: a, + out_place: Some(ret), + }, + CInlineAsmOperand::In { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::AArch64( + AArch64InlineAsmReg::v1, + )), + value: b, + }, + ], + InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM, + ); + } + _ => { fx.tcx.dcx().warn(format!( "unsupported AArch64 llvm intrinsic {}; replacing with trap", From f26c70e47ba09523f0325203286bc9d19eb3969a Mon Sep 17 00:00:00 2001 From: Cathal Mullan Date: Thu, 14 May 2026 14:12:56 +0100 Subject: [PATCH 4/4] Enable `graviola` tests on aarch64 --- build_system/tests.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/build_system/tests.rs b/build_system/tests.rs index 035badb878..685bf8ce9a 100644 --- a/build_system/tests.rs +++ b/build_system/tests.rs @@ -215,8 +215,7 @@ const EXTENDED_SYSROOT_SUITE: &[TestCase] = &[ TestCase::custom("test.graviola", &|runner| { let (arch, _) = runner.target_compiler.triple.split_once('-').unwrap(); - // FIXME: Disable `aarch64` until intrinsics are supported. - if !["x86_64"].contains(&arch) { + if !["aarch64", "x86_64"].contains(&arch) { eprintln!("Skipping `graviola` tests: unsupported target"); return; }