diff --git a/Justfile b/Justfile
index bd569fa64..49b8405aa 100644
--- a/Justfile
+++ b/Justfile
@@ -24,9 +24,11 @@ export CROSS_CONTAINER_GID := if path_exists("/dev/kvm") == "true" { kvm-gid } e
 root := justfile_directory()
 
 default-target := "debug"
-simpleguest_source := "src/tests/rust_guests/simpleguest/target/x86_64-hyperlight-none"
-dummyguest_source := "src/tests/rust_guests/dummyguest/target/x86_64-hyperlight-none"
-witguest_source := "src/tests/rust_guests/witguest/target/x86_64-hyperlight-none"
+hyperlight-target-arch := env("HYPERLIGHT_TARGET", arch())
+hyperlight-target := if hyperlight-target-arch == "x86_64" { "x86_64-hyperlight-none" } else if hyperlight-target-arch == "aarch64" { "aarch64-hyperlight-none" } else { error("Unsupported architecture: " + hyperlight-target-arch) }
+simpleguest_source := "src/tests/rust_guests/simpleguest/target/" + hyperlight-target
+dummyguest_source := "src/tests/rust_guests/dummyguest/target/" + hyperlight-target
+witguest_source := "src/tests/rust_guests/witguest/target/" + hyperlight-target
 rust_guests_bin_dir := "src/tests/rust_guests/bin"
 
 ################
diff --git a/c.just b/c.just
index 92bcf8f6c..54cf0cc37 100644
--- a/c.just
+++ b/c.just
@@ -1,8 +1,9 @@
 mkdir := if os() == "windows" { "mkdir -f -p" } else { "mkdir -p"} 
 
 # Elf options
+hyperlight-target-c := if hyperlight-target-arch == "x86_64" { "x86_64-unknown-linux-none" } else if hyperlight-target-arch == "aarch64" { "aarch64-unknown-linux-none" } else { error("Unsupported architecture: " + hyperlight-target-arch) }
 # We don't support stack protectors at the moment, but Arch Linux clang auto-enables them for -linux platforms, so explicitly disable them.
-c-compile-options-elf := '-nostdlibinc -H --target=x86_64-unknown-linux-none -fno-stack-protector -fstack-clash-protection -mstack-probe-size=4096 -fPIC'
+c-compile-options-elf := f'-nostdlibinc -H --target={{hyperlight-target-c}} -fno-stack-protector -fstack-clash-protection -mstack-probe-size=4096 -fPIC'
 c-include-flags-elf := "-I " + root / "src/hyperlight_guest_capi/include/"  + " -I " + root / "src/hyperlight_libc/third_party/picolibc/libc/include/" + " -I " + root / "src/hyperlight_libc/third_party/picolibc/libc/stdio/" + " -I " + root / "src/hyperlight_libc/include/"
 c-linker-options-elf := '--entry "entrypoint" --nostdlib -pie --no-dynamic-linker'
 c-flags-debug-elf := '-O0'
@@ -19,7 +20,7 @@ compile-c-guest target=default-target:
 
 link-c-guest target=default-target:
     # elf
-    cd src/tests/c_guests/c_simpleguest && ld.lld -o out/{{target}}/simpleguest {{c-linker-options-elf}} out/{{target}}/main.o -l hyperlight_guest_capi -L "{{justfile_directory()}}/target/x86_64-hyperlight-none/{{target}}"
+    cd src/tests/c_guests/c_simpleguest && ld.lld -o out/{{target}}/simpleguest {{c-linker-options-elf}} out/{{target}}/main.o -l hyperlight_guest_capi -L "{{justfile_directory()}}/target/{{hyperlight-target}}/{{target}}"
 
 move-c-guests target=default-target:
     # elf
diff --git a/docs/paging-development-notes.md b/docs/paging-development-notes.md
index 573c2f77b..da08f6f24 100644
--- a/docs/paging-development-notes.md
+++ b/docs/paging-development-notes.md
@@ -172,3 +172,9 @@ below the exception stack within the scratch region.
 
 Hyperlight unconditionally uses 48-bit virtual addresses (4-level
 paging) and enables PAE.  The guest is always entered in long mode.
+
+## aarch64
+
+Hyperlight unconditionally uses 48-bit virtual addresses. Hyperlight
+presently only uses addresses in the lower (ttbr0) half of the address range.
+
diff --git a/flake.nix b/flake.nix
index cbec54678..d1604117d 100644
--- a/flake.nix
+++ b/flake.nix
@@ -51,7 +51,7 @@
               toolchainVersionAttrs = args;
             };
           })) // {
-            targetPlatforms = [ "x86_64-linux" ];
+            targetPlatforms = [ "aarch64-linux" "x86_64-linux" ];
             badTargetPlatforms = [ ];
           };
           overrideRustPkg = pkg: self.lib.makeOverridable (origArgs:
@@ -68,21 +68,21 @@
               args = [ "-c" "declare > $out" ];
             });
         in {
-          shells.default = gcrootForShell devShells.x86_64-linux.default;
+          shells.x86_64-linux.default = gcrootForShell devShells.x86_64-linux.default;
+          shells.aarch64-linux.default = gcrootForShell devShells.aarch64-linux.default;
         };
-      devShells.x86_64-linux.default =
-        let pkgs = import nixpkgs {
-              system = "x86_64-linux";
-              overlays = [ (import (nixpkgs-mozilla + "/rust-overlay.nix")) overlays.fix-rust ];
-            };
-        in with pkgs; let
+      devShells = nixpkgs.lib.genAttrs nixpkgs.lib.systems.flakeExposed (system: {
+        default = let pkgs = import nixpkgs {
+          inherit system;
+          overlays = [ (import (nixpkgs-mozilla + "/rust-overlay.nix")) overlays.fix-rust ];
+        }; in with pkgs; let
           customisedRustChannelOf = args:
             lib.flip builtins.mapAttrs (rustChannelOf args) (_: pkg: pkg.override {
               targets = [
                 "x86_64-unknown-linux-gnu"
                 "x86_64-pc-windows-msvc" "x86_64-unknown-none"
                 "wasm32-wasip1" "wasm32-wasip2" "wasm32-unknown-unknown"
-                "i686-unknown-linux-gnu"
+                "i686-unknown-linux-gnu" "aarch64-unknown-none"
               ];
               extensions = [ "rust-src" ] ++ (if args.channel == "nightly" then [ "miri-preview" ] else []);
             });
@@ -227,10 +227,11 @@
             src = fetchFromGitHub {
               owner = "hyperlight-dev";
               repo = "cargo-hyperlight";
-              tag = "v${version}";
-              hash = "sha256-xq4/c69N0wG/I8WOYVloo0J0JqoSIKiWWtECdSKrsxo=";
+              rev = "28ac7b57e8e7b83f80bd601f1fab334aa3ae6d4a";
+              hash = "sha256-a/mvPEDJycrCbmd826SmFdasE8BFtMkCsefCNR5JnkM=";
             };
-            cargoHash = "sha256-muiMVrK1TydQiMitihfo7xYidqUIIQ+Hw3BIeo5rLFw=";
+            cargoHash = "sha256-wLapaao8qcB/toltV/xjQ7SXXcfh2J19nw6jWljmb2s=";
+            doCheck = false;
           };
         in (buildRustPackageClang (mkDerivationAttrs: {
           pname = "hyperlight";
@@ -280,5 +281,6 @@
         })).overrideAttrs(oA: {
           hardeningDisable = [ "all" ];
         });
+      });
     };
 }
diff --git a/src/hyperlight_common/src/arch/aarch64/layout.rs b/src/hyperlight_common/src/arch/aarch64/layout.rs
index 20f17026c..cb32cfe8e 100644
--- a/src/hyperlight_common/src/arch/aarch64/layout.rs
+++ b/src/hyperlight_common/src/arch/aarch64/layout.rs
@@ -14,12 +14,21 @@ See the License for the specific language governing permissions and
 limitations under the License.
  */
 
-// TODO(aarch64): change these, they are only provided in order to compile
-pub const MAX_GVA: usize = 0xffff_ffff_ffff_efff;
-pub const SNAPSHOT_PT_GVA_MIN: usize = 0xffff_8000_0000_0000;
-pub const SNAPSHOT_PT_GVA_MAX: usize = 0xffff_80ff_ffff_ffff;
-pub const MAX_GPA: usize = 0x0000_000f_ffff_ffff;
+// TODO: consider using the upper half, like we do on x86;
+// this would require enabling ttbr1
+pub const SCRATCH_TOP_GVA: usize = 0x0000_ffff_ffff_dfff;
+pub const SNAPSHOT_PT_GVA_MIN: usize = 0x0000_8000_0000_0000;
+pub const SNAPSHOT_PT_GVA_MAX: usize = 0x0000_80ff_ffff_ffff;
+pub const SCRATCH_TOP_GPA: usize = 0x0000_000f_ffff_efff;
 
-pub fn min_scratch_size(_input_data_size: usize, _output_data_size: usize) -> usize {
-    unimplemented!("min_scratch_size")
+pub const IO_PAGE_GVA: u64 = 0x0000_ffff_ffff_e000;
+pub const IO_PAGE_GPA: u64 = 0x0000_000f_ffff_f000;
+
+pub const fn io_page() -> Option<(crate::vmem::PhysAddr, crate::vmem::VirtAddr)> {
+    Some((IO_PAGE_GPA, IO_PAGE_GVA))
+}
+
+pub fn min_scratch_size(input_data_size: usize, output_data_size: usize) -> usize {
+    (input_data_size + output_data_size).next_multiple_of(crate::vmem::PAGE_SIZE)
+        + 12 * crate::vmem::PAGE_SIZE
 }
diff --git a/src/hyperlight_common/src/arch/aarch64/vmem.rs b/src/hyperlight_common/src/arch/aarch64/vmem.rs
index 3c83cabc8..9332494cf 100644
--- a/src/hyperlight_common/src/arch/aarch64/vmem.rs
+++ b/src/hyperlight_common/src/arch/aarch64/vmem.rs
@@ -14,9 +14,18 @@ See the License for the specific language governing permissions and
 limitations under the License.
  */
 
-// TODO(aarch64): implement real page table operations
+use alloc::collections::BTreeMap;
+use alloc::collections::btree_map::Entry;
+use alloc::rc::Rc;
+use alloc::vec::Vec;
+use core::cell::RefCell;
 
-use crate::vmem::{Mapping, TableOps, TableReadOps, Void};
+use crate::vmem::sealed::TableMovabilityBase;
+use crate::vmem::{
+    BasicMapping, CowMapping, MapRequest, MapResponse, Mapping, MappingKind, SpaceAwareMapping,
+    SpaceId, SpaceReferenceMapping, TableOps, TableReadOps, UpdateParent, UpdateParentNone,
+    UpdateParentTable, Void, bits, modify_ptes, write_entry_updating,
+};
 
 pub const PAGE_SIZE: usize = 4096;
 pub const PAGE_TABLE_SIZE: usize = 4096;
@@ -26,38 +35,506 @@ pub type PageTableEntry = u64;
 pub type VirtAddr = u64;
 pub type PhysAddr = u64;
 
+const VA_BITS: usize = 48;
+pub const ATTR_INDEX_NORMAL: u8 = 0;
+const SOFTWARE_USE_COW: u8 = 0b1;
+
+// Utility structures
+impl<
+    Op: TableOps<TableMovability = crate::vmem::MayMoveTable>,
+    P: UpdateParent<Op, TableMoveInfo = Op::TableAddr>,
+> UpdateParent<Op> for UpdateParentTable<Op, P>
+{
+    type TableMoveInfo = Op::TableAddr;
+    type ChildType = UpdateParentTable<Op, Self>;
+    fn update_parent(self, op: &Op, new_ptr: Op::TableAddr) {
+        let pte = desc_for_table::<Op>(new_ptr);
+        unsafe {
+            write_entry_updating(op, self.parent, self.entry_ptr, pte);
+        }
+    }
+    fn for_child_at_entry(self, entry_ptr: Op::TableAddr) -> Self::ChildType {
+        Self::ChildType::new(self, entry_ptr)
+    }
+}
+#[derive(Copy, Clone)]
+pub(in crate::vmem) struct UpdateParentRoot {}
+impl<Op: TableOps<TableMovability = crate::vmem::MayMoveTable>> UpdateParent<Op>
+    for UpdateParentRoot
+{
+    type TableMoveInfo = Op::TableAddr;
+    type ChildType = UpdateParentTable<Op, Self>;
+    fn update_parent(self, op: &Op, new_ptr: Op::TableAddr) {
+        unsafe {
+            op.update_root(new_ptr);
+        }
+    }
+    fn for_child_at_entry(self, entry_ptr: Op::TableAddr) -> Self::ChildType {
+        Self::ChildType::new(self, entry_ptr)
+    }
+}
+/// This trait is used to select appropriate implementations of
+/// [`UpdateParent`] to be used, depending on whether a particular
+/// implementation needs the ability to move tables.
+pub(in crate::vmem) trait TableMovability<Op: TableReadOps + ?Sized, TableMoveInfo> {
+    type RootUpdateParent: UpdateParent<Op, TableMoveInfo = TableMoveInfo>;
+    fn root_update_parent() -> Self::RootUpdateParent;
+}
+impl<Op: TableOps<TableMovability = crate::vmem::MayMoveTable>> TableMovability<Op, Op::TableAddr>
+    for crate::vmem::MayMoveTable
+{
+    type RootUpdateParent = UpdateParentRoot;
+    fn root_update_parent() -> Self::RootUpdateParent {
+        UpdateParentRoot {}
+    }
+}
+impl<Op: TableReadOps> TableMovability<Op, Void> for crate::vmem::MayNotMoveTable {
+    type RootUpdateParent = UpdateParentNone;
+    fn root_update_parent() -> Self::RootUpdateParent {
+        UpdateParentNone {}
+    }
+}
+
+#[allow(clippy::identity_op)]
+#[allow(clippy::precedence)]
+fn desc_for_table<Op: TableOps>(table_addr: Op::TableAddr) -> u64 {
+    Op::to_phys(table_addr) |
+        // Don't set APTable[1:0] - we don't use hierarchical permissions
+        // Don't set {U,P,}XNTable - we don't use hierarchical permissions
+        // Don't set Protected - we don't use FEAT_THE
+        // We don't need to set AF on a table descriptor to avoid AF
+        // faults. Since we don't enable FEAT_HAFT, there is no AF on
+        // table descriptors, only on page descriptors.
+        0b11 // table descriptor
+}
+
+// We do not currently use hugepage mappings in the guest, and so we
+// do not need to worry about block descriptors at intermediate
+// levels.
+
+#[allow(clippy::identity_op)]
+#[allow(clippy::precedence)]
+fn desc_for_page(
+    page_addr: u64,
+    _readable: bool,
+    writable: bool,
+    executable: bool,
+    software_use: u8,
+    user_accessible: bool,
+) -> u64 {
+    // todo: make use of the Contiguous bit to reduce tlb pressure
+    let xn = match (executable, user_accessible) {
+        (true, true) => 0,
+        (true, false) => 0b10,
+        (false, _) => 0b11,
+    };
+    let ap = match (writable, user_accessible) {
+        (true, true) => 0b01,
+        (true, false) => 0b00,
+        (false, true) => 0b11,
+        (false, false) => 0b10,
+    };
+    page_addr |
+        ((software_use as u64 & 0xf) << 55) |
+        (xn << 53) |
+        // we do not use hardware management of the dirty state
+        // If we support hugepage block descriptors in the future, we
+        // will need to support setting the nT bit here when the
+        // hardware supports FEAT_BBM Level 1
+        (0b1 << 11) | // always set nG for now, since multi-space
+                      // support is not properly reflected in the
+                      // mapping API.
+        (0b1 << 10) | // we don't need AF tracking, so set it always
+        (0b11 << 8) | // Inner Shareable
+        (ap << 6) |
+        ((ATTR_INDEX_NORMAL as u64) << 2) |
+        0b11
+}
+
+#[allow(clippy::identity_op)]
+#[allow(clippy::precedence)]
+// Produces only page descriptors valid at Level 3; there is presently
+// no support for block descriptors valid at earlier levels
+unsafe fn map_page<
+    Op: TableOps,
+    P: UpdateParent<
+            Op,
+            TableMoveInfo = <Op::TableMovability as TableMovabilityBase<Op>>::TableMoveInfo,
+        >,
+>(
+    op: &Op,
+    mapping: &Mapping,
+    r: MapResponse<Op, P>,
+) {
+    let presumed_base = mapping.phys_base + (r.vmin - mapping.virt_base);
+    let desc = match &mapping.kind {
+        MappingKind::Basic(bm) => desc_for_page(
+            presumed_base,
+            bm.readable,
+            bm.writable,
+            bm.executable,
+            0,
+            mapping.user_accessible,
+        ),
+        MappingKind::Cow(cm) => desc_for_page(
+            presumed_base,
+            cm.readable,
+            false,
+            cm.executable,
+            SOFTWARE_USE_COW,
+            mapping.user_accessible,
+        ),
+        MappingKind::Unmapped => 0,
+    };
+    unsafe {
+        write_entry_updating(op, r.update_parent, r.entry_ptr, desc);
+    }
+}
+
+enum FinalLevelDescriptorKind {
+    Page,
+}
+enum EarlyLevelDescriptorKind {
+    Block,
+    Table,
+}
+fn final_level_descriptor_kind(desc: u64) -> Option<FinalLevelDescriptorKind> {
+    if desc & 3 == 3 {
+        Some(FinalLevelDescriptorKind::Page)
+    } else {
+        None
+    }
+}
+fn early_level_descriptor_kind(desc: u64) -> Option<EarlyLevelDescriptorKind> {
+    match desc & 0b11 {
+        0b01 => Some(EarlyLevelDescriptorKind::Block),
+        0b11 => Some(EarlyLevelDescriptorKind::Table),
+        _ => None,
+    }
+}
+
+unsafe fn next_level_table_if_present<Op: TableReadOps>(
+    op: &Op,
+    addr: Op::TableAddr,
+) -> Option<Op::TableAddr> {
+    let desc: u64 = unsafe { op.read_entry(addr) };
+    if let Some(EarlyLevelDescriptorKind::Table) = early_level_descriptor_kind(desc) {
+        Some(Op::from_phys(bits::<47, 12>(desc) << 12))
+    } else {
+        None
+    }
+}
+
+/// Page-mapping callback to allocate a next-level page table if necessary.
+///
+/// Should only be called on a [`MapResponse`] representing an entry
+/// at level < 3, since it allocates a next-level table.
+/// # Safety
+/// This function modifies page table data structures, and should not be called concurrently
+/// with any other operations that modify the page tables.
+unsafe fn alloc_table_if_needed<
+    Op: TableOps,
+    P: UpdateParent<
+            Op,
+            TableMoveInfo = <Op::TableMovability as TableMovabilityBase<Op>>::TableMoveInfo,
+        >,
+>(
+    op: &Op,
+    x: MapResponse<Op, P>,
+) -> MapRequest<Op, P::ChildType>
+where
+    P::ChildType: UpdateParent<Op>,
+{
+    // Build the child's parent-update handle up front: it is returned on
+    // both paths below, while `x.update_parent` itself is still needed to
+    // write the new table descriptor on the allocation path.
+    let new_update_parent = x.update_parent.for_child_at_entry(x.entry_ptr);
+    if let Some(table_base) = unsafe { next_level_table_if_present(op, x.entry_ptr) } {
+        return MapRequest {
+            table_base,
+            vmin: x.vmin,
+            len: x.len,
+            update_parent: new_update_parent,
+        };
+    }
+    // If we eventually support huge pages, we will need to check if
+    // there was a Block descriptor here and follow the
+    // break-before-make sequence.
+
+    let page_addr = unsafe { op.alloc_table() };
+
+    let pte = desc_for_table::<Op>(page_addr);
+    unsafe {
+        write_entry_updating(op, x.update_parent, x.entry_ptr, pte);
+    };
+    MapRequest {
+        table_base: page_addr,
+        vmin: x.vmin,
+        len: x.len,
+        update_parent: new_update_parent,
+    }
+}
+
+unsafe fn require_table_exist<Op: TableReadOps, P: UpdateParent<Op>>(
+    op: &Op,
+    x: MapResponse<Op, P>,
+) -> Option<MapRequest<Op, P::ChildType>>
+where
+    P::ChildType: UpdateParent<Op>,
+{
+    unsafe {
+        next_level_table_if_present(op, x.entry_ptr).map(|table_base| MapRequest {
+            table_base,
+            vmin: x.vmin,
+            len: x.len,
+            update_parent: x.update_parent.for_child_at_entry(x.entry_ptr),
+        })
+    }
+}
+
+enum WalkNextLevelResponse<Op: TableReadOps, P: UpdateParent<Op>> {
+    WalkNextLevel(MapResponse<Op, P>),
+    AlreadySeen(SpaceReferenceMapping),
+}
+
+enum WalkNextLevelRequest<Op: TableReadOps, P: UpdateParent<Op>> {
+    WalkNextLevel(MapRequest<Op, P>),
+    AlreadySeen(SpaceReferenceMapping),
+}
+fn walk_check_request_seen<Op: TableReadOps, P: UpdateParent<Op>>(
+    seen_pts: &Option<Rc<RefCell<BTreeMap<u64, SpaceReferenceMapping>>>>,
+    space: SpaceId,
+    depth: usize,
+    rq: MapRequest<Op, P>,
+) -> WalkNextLevelRequest<Op, P> {
+    let Some(seen_pts) = seen_pts else {
+        return WalkNextLevelRequest::WalkNextLevel(rq);
+    };
+    match seen_pts.borrow_mut().entry(Op::to_phys(rq.table_base)) {
+        Entry::Vacant(ve) => {
+            ve.insert(SpaceReferenceMapping {
+                depth,
+                space,
+                our_va: 0,
+                their_va: rq.vmin,
+            });
+            WalkNextLevelRequest::WalkNextLevel(rq)
+        }
+        Entry::Occupied(oe) => {
+            let mut sm = *oe.get();
+            if sm.depth != depth {
+                // Sharing a page table at different levels like this
+                // is not supported in the Hyperlight memory
+                // model. Ignore the "sharing".
+                WalkNextLevelRequest::WalkNextLevel(rq)
+            } else {
+                sm.our_va = rq.vmin;
+                WalkNextLevelRequest::AlreadySeen(sm)
+            }
+        }
+    }
+}
+fn walk_next_level_table<Op: TableReadOps, P: UpdateParent<Op>>(
+    op: &Op,
+    x: WalkNextLevelResponse<Op, P>,
+    next_depth: usize,
+    space: SpaceId,
+    seen_pts: &Option<Rc<RefCell<BTreeMap<u64, SpaceReferenceMapping>>>>,
+) -> Option<WalkNextLevelRequest<Op, P::ChildType>>
+where
+    P::ChildType: UpdateParent<Op>,
+{
+    let rq = match x {
+        WalkNextLevelResponse::WalkNextLevel(rq) => rq,
+        WalkNextLevelResponse::AlreadySeen(sm) => {
+            return Some(WalkNextLevelRequest::AlreadySeen(sm));
+        }
+    };
+    let next_base = unsafe { require_table_exist(op, rq)? };
+    Some(walk_check_request_seen(
+        seen_pts, space, next_depth, next_base,
+    ))
+}
+
+fn do_walk_next_level<
+    const HIGH_BIT: u8,
+    const LOW_BIT: u8,
+    Op: TableReadOps,
+    P: UpdateParent<Op>,
+>(
+    x: WalkNextLevelRequest<Op, P>,
+) -> impl Iterator<Item = WalkNextLevelResponse<Op, P>> {
+    let (iter_a, iter_b) = match x {
+        WalkNextLevelRequest::WalkNextLevel(rq) => (
+            Some(
+                modify_ptes::<HIGH_BIT, LOW_BIT, Op, P>(rq)
+                    .map(WalkNextLevelResponse::WalkNextLevel),
+            ),
+            None,
+        ),
+        WalkNextLevelRequest::AlreadySeen(sm) => (
+            None,
+            Some(core::iter::once(WalkNextLevelResponse::AlreadySeen(sm))),
+        ),
+    };
+    iter_a
+        .into_iter()
+        .flatten()
+        .chain(iter_b.into_iter().flatten())
+}
+
 /// # Safety
 /// See `TableOps` documentation.
 #[allow(clippy::missing_safety_doc)]
-pub unsafe fn map<Op: TableOps>(_op: &Op, _mapping: Mapping) {
-    unimplemented!("map")
+pub unsafe fn map<Op: TableOps>(op: &Op, mapping: Mapping) {
+    modify_ptes::<47, 39, Op, _>(MapRequest {
+        table_base: op.root_table(),
+        vmin: mapping.virt_base,
+        len: mapping.len,
+        update_parent: Op::TableMovability::root_update_parent(),
+    })
+    .map(|r| unsafe { alloc_table_if_needed(op, r) })
+    .flat_map(modify_ptes::<38, 30, Op, _>)
+    .map(|r| unsafe { alloc_table_if_needed(op, r) })
+    .flat_map(modify_ptes::<29, 21, Op, _>)
+    .map(|r| unsafe { alloc_table_if_needed(op, r) })
+    .flat_map(modify_ptes::<20, 12, Op, _>)
+    .map(|r| unsafe { map_page(op, &mapping, r) })
+    .for_each(drop);
 }
 
 /// # Safety
 /// See `TableReadOps` documentation.
 #[allow(clippy::missing_safety_doc)]
 pub unsafe fn virt_to_phys<'a, Op: TableReadOps + 'a>(
-    _op: impl core::convert::AsRef<Op> + Copy + 'a,
-    _address: u64,
-    _len: u64,
+    op: impl core::convert::AsRef<Op> + Copy + 'a,
+    address: u64,
+    len: u64,
 ) -> impl Iterator<Item = Mapping> + 'a {
-    unimplemented!("virt_to_phys");
-    #[allow(unreachable_code)]
-    core::iter::empty()
+    let roots = core::iter::once(op.as_ref().root_table());
+    unsafe {
+        internal_walk_va_spaces(op, roots, false, address, len)
+            .flat_map(|(_, mappings)| mappings)
+            .filter_map(|saw| match saw {
+                SpaceAwareMapping::ThisSpace(m) => Some(m),
+                // this is guaranteed to never actually happen, both since
+                // we only passed one root and since we passed do_dedup =
+                // false
+                SpaceAwareMapping::AnotherSpace(_) => None,
+            })
+    }
+}
+
+/// Walks `[address, address + len)` under each root and yields that root's page mappings.
+/// With `do_dedup`, a table already seen at the same depth yields an `AnotherSpace` reference.
+#[allow(clippy::missing_safety_doc)]
+unsafe fn internal_walk_va_spaces<'a, Op: TableReadOps + 'a>(
+    op: impl core::convert::AsRef<Op> + Copy + 'a,
+    roots: impl Iterator<Item = Op::TableAddr> + 'a,
+    // todo - type magic could unify virt_to_phys and walk_va_spaces
+    do_dedup: bool,
+    address: u64,
+    len: u64,
+) -> impl Iterator<
+    Item = (
+        SpaceId,
+        impl Iterator<Item = crate::vmem::SpaceAwareMapping>,
+    ),
+> + 'a {
+    let addr = address & ((1u64 << VA_BITS) - 1);
+    let vmin = addr & !((PAGE_SIZE as u64) - 1);
+    let vmax = core::cmp::min(addr.saturating_add(len), 1u64 << VA_BITS);
+    let seen_pts: Option<Rc<RefCell<BTreeMap<u64, SpaceReferenceMapping>>>> = if do_dedup {
+        Some(Rc::new(RefCell::new(BTreeMap::new())))
+    } else {
+        None
+    };
+    roots.into_iter().map(move |root| {
+        let root_id = Op::to_phys(root);
+        let root_req = walk_check_request_seen(
+            &seen_pts,
+            root_id,
+            0,
+            MapRequest {
+                table_base: root,
+                vmin,
+                len: vmax.saturating_sub(vmin),
+                update_parent: UpdateParentNone {},
+            },
+        );
+        let seen_pts_1 = seen_pts.clone();
+        let seen_pts_2 = seen_pts.clone();
+        let seen_pts_3 = seen_pts.clone();
+        let iter = do_walk_next_level::<47, 39, Op, _>(root_req)
+            .filter_map(move |r| walk_next_level_table(op.as_ref(), r, 1, root_id, &seen_pts_1))
+            .flat_map(do_walk_next_level::<38, 30, Op, _>)
+            .filter_map(move |r| walk_next_level_table(op.as_ref(), r, 2, root_id, &seen_pts_2))
+            .flat_map(do_walk_next_level::<29, 21, Op, _>)
+            .filter_map(move |r| walk_next_level_table(op.as_ref(), r, 3, root_id, &seen_pts_3))
+            .flat_map(do_walk_next_level::<20, 12, Op, _>)
+            .filter_map(move |r| {
+                let rq = match r {
+                    WalkNextLevelResponse::AlreadySeen(sm) => {
+                        return Some(SpaceAwareMapping::AnotherSpace(sm));
+                    }
+                    WalkNextLevelResponse::WalkNextLevel(rq) => rq,
+                };
+                let desc = unsafe { op.as_ref().read_entry(rq.entry_ptr) };
+                if let Some(FinalLevelDescriptorKind::Page) = final_level_descriptor_kind(desc) {
+                    let phys_addr = bits::<47, 12>(desc) << 12;
+                    // Don't sign-extend to canonicalise, because we
+                    // only use addresses in the lower half right
+                    // now---VA_BITS does not include the bit that
+                    // selects between the ttbr0 and ttbr1 spaces.
+                    let virt_addr = rq.vmin;
+                    // The division of flags in the mapping structure
+                    // does not perfectly capture the fact that
+                    // user-level data and instruction access
+                    // permissions can be different.  For now, we just
+                    // assume that the mapping should be marked as
+                    // executable if it was executable to the kernel
+                    // at all.
+                    let executable = bits::<53, 53>(desc) == 0;
+                    let user_accessible = bits::<6, 6>(desc) != 0; // AP[1]
+                    let kind = if bits::<58, 55>(desc) == SOFTWARE_USE_COW as u64 {
+                        MappingKind::Cow(CowMapping {
+                            readable: true,
+                            executable,
+                        })
+                    } else {
+                        MappingKind::Basic(BasicMapping {
+                            readable: true,
+                            writable: bits::<7, 7>(desc) == 0, // AP[2]
+                            executable,
+                        })
+                    };
+                    Some(SpaceAwareMapping::ThisSpace(Mapping {
+                        phys_base: phys_addr,
+                        virt_base: virt_addr,
+                        len: PAGE_SIZE as u64,
+                        kind,
+                        user_accessible,
+                    }))
+                } else {
+                    None // do nothing - there is no mapping to record here
+                }
+            });
+        (root_id, iter)
+    })
+}
 
-/// Stub — see [`crate::vmem::walk_va_spaces`].
 #[allow(clippy::missing_safety_doc)]
 pub unsafe fn walk_va_spaces<Op: TableReadOps>(
-    _op: &Op,
-    _roots: &[Op::TableAddr],
-    _address: u64,
-    _len: u64,
-) -> ::alloc::vec::Vec<(
-    crate::vmem::SpaceId,
-    ::alloc::vec::Vec<crate::vmem::SpaceAwareMapping>,
-)> {
-    ::alloc::vec::Vec::new()
+    op: impl core::convert::AsRef<Op> + Copy,
+    roots: &[Op::TableAddr],
+    address: u64,
+    len: u64,
+) -> Vec<(SpaceId, Vec<crate::vmem::SpaceAwareMapping>)> {
+    unsafe {
+        internal_walk_va_spaces(&op, roots.iter().cloned(), true, address, len)
+            .map(|(id, mappings)| (id, mappings.collect::<Vec<_>>()))
+            .collect::<Vec<_>>()
+    }
 }
 
 /// Stub — see [`crate::vmem::space_aware_map`].
@@ -67,11 +544,7 @@ pub unsafe fn space_aware_map<Op: TableOps>(
     _ref_map: crate::vmem::SpaceReferenceMapping,
     _built_roots: &::alloc::collections::BTreeMap<crate::vmem::SpaceId, Op::TableAddr>,
 ) {
+    // in practice, we never construct page tables that would result
+    // in reaching this right now. todo: implement this properly
+    unreachable!()
 }
-
-pub trait TableMovability<Op: TableReadOps + ?Sized, TableMoveInfo> {}
-impl<Op: TableOps<TableMovability = crate::vmem::MayMoveTable>> TableMovability<Op, Op::TableAddr>
-    for crate::vmem::MayMoveTable
-{
-}
-impl<Op: TableReadOps> TableMovability<Op, Void> for crate::vmem::MayNotMoveTable {}
diff --git a/src/hyperlight_common/src/arch/amd64/layout.rs b/src/hyperlight_common/src/arch/amd64/layout.rs
index 14a9cd62a..4237caf51 100644
--- a/src/hyperlight_common/src/arch/amd64/layout.rs
+++ b/src/hyperlight_common/src/arch/amd64/layout.rs
@@ -21,7 +21,7 @@ limitations under the License.
 /// We have this the top of the page below the top of memory in order
 /// to make working with start/end ptrs in a few places more
 /// convenient (not needing to worry about overflow)
-pub const MAX_GVA: usize = 0xffff_ffff_ffff_efff;
+pub const SCRATCH_TOP_GVA: usize = 0xffff_ffff_ffff_efff;
 pub const SNAPSHOT_PT_GVA_MIN: usize = 0xffff_8000_0000_0000;
 pub const SNAPSHOT_PT_GVA_MAX: usize = 0xffff_80ff_ffff_ffff;
 
@@ -29,7 +29,11 @@ pub const SNAPSHOT_PT_GVA_MAX: usize = 0xffff_80ff_ffff_ffff;
 /// supports at least 36 bits.  Almost all of them support at least 40
 /// bits, so we could consider bumping this in the future if we were
 /// ever memory-constrained.
-pub const MAX_GPA: usize = 0x0000_000f_ffff_ffff;
+pub const SCRATCH_TOP_GPA: usize = 0x0000_000f_ffff_ffff;
+
+pub const fn io_page() -> Option<(u64, u64)> {
+    None
+}
 
 /// On amd64, this is:
 /// - Two pages for the TSS and IDT
diff --git a/src/hyperlight_common/src/arch/amd64/vmem.rs b/src/hyperlight_common/src/arch/amd64/vmem.rs
index eb7a1104c..8f29316e4 100644
--- a/src/hyperlight_common/src/arch/amd64/vmem.rs
+++ b/src/hyperlight_common/src/arch/amd64/vmem.rs
@@ -27,31 +27,12 @@ limitations under the License.
 
 use crate::vmem::{
     BasicMapping, CowMapping, MapRequest, MapResponse, Mapping, MappingKind, TableMovabilityBase,
-    TableOps, TableReadOps, UpdateParent, UpdateParentNone, Void, modify_ptes,
+    TableOps, TableReadOps, UpdateParent, UpdateParentNone, UpdateParentTable, Void, modify_ptes,
     write_entry_updating,
 };
 
-/// Parent is another page table whose ancestors may also need
-/// updating when it relocates.
-pub struct UpdateParentTable<Op: TableOps, P: UpdateParent<Op>> {
-    pub(crate) parent: P,
-    pub(crate) entry_ptr: Op::TableAddr,
-}
-impl<Op: TableOps, P: UpdateParent<Op>> Clone for UpdateParentTable<Op, P> {
-    fn clone(&self) -> Self {
-        *self
-    }
-}
-impl<Op: TableOps, P: UpdateParent<Op>> Copy for UpdateParentTable<Op, P> {}
-impl<Op: TableOps, P: UpdateParent<Op>> UpdateParentTable<Op, P> {
-    pub(crate) fn new(parent: P, entry_ptr: Op::TableAddr) -> Self {
-        UpdateParentTable { parent, entry_ptr }
-    }
-}
-
-/// Parent is the root (e.g. CR3).
 #[derive(Copy, Clone)]
-pub struct UpdateParentRoot {}
+pub(in crate::vmem) struct UpdateParentRoot {}
 
 /// Read a PTE and return it (widened to u64) if the present bit is
 /// set. The amd64 "present" encoding is a single bit (bit 0); other
@@ -162,7 +143,7 @@ fn pte_for_table<Op: TableOps>(table_addr: Op::TableAddr) -> u64 {
 /// This trait is used to select appropriate implementations of
 /// [`UpdateParent`] to be used, depending on whether a particular
 /// implementation needs the ability to move tables.
-pub trait TableMovability<Op: TableReadOps + ?Sized, TableMoveInfo> {
+pub(in crate::vmem) trait TableMovability<Op: TableReadOps + ?Sized, TableMoveInfo> {
     type RootUpdateParent: UpdateParent<Op, TableMoveInfo = TableMoveInfo>;
     fn root_update_parent() -> Self::RootUpdateParent;
 }
diff --git a/src/hyperlight_common/src/arch/i686/layout.rs b/src/hyperlight_common/src/arch/i686/layout.rs
index cdc3af7d1..81de7d505 100644
--- a/src/hyperlight_common/src/arch/i686/layout.rs
+++ b/src/hyperlight_common/src/arch/i686/layout.rs
@@ -16,10 +16,10 @@ limitations under the License.
 
 // i686 layout constants for 32-bit protected mode with paging.
 
-pub const MAX_GVA: usize = 0xffff_ffff;
+pub const SCRATCH_TOP_GVA: usize = 0xffff_ffff;
 /// Set below the KVM APIC access page at 0xFEE00000 to avoid EEXIST when scratch
 /// regions are large enough to reach that address.
-pub const MAX_GPA: usize = 0xFEDF_FFFF;
+pub const SCRATCH_TOP_GPA: usize = 0xFEDF_FFFF;
 
 /// Minimum scratch region size: IO buffers (page-aligned) plus 12 pages
 /// for bookkeeping and the exception stack. Page table space is validated
diff --git a/src/hyperlight_common/src/layout.rs b/src/hyperlight_common/src/layout.rs
index 1a7ca0880..e5dde0c2d 100644
--- a/src/hyperlight_common/src/layout.rs
+++ b/src/hyperlight_common/src/layout.rs
@@ -26,7 +26,7 @@ limitations under the License.
 #[cfg_attr(target_arch = "aarch64", path = "arch/aarch64/layout.rs")]
 mod arch;
 
-pub use arch::{MAX_GPA, MAX_GVA};
+pub use arch::{SCRATCH_TOP_GPA, SCRATCH_TOP_GVA, io_page};
 #[cfg(any(
     all(target_arch = "x86_64", not(feature = "i686-guest")),
     target_arch = "aarch64"
@@ -50,10 +50,10 @@ pub const SCRATCH_TOP_EXN_STACK_OFFSET: u64 = 0x30;
 pub const SCRATCH_TOP_GUEST_COUNTER_OFFSET: u64 = 0x1008;
 
 pub fn scratch_base_gpa(size: usize) -> u64 {
-    (MAX_GPA - size + 1) as u64
+    (SCRATCH_TOP_GPA - size + 1) as u64
 }
 pub fn scratch_base_gva(size: usize) -> u64 {
-    (MAX_GVA - size + 1) as u64
+    (SCRATCH_TOP_GVA - size + 1) as u64
 }
 
 /// Compute the minimum scratch region size needed for a sandbox.
diff --git a/src/hyperlight_common/src/vmem.rs b/src/hyperlight_common/src/vmem.rs
index 94b67319c..bb9536b09 100644
--- a/src/hyperlight_common/src/vmem.rs
+++ b/src/hyperlight_common/src/vmem.rs
@@ -42,6 +42,13 @@ pub use arch::{PAGE_PRESENT, PAGE_TABLE_SIZE, PTE_ADDR_MASK, PageTableEntry, Phy
 pub const PAGE_TABLE_ENTRIES_PER_TABLE: usize =
     PAGE_TABLE_SIZE / core::mem::size_of::<PageTableEntry>();
 
+// It would be nice not to have any arch-dependent re-exports here,
+// but on arm64 the MAIR indices used need to be synced between the
+// descriptor creation code and the register initialisation code to
+// make sure that MAIR is set up properly.
+#[cfg(target_arch = "aarch64")]
+pub use arch::ATTR_INDEX_NORMAL;
+
 // Shared page table iterator infrastructure used by each arch module.
 
 /// Utility function to extract an (inclusive on both ends) bit range
@@ -115,6 +122,31 @@ impl<Op: TableReadOps> UpdateParent<Op> for UpdateParentNone {
     }
 }
 
+/// A struct implementing [`UpdateParent`] to be used when a table's
+/// parent is another table that needs to be updated recursively.
+pub(in crate::vmem) struct UpdateParentTable<Op: TableOps, P: UpdateParent<Op>> {
+    pub(in crate::vmem) parent: P,
+    pub(in crate::vmem) entry_ptr: Op::TableAddr,
+}
+impl<Op: TableOps, P: UpdateParent<Op>> Clone for UpdateParentTable<Op, P> {
+    fn clone(&self) -> Self {
+        *self
+    }
+}
+impl<Op: TableOps, P: UpdateParent<Op>> Copy for UpdateParentTable<Op, P> {}
+impl<Op: TableOps, P: UpdateParent<Op>> UpdateParentTable<Op, P> {
+    pub(in crate::vmem) fn new(parent: P, entry_ptr: Op::TableAddr) -> Self {
+        UpdateParentTable { parent, entry_ptr }
+    }
+}
+
+/// A struct implementing [`UpdateParent`] to be used when a table's
+/// parent is the "root table" (with access to that root pointer
+/// provided in an architecture/environment-insensitive manner via
+/// `TableOps`)
+#[derive(Copy, Clone)]
+pub struct UpdateParentRoot {}
+
 /// A helper structure indicating a mapping operation that needs to be
 /// performed.
 pub(in crate::vmem) struct MapRequest<Op: TableReadOps, P: UpdateParent<Op>> {
@@ -319,6 +351,7 @@ mod sealed {
 use sealed::*;
 
 /// A sealed trait used to collect some information about the marker structures [`MayMoveTable`] and [`MayNotMoveTable`]
+#[allow(private_bounds)] // this trait is intentionally sealed
 pub trait TableMovability<Op: TableReadOps + ?Sized>:
     TableMovabilityBase<Op>
     + arch::TableMovability<Op, <Self as TableMovabilityBase<Op>>::TableMoveInfo>
diff --git a/src/hyperlight_guest/src/arch/aarch64/exit.rs b/src/hyperlight_guest/src/arch/aarch64/exit.rs
index 0ac27570d..01ea692d9 100644
--- a/src/hyperlight_guest/src/arch/aarch64/exit.rs
+++ b/src/hyperlight_guest/src/arch/aarch64/exit.rs
@@ -16,7 +16,16 @@ limitations under the License.
 
 // TODO(aarch64): implement VM exit mechanism (e.g. hvc instruction)
 
+const IO_PAGE_GVA: u64 = hyperlight_common::layout::io_page().unwrap().1;
+
 /// Trigger a VM exit sending a 32-bit value to the host on the given port.
-pub(crate) unsafe fn out32(_port: u16, _val: u32) {
-    unimplemented!("aarch64 out32")
+pub(crate) unsafe fn out32(port: u16, val: u32) {
+    // One u64 slot per port; only indices below `slots` fall within PAGE_SIZE bytes of the base.
+    let slots = hyperlight_common::vmem::PAGE_SIZE / core::mem::size_of::<u64>();
+    assert!((port as usize) < slots, "aarch64 mmio: unsupported hypercall number {}", port);
+    unsafe {
+        (IO_PAGE_GVA as *mut u64)
+            .wrapping_add(port as usize)
+            .write_volatile(val as u64);
+    }
 }
diff --git a/src/hyperlight_guest/src/arch/aarch64/layout.rs b/src/hyperlight_guest/src/arch/aarch64/layout.rs
index 685447ce7..59ffd29b4 100644
--- a/src/hyperlight_guest/src/arch/aarch64/layout.rs
+++ b/src/hyperlight_guest/src/arch/aarch64/layout.rs
@@ -15,17 +15,18 @@ limitations under the License.
  */
 
 // TODO(aarch64): these values are placeholders copied from amd64
-pub const MAIN_STACK_TOP_GVA: u64 = 0xffff_ff00_0000_0000;
-pub const MAIN_STACK_LIMIT_GVA: u64 = 0xffff_fe00_0000_0000;
+pub const MAIN_STACK_TOP_GVA: u64 = 0x0000_ff00_0000_0000;
+pub const MAIN_STACK_LIMIT_GVA: u64 = 0x0000_fe00_0000_0000;
 
 pub fn scratch_size() -> u64 {
-    unimplemented!("aarch64 scratch_size")
+    let addr = crate::layout::scratch_size_gva();
+    unsafe { addr.read_volatile() }
 }
 
 pub fn scratch_base_gpa() -> u64 {
-    unimplemented!("aarch64 scratch_base_gpa")
+    hyperlight_common::layout::scratch_base_gpa(scratch_size() as usize)
 }
 
 pub fn scratch_base_gva() -> u64 {
-    unimplemented!("aarch64 scratch_base_gva")
+    hyperlight_common::layout::scratch_base_gva(scratch_size() as usize)
 }
diff --git a/src/hyperlight_guest/src/arch/aarch64/prim_alloc.rs b/src/hyperlight_guest/src/arch/aarch64/prim_alloc.rs
index 4a5b5d137..d49e3f936 100644
--- a/src/hyperlight_guest/src/arch/aarch64/prim_alloc.rs
+++ b/src/hyperlight_guest/src/arch/aarch64/prim_alloc.rs
@@ -14,12 +14,40 @@ See the License for the specific language governing permissions and
 limitations under the License.
  */
 
-// TODO(aarch64): implement real aarch64 page allocator
+use hyperlight_common::flatbuffer_wrappers::guest_error::ErrorCode;
+use hyperlight_common::{layout, vmem};
 
 // There are no notable architecture-specific safety considerations
 // here, and the general conditions are documented in the
 // architecture-independent re-export in prim_alloc.rs
 #[allow(clippy::missing_safety_doc)]
-pub unsafe fn alloc_phys_pages(_n: u64) -> u64 {
-    unimplemented!("aarch64 alloc_phys_pages")
+pub unsafe fn alloc_phys_pages(n: u64) -> u64 {
+    let addr = crate::layout::allocator_gva();
+    let nbytes = n.saturating_mul(vmem::PAGE_SIZE as u64); // saturate: huge n must abort below
+    let prev_base: u64;
+    unsafe {
+        // todo: actually check for FEAT_LSE presence.
+        core::arch::asm!("
+            ldadd {nbytes}, {prev_base}, [{addr}]
+        ",
+            addr = in(reg) addr,
+            nbytes = in(reg) nbytes,
+            prev_base = out(reg) prev_base,
+        );
+    }
+    // Set aside two pages at the top of the scratch region for the
+    // exception stack, shared state, etc
+    let max_avail = layout::SCRATCH_TOP_GPA - vmem::PAGE_SIZE * 2;
+    if prev_base
+        .checked_add(nbytes)
+        .is_none_or(|xx| xx >= max_avail as u64)
+    {
+        unsafe {
+            crate::exit::abort_with_code_and_message(
+                &[ErrorCode::MallocFailed as u8],
+                c"Out of physical memory".as_ptr(),
+            )
+        }
+    }
+    prev_base
 }
diff --git a/src/hyperlight_guest/src/arch/amd64/prim_alloc.rs b/src/hyperlight_guest/src/arch/amd64/prim_alloc.rs
index cfaad9a0b..e1d388c64 100644
--- a/src/hyperlight_guest/src/arch/amd64/prim_alloc.rs
+++ b/src/hyperlight_guest/src/arch/amd64/prim_alloc.rs
@@ -33,7 +33,8 @@ pub unsafe fn alloc_phys_pages(n: u64) -> u64 {
     }
     // Set aside two pages at the top of the scratch region for the
     // exception stack, shared state, etc
-    let max_avail = hyperlight_common::layout::MAX_GPA - hyperlight_common::vmem::PAGE_SIZE * 2;
+    let max_avail =
+        hyperlight_common::layout::SCRATCH_TOP_GPA - hyperlight_common::vmem::PAGE_SIZE * 2;
     if x.checked_add(nbytes)
         .is_none_or(|xx| xx >= max_avail as u64)
     {
diff --git a/src/hyperlight_guest/src/layout.rs b/src/hyperlight_guest/src/layout.rs
index e4da4dd89..a9f56d78b 100644
--- a/src/hyperlight_guest/src/layout.rs
+++ b/src/hyperlight_guest/src/layout.rs
@@ -21,26 +21,26 @@ mod arch;
 
 pub use arch::{MAIN_STACK_LIMIT_GVA, MAIN_STACK_TOP_GVA};
 pub fn scratch_size_gva() -> *mut u64 {
-    use hyperlight_common::layout::{MAX_GVA, SCRATCH_TOP_SIZE_OFFSET};
-    (MAX_GVA as u64 - SCRATCH_TOP_SIZE_OFFSET + 1) as *mut u64
+    use hyperlight_common::layout::{SCRATCH_TOP_GVA, SCRATCH_TOP_SIZE_OFFSET};
+    (SCRATCH_TOP_GVA as u64 - SCRATCH_TOP_SIZE_OFFSET + 1) as *mut u64
 }
 pub fn allocator_gva() -> *mut u64 {
-    use hyperlight_common::layout::{MAX_GVA, SCRATCH_TOP_ALLOCATOR_OFFSET};
-    (MAX_GVA as u64 - SCRATCH_TOP_ALLOCATOR_OFFSET + 1) as *mut u64
+    use hyperlight_common::layout::{SCRATCH_TOP_ALLOCATOR_OFFSET, SCRATCH_TOP_GVA};
+    (SCRATCH_TOP_GVA as u64 - SCRATCH_TOP_ALLOCATOR_OFFSET + 1) as *mut u64
 }
 pub fn snapshot_pt_gpa_base_gva() -> *mut u64 {
-    use hyperlight_common::layout::{MAX_GVA, SCRATCH_TOP_SNAPSHOT_PT_GPA_BASE_OFFSET};
-    (MAX_GVA as u64 - SCRATCH_TOP_SNAPSHOT_PT_GPA_BASE_OFFSET + 1) as *mut u64
+    use hyperlight_common::layout::{SCRATCH_TOP_GVA, SCRATCH_TOP_SNAPSHOT_PT_GPA_BASE_OFFSET};
+    (SCRATCH_TOP_GVA as u64 - SCRATCH_TOP_SNAPSHOT_PT_GPA_BASE_OFFSET + 1) as *mut u64
 }
 pub fn snapshot_generation_gva() -> *mut u64 {
-    use hyperlight_common::layout::{MAX_GVA, SCRATCH_TOP_SNAPSHOT_GENERATION_OFFSET};
-    (MAX_GVA as u64 - SCRATCH_TOP_SNAPSHOT_GENERATION_OFFSET + 1) as *mut u64
+    use hyperlight_common::layout::{SCRATCH_TOP_GVA, SCRATCH_TOP_SNAPSHOT_GENERATION_OFFSET};
+    (SCRATCH_TOP_GVA as u64 - SCRATCH_TOP_SNAPSHOT_GENERATION_OFFSET + 1) as *mut u64
 }
 pub use arch::{scratch_base_gpa, scratch_base_gva};
 
 /// Returns a pointer to the guest counter u64 in scratch memory.
 #[cfg(feature = "guest-counter")]
 pub fn guest_counter_gva() -> *const u64 {
-    use hyperlight_common::layout::{MAX_GVA, SCRATCH_TOP_GUEST_COUNTER_OFFSET};
-    (MAX_GVA as u64 - SCRATCH_TOP_GUEST_COUNTER_OFFSET + 1) as *const u64
+    use hyperlight_common::layout::{SCRATCH_TOP_GUEST_COUNTER_OFFSET, SCRATCH_TOP_GVA};
+    (SCRATCH_TOP_GVA as u64 - SCRATCH_TOP_GUEST_COUNTER_OFFSET + 1) as *const u64
 }
diff --git a/src/hyperlight_guest_bin/src/arch/aarch64/exception/entry.rs b/src/hyperlight_guest_bin/src/arch/aarch64/exception/entry.rs
new file mode 100644
index 000000000..2b3d22d21
--- /dev/null
+++ b/src/hyperlight_guest_bin/src/arch/aarch64/exception/entry.rs
@@ -0,0 +1,173 @@
+/*
+Copyright 2026 The Hyperlight Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+ */
+
+use core::arch::global_asm;
+use core::mem::{offset_of, size_of};
+
+use super::types::*;
+
+const _: () = assert!(2 * size_of::<u64>() == 0x10);
+const _: () = assert!(2 * size_of::<u128>() == 0x20);
+
+// sp should already have been lowered to make room for the context
+// save structure
+//
+// x30 should have been saved already
+global_asm!("
+.global context_save\n
+context_save:\n
+    stp  x0,  x1, [sp, #{x_off}+0x00]\n
+    stp  x2,  x3, [sp, #{x_off}+0x10]\n
+    stp  x4,  x5, [sp, #{x_off}+0x20]\n
+    stp  x6,  x7, [sp, #{x_off}+0x30]\n
+    stp  x8,  x9, [sp, #{x_off}+0x40]\n
+    stp x10, x11, [sp, #{x_off}+0x50]\n
+    stp x12, x13, [sp, #{x_off}+0x60]\n
+    stp x14, x15, [sp, #{x_off}+0x70]\n
+    stp x16, x17, [sp, #{x_off}+0x80]\n
+    stp x18, x19, [sp, #{x_off}+0x90]\n
+    stp x20, x21, [sp, #{x_off}+0xa0]\n
+    stp x22, x23, [sp, #{x_off}+0xb0]\n
+    stp x24, x25, [sp, #{x_off}+0xc0]\n
+    stp x26, x27, [sp, #{x_off}+0xd0]\n
+    stp x28, x29, [sp, #{x_off}+0xe0]\n
+    mrs x0, fpcr\n
+    mrs x1, fpsr\n
+    stp x0, x1, [sp, #{fpcr_off}]\n
+    stp  q0,  q1, [sp, #{q_off}+0x000]\n
+    stp  q2,  q3, [sp, #{q_off}+0x020]\n
+    stp  q4,  q5, [sp, #{q_off}+0x040]\n
+    stp  q6,  q7, [sp, #{q_off}+0x060]\n
+    stp  q8,  q9, [sp, #{q_off}+0x080]\n
+    stp q10, q11, [sp, #{q_off}+0x0a0]\n
+    stp q12, q13, [sp, #{q_off}+0x0c0]\n
+    stp q14, q15, [sp, #{q_off}+0x0e0]\n
+    stp q16, q17, [sp, #{q_off}+0x100]\n
+    stp q18, q19, [sp, #{q_off}+0x120]\n
+    stp q20, q21, [sp, #{q_off}+0x140]\n
+    stp q22, q23, [sp, #{q_off}+0x160]\n
+    stp q24, q25, [sp, #{q_off}+0x180]\n
+    stp q26, q27, [sp, #{q_off}+0x1a0]\n
+    stp q28, q29, [sp, #{q_off}+0x1c0]\n
+    stp q30, q31, [sp, #{q_off}+0x1e0]\n
+    ret
+",
+    x_off = const offset_of!(ExceptionContext, x),
+    fpcr_off = const offset_of!(ExceptionContext, fpcr),
+    q_off = const offset_of!(ExceptionContext, q),
+);
+
+global_asm!("
+.global context_restore\n
+context_restore:\n
+    ldp x0, x1, [sp, #{fpcr_off}]\n
+    msr fpcr, x0\n
+    msr fpsr, x1\n
+    ldp  x0,  x1, [sp, #{x_off}+0x00]\n
+    ldp  x2,  x3, [sp, #{x_off}+0x10]\n
+    ldp  x4,  x5, [sp, #{x_off}+0x20]\n
+    ldp  x6,  x7, [sp, #{x_off}+0x30]\n
+    ldp  x8,  x9, [sp, #{x_off}+0x40]\n
+    ldp x10, x11, [sp, #{x_off}+0x50]\n
+    ldp x12, x13, [sp, #{x_off}+0x60]\n
+    ldp x14, x15, [sp, #{x_off}+0x70]\n
+    ldp x16, x17, [sp, #{x_off}+0x80]\n
+    ldp x18, x19, [sp, #{x_off}+0x90]\n
+    ldp x20, x21, [sp, #{x_off}+0xa0]\n
+    ldp x22, x23, [sp, #{x_off}+0xb0]\n
+    ldp x24, x25, [sp, #{x_off}+0xc0]\n
+    ldp x26, x27, [sp, #{x_off}+0xd0]\n
+    ldp x28, x29, [sp, #{x_off}+0xe0]\n
+    ldr x30,      [sp, #{x_off}+0xf0]\n
+    ldp  q0,  q1, [sp, #{q_off}+0x000]\n
+    ldp  q2,  q3, [sp, #{q_off}+0x020]\n
+    ldp  q4,  q5, [sp, #{q_off}+0x040]\n
+    ldp  q6,  q7, [sp, #{q_off}+0x060]\n
+    ldp  q8,  q9, [sp, #{q_off}+0x080]\n
+    ldp q10, q11, [sp, #{q_off}+0x0a0]\n
+    ldp q12, q13, [sp, #{q_off}+0x0c0]\n
+    ldp q14, q15, [sp, #{q_off}+0x0e0]\n
+    ldp q16, q17, [sp, #{q_off}+0x100]\n
+    ldp q18, q19, [sp, #{q_off}+0x120]\n
+    ldp q20, q21, [sp, #{q_off}+0x140]\n
+    ldp q22, q23, [sp, #{q_off}+0x160]\n
+    ldp q24, q25, [sp, #{q_off}+0x180]\n
+    ldp q26, q27, [sp, #{q_off}+0x1a0]\n
+    ldp q28, q29, [sp, #{q_off}+0x1c0]\n
+    ldp q30, q31, [sp, #{q_off}+0x1e0]\n
+    add sp, sp, #{ctx_size}\n
+    eret\n
+",
+    ctx_size = const size_of::<ExceptionContext>(),
+    x_off = const offset_of!(ExceptionContext, x),
+    fpcr_off = const offset_of!(ExceptionContext, fpcr),
+    q_off = const offset_of!(ExceptionContext, q),
+);
+
+macro_rules! vbar_entry {
+    ($et:literal, $ef:literal) => {
+        concat!(
+            "
+            sub sp, sp, #{ctx_size}\n
+            str x30, [sp, #{x30_off}]\n
+            bl context_save\n
+            mov x0, {ExceptionType_",
+            $et,
+            "}\n
+            mov x1, {ExceptionFrom_",
+            $ef,
+            "}\n
+            mov x2, sp\n
+            bl {handler}\n
+            b context_restore\n
+            .balign 0x80\n
+        "
+        )
+    };
+}
+
+global_asm!("
+.balign 0x800\n
+.global vbar\n
+vbar:\n",
+    vbar_entry!("Synchronous", "CurrentSP0"),
+    vbar_entry!("IRQ", "CurrentSP0"),
+    vbar_entry!("FIQ", "CurrentSP0"),
+    vbar_entry!("SError", "CurrentSP0"),
+    vbar_entry!("Synchronous", "CurrentSPx"),
+    vbar_entry!("IRQ", "CurrentSPx"),
+    vbar_entry!("FIQ", "CurrentSPx"),
+    vbar_entry!("SError", "CurrentSPx"),
+    vbar_entry!("Synchronous", "LowerAArch64"),
+    vbar_entry!("IRQ", "LowerAArch64"),
+    vbar_entry!("FIQ", "LowerAArch64"),
+    vbar_entry!("SError", "LowerAArch64"),
+    vbar_entry!("Synchronous", "LowerAArch32"),
+    vbar_entry!("IRQ", "LowerAArch32"),
+    vbar_entry!("FIQ", "LowerAArch32"),
+    vbar_entry!("SError", "LowerAArch32"),
+    ctx_size = const size_of::<ExceptionContext>(),
+    x30_off = const offset_of!(ExceptionContext, x) + 15 * 0x010,
+    handler = sym super::handle::handle_exception,
+    ExceptionType_Synchronous = const ExceptionType::Synchronous as u64,
+    ExceptionType_IRQ = const ExceptionType::IRQ as u64,
+    ExceptionType_FIQ = const ExceptionType::FIQ as u64,
+    ExceptionType_SError = const ExceptionType::SError as u64,
+    ExceptionFrom_CurrentSP0 = const ExceptionFrom::CurrentSP0 as u64,
+    ExceptionFrom_CurrentSPx = const ExceptionFrom::CurrentSPx as u64,
+    ExceptionFrom_LowerAArch64 = const ExceptionFrom::LowerAArch64 as u64,
+    ExceptionFrom_LowerAArch32 = const ExceptionFrom::LowerAArch32 as u64,
+);
diff --git a/src/hyperlight_guest_bin/src/arch/aarch64/exception/handle.rs b/src/hyperlight_guest_bin/src/arch/aarch64/exception/handle.rs
new file mode 100644
index 000000000..464ccf9ae
--- /dev/null
+++ b/src/hyperlight_guest_bin/src/arch/aarch64/exception/handle.rs
@@ -0,0 +1,233 @@
+/*
+Copyright 2026 The Hyperlight Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+ */
+use core::fmt::Write;
+
+use hyperlight_common::vmem::{
+    BasicMapping, CowMapping, MappingKind, PAGE_SIZE, PhysAddr, VirtAddr,
+};
+use hyperlight_guest::error::ErrorCode;
+use hyperlight_guest::exit::write_abort;
+use hyperlight_guest::layout::{MAIN_STACK_LIMIT_GVA, MAIN_STACK_TOP_GVA};
+
+use super::super::mrs;
+use super::types::*;
+use crate::HyperlightAbortWriter;
+
+/// Extract bits `LOW_BIT..=HIGH_BIT` of `x` (inclusive on both ends),
+/// shifted down to bit 0. Requires `LOW_BIT <= HIGH_BIT <= 63`.
+#[inline(always)]
+fn bits<const HIGH_BIT: u8, const LOW_BIT: u8>(x: u64) -> u64 {
+    (x >> LOW_BIT) & (u64::MAX >> (63 - (HIGH_BIT - LOW_BIT)))
+}
+
+const ESR_EC_DATA_ABORT_LOWER_EL: u64 = 0b100100;
+const ESR_EC_DATA_ABORT_SAME_EL: u64 = 0b100101;
+
+// some of the data in these is not used presently, but is logically
+// part of the code being decoded & should be accounted for
+#[allow(dead_code)]
+#[derive(Debug, Copy, Clone)]
+enum DataFault {
+    TranslationFault(i64),
+    PermissionFault(i64),
+    Other(u64),
+}
+fn decode_data_fault(dfsc: u64) -> DataFault {
+    if bits::<5, 2>(dfsc) == 0b0011 {
+        DataFault::PermissionFault(bits::<1, 0>(dfsc) as i64)
+    } else if bits::<5, 2>(dfsc) == 0b0001 {
+        DataFault::TranslationFault(bits::<1, 0>(dfsc) as i64)
+    } else if bits::<5, 2>(dfsc) == 0b1010 {
+        if bits::<1, 0>(dfsc) >= 2 {
+            DataFault::TranslationFault(bits::<1, 0>(dfsc) as i64 - 4)
+        } else {
+            DataFault::Other(dfsc)
+        }
+    } else {
+        DataFault::Other(dfsc)
+    }
+}
+
+// some of the data in these is not used presently, but is logically
+// part of the code being decoded & should be accounted for
+#[allow(dead_code)]
+#[derive(Debug, Copy, Clone)]
+enum Exception {
+    /// lower el?, faulting address, status code
+    DataFault(bool, u64, DataFault),
+    Other(u64),
+}
+fn decode_syndrome(esr: u64) -> Exception {
+    let ec = bits::<31, 26>(esr);
+    match ec {
+        ESR_EC_DATA_ABORT_LOWER_EL => Exception::DataFault(
+            true,
+            unsafe { mrs!(FAR_EL1) },
+            decode_data_fault(bits::<5, 0>(esr)),
+        ),
+        ESR_EC_DATA_ABORT_SAME_EL => Exception::DataFault(
+            false,
+            unsafe { mrs!(FAR_EL1) },
+            decode_data_fault(bits::<5, 0>(esr)),
+        ),
+        _ => Exception::Other(esr),
+    }
+}
+
+fn handle_stack_fault(far: u64) {
+    // TODO: perhaps we should have a sanity check that the
+    // stack grows only one page at a time, which should be
+    // ensured by our stack probing discipline?
+    unsafe {
+        let new_page = hyperlight_guest::prim_alloc::alloc_phys_pages(1);
+        crate::paging::map_region(
+            new_page,
+            (far & !((PAGE_SIZE - 1) as u64)) as *mut u8,
+            PAGE_SIZE as u64,
+            MappingKind::Basic(BasicMapping {
+                readable: true,
+                writable: true,
+                executable: false,
+            }),
+        );
+        // We don't use crate::barrier::first_valid_same_ctx, because
+        // we don't (presently) use FEAT_ExS and consequently don't
+        // need the `isb`.
+        core::arch::asm!("dsb sy");
+    }
+}
+
+fn handle_cow_fault(_orig_phys: PhysAddr, virt: VirtAddr, perms: CowMapping) {
+    unsafe {
+        let new_page = hyperlight_guest::prim_alloc::alloc_phys_pages(1);
+        let target_virt = virt as *mut u8;
+        let Some(scratch_mapping_access) = crate::paging::phys_to_virt(new_page) else {
+            write_abort(&[ErrorCode::GuestError as u8, 0xfeu8]);
+            write_abort("impossible: phys_to_virt failed on alloc_phys_pages return".as_bytes());
+            write_abort(&[0xFF]);
+            // At this point, write_abort with the 0xFF terminator is
+            // expected to terminate guest execution, so control
+            // should never reach beyond this call.
+            unreachable!();
+        };
+        core::ptr::copy(target_virt, scratch_mapping_access, PAGE_SIZE);
+        // todo(multithreading): this will definitely require a
+        // break-before-make sequence
+        crate::paging::map_region(
+            new_page,
+            target_virt,
+            PAGE_SIZE as u64,
+            MappingKind::Basic(BasicMapping {
+                // Inherit R bit from the original mapping (always 1 at the moment)
+                readable: perms.readable,
+                // If we got here, the original mapping was marked
+                // CoW, so the copied mapping should always be
+                // writable
+                writable: true,
+                executable: perms.executable,
+            }),
+        );
+        // This is updating an entry that was already valid, changing
+        // its OA, so we need to actually invalidate the TLB for it.
+        core::arch::asm!("
+            dsb ish
+            tlbi vae1is, {}
+            dsb ish
+            isb
+        ",
+            in(reg) (virt >> 12),
+            options(readonly, nostack, preserves_flags)
+        );
+    }
+}
+
+#[unsafe(no_mangle)]
+pub extern "Rust" fn _debug_print(x: &str) {
+    hyperlight_guest::exit::debug_print(x);
+}
+
+fn handle_internal_fault(exn: Exception) -> bool {
+    match exn {
+        Exception::DataFault(false, far, DataFault::TranslationFault(_)) => {
+            if (MAIN_STACK_LIMIT_GVA..MAIN_STACK_TOP_GVA).contains(&far) {
+                handle_stack_fault(far);
+                true
+            } else {
+                false
+            }
+        }
+        Exception::DataFault(false, far, DataFault::PermissionFault(_)) => {
+            let mut orig_mappings = crate::paging::virt_to_phys(far);
+            if let Some(mapping) = orig_mappings.next()
+                && let None = orig_mappings.next()
+                && let MappingKind::Cow(cm) = mapping.kind
+            {
+                handle_cow_fault(mapping.phys_base, mapping.virt_base, cm);
+                true
+            } else {
+                false
+            }
+        }
+        _ => false,
+    }
+}
+
+pub(super) extern "C" fn handle_exception(
+    typ: ExceptionType,
+    from: ExceptionFrom,
+    _regs: *mut ExceptionContext,
+) {
+    let esr = unsafe { mrs!(ESR_EL1) };
+
+    if typ == ExceptionType::Synchronous && from == ExceptionFrom::CurrentSP0 {
+        let exn = decode_syndrome(esr);
+        if handle_internal_fault(exn) {
+            return;
+        }
+    }
+
+    // Die with some diagnostic information
+    let elr = unsafe { mrs!(ELR_EL1) };
+    let far = unsafe { mrs!(FAR_EL1) };
+    let insn_bytes = unsafe { (elr as *const [u8; 8]).read_volatile() };
+    // amd64 provides the exception vector as the first byte of the
+    // abort sequence after the guest error identifier code, but the
+    // host doesn't use it for anything except printing an error
+    // message, so it's not really useful to try to find an analogue
+    // (e.g. we could use ESR_EL1.EC---but it's only used for
+    // debugging and we'll include the whole syndrome in the message
+    // anyway). So, use 0xfe which is invalid as an exception on x86,
+    // to let the host know not to try to print anything extra.
+    let mut w = HyperlightAbortWriter;
+    write_abort(&[ErrorCode::GuestError as u8, 0xfeu8]);
+    let write_res = write!(
+        w,
+        "Exception vector: {:?} {:?}\n\
+         Faulting Instruction: {:#x}\n\
+         Bytes At Faulting Instruction: {:?}\n\
+         Faulting Address: {:#x}\n\
+         Exception Syndrome: {:#x}",
+        from, typ, elr, insn_bytes, far, esr
+    );
+    if write_res.is_err() {
+        write_abort("exception message format failed".as_bytes());
+    }
+
+    write_abort(&[0xFF]);
+    // At this point, write_abort with the 0xFF terminator is expected to terminate guest execution,
+    // so control should never reach beyond this call.
+    unreachable!();
+}
diff --git a/src/hyperlight_guest_bin/src/arch/aarch64/exception/mod.rs b/src/hyperlight_guest_bin/src/arch/aarch64/exception/mod.rs
new file mode 100644
index 000000000..89db2bb7f
--- /dev/null
+++ b/src/hyperlight_guest_bin/src/arch/aarch64/exception/mod.rs
@@ -0,0 +1,19 @@
+/*
+Copyright 2026 The Hyperlight Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+ */
+
+pub(super) mod entry;
+pub mod handle;
+mod types;
diff --git a/src/hyperlight_guest_bin/src/arch/aarch64/exception/types.rs b/src/hyperlight_guest_bin/src/arch/aarch64/exception/types.rs
new file mode 100644
index 000000000..a73d7a617
--- /dev/null
+++ b/src/hyperlight_guest_bin/src/arch/aarch64/exception/types.rs
@@ -0,0 +1,46 @@
+/*
+Copyright 2026 The Hyperlight Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+ */
+
+use core::mem::{offset_of, size_of};
+
+#[derive(Debug, PartialEq)]
+#[repr(u64)]
+pub(super) enum ExceptionType {
+    Synchronous,
+    IRQ,
+    FIQ,
+    SError,
+}
+
+#[derive(Debug, PartialEq)]
+#[repr(u64)]
+pub(super) enum ExceptionFrom {
+    CurrentSP0,
+    CurrentSPx,
+    LowerAArch64,
+    LowerAArch32,
+}
+
+#[repr(C)]
+pub(super) struct ExceptionContext {
+    pub(super) x: [u64; 31],
+    pub(super) fpcr: u64,
+    pub(super) fpsr: u64,
+    // No need to store main context SP: it's in SP_EL0
+    pub(super) q: [u128; 32],
+}
+const _: () = assert!(size_of::<ExceptionContext>().is_multiple_of(16));
+const _: () = assert!(offset_of!(ExceptionContext, fpsr) == offset_of!(ExceptionContext, fpcr) + 8);
diff --git a/src/hyperlight_guest_bin/src/arch/aarch64/mod.rs b/src/hyperlight_guest_bin/src/arch/aarch64/mod.rs
index 4af3f518a..7da984d51 100644
--- a/src/hyperlight_guest_bin/src/arch/aarch64/mod.rs
+++ b/src/hyperlight_guest_bin/src/arch/aarch64/mod.rs
@@ -16,20 +16,110 @@ limitations under the License.
 
 // TODO(aarch64): implement aarch64 guest runtime
 
+const IO_PAGE_GVA: u64 = hyperlight_common::layout::io_page().unwrap().1;
+const HLT_ADDR: u64 = IO_PAGE_GVA
+    + (core::mem::size_of::<u64>() as u64 * hyperlight_common::outb::VmAction::Halt as u64);
+
 pub mod dispatch {
-    /// Dispatch function pointer — set during initialisation and called
-    /// by the host for each guest function invocation.
-    #[unsafe(no_mangle)]
-    pub extern "C" fn dispatch_function() {
-        unimplemented!("aarch64 dispatch_function")
+    unsafe extern "C" {
+        /// See comments in amd64/dispatch.rs for why this
+        /// architecture-dependent stub exists
+        ///
+        /// # ABI
+        ///
+        /// If a TLB flush is required, the host should start executing
+        /// one instruction (4 bytes) after the base address of the
+        /// dispatch function.
+        pub(crate) unsafe fn dispatch_function();
+    }
+    core::arch::global_asm!("
+        .global dispatch_function
+        dispatch_function:
+        .cfi_startproc\n
+        .cfi_undefined x30\n
+        b 0f\n
+        tlbi vmalle1\n
+        dsb ish\n
+        isb\n
+        0:\n
+        bl {internal_dispatch_function}\n
+        ldr x1, ={hlt_addr}\n
+        str x0, [x1]\n
+        .cfi_endproc\n
+    ",
+        internal_dispatch_function = sym crate::guest_function::call::internal_dispatch_function,
+        hlt_addr = const super::HLT_ADDR,
+    );
+}
+
+mod exception;
+
+macro_rules! msr {
+    ($sysreg:ident, $expr:expr) => {
+        core::arch::asm!(concat!("msr ", core::stringify!($sysreg), ", {}"), in(reg) $expr);
+    }
+}
+pub(crate) use msr;
+macro_rules! mrs {
+    ($sysreg:ident) => {
+        {
+            let x: u64;
+            core::arch::asm!(concat!("mrs {}, ", core::stringify!($sysreg)), out(reg) x);
+            x
+        }
     }
 }
+pub(crate) use mrs;
 
-/// The entrypoint for the guest binary — called by the hypervisor.
-///
-/// On aarch64 this is a stub that will be implemented when the
-/// aarch64 hypervisor backend is ready.
+unsafe fn init_vbar() {
+    unsafe {
+        core::arch::asm!("
+            adrp {tmp}, vbar\n
+            add {tmp}, {tmp}, :lo12:vbar\n
+            msr VBAR_EL1, {tmp}\n
+        ", tmp = out(reg) _);
+    }
+}
+
+/// Machine-specific initialisation; calls [`crate::generic_init`]
+/// once VBAR and the main stack have been set up
 #[unsafe(no_mangle)]
-pub extern "C" fn entrypoint() -> ! {
-    unimplemented!("aarch64 entrypoint")
+pub extern "C" fn entrypoint(peb_address: u64, seed: u64, ops: u64, max_log_level: u64) -> ! {
+    unsafe {
+        init_vbar();
+        let stack_top = crate::init::init_stack();
+        pivot_stack(peb_address, seed, ops, max_log_level, stack_top);
+    }
 }
+
+unsafe extern "C" {
+    unsafe fn pivot_stack(
+        peb_address: u64,
+        seed: u64,
+        ops: u64,
+        max_log_level: u64,
+        stack_top: u64,
+    ) -> !;
+}
+
+core::arch::global_asm!("
+    .global pivot_stack\n
+    pivot_stack:\n
+    .cfi_startproc\n
+    .cfi_undefined x30\n
+    ldr x5, ={exn_stack}\n
+    msr SPSel, #1\n
+    mov sp, x5\n
+    msr SPSel, #0\n
+    mov sp, x4\n
+    bl {generic_init}\n
+    ldr x1, ={hlt_addr}\n
+    str x0, [x1]\n
+    .cfi_endproc\n
+",
+    exn_stack = const (hyperlight_common::layout::SCRATCH_TOP_GVA as u64
+        - hyperlight_common::layout::SCRATCH_TOP_EXN_STACK_OFFSET
+        + 1),
+    generic_init = sym crate::generic_init,
+    hlt_addr = const HLT_ADDR,
+);
diff --git a/src/hyperlight_guest_bin/src/arch/aarch64/paging.rs b/src/hyperlight_guest_bin/src/arch/aarch64/paging.rs
new file mode 100644
index 000000000..1489ccd86
--- /dev/null
+++ b/src/hyperlight_guest_bin/src/arch/aarch64/paging.rs
@@ -0,0 +1,180 @@
+/*
+Copyright 2025  The Hyperlight Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+use hyperlight_common::vmem;
+use hyperlight_guest::prim_alloc::alloc_phys_pages;
+
+use crate::arch::{mrs, msr};
+// TODO: This is not at all thread-safe atm
+
+#[derive(Copy, Clone)]
+struct GuestMappingOperations {
+    scratch_base_gpa: u64, // filled from `hyperlight_guest::layout::scratch_base_gpa()` in `new`
+    scratch_base_gva: u64, // filled from `hyperlight_guest::layout::scratch_base_gva()` in `new`
+}
+impl GuestMappingOperations {
+    fn new() -> Self {
+        Self {
+            scratch_base_gpa: hyperlight_guest::layout::scratch_base_gpa(),
+            scratch_base_gva: hyperlight_guest::layout::scratch_base_gva(),
+        }
+    }
+    fn try_phys_to_virt(&self, addr: u64) -> Option<*mut u8> {
+        if addr >= self.scratch_base_gpa {
+            Some((self.scratch_base_gva + (addr - self.scratch_base_gpa)) as *mut u8) // same offset from the scratch base, applied to the GVA base
+        } else {
+            None // addresses below the scratch base GPA get no translation
+        }
+    }
+    fn phys_to_virt(&self, addr: u64) -> *mut u8 {
+        self.try_phys_to_virt(addr)
+            .expect("phys_to_virt encountered snapshot non-PT page") // panics when `addr` is below the scratch base GPA
+    }
+}
+// Identity `AsRef` impl, for `vmem::virt_to_phys` (presumably required by its bounds on the ops argument; confirm)
+impl core::convert::AsRef<GuestMappingOperations> for GuestMappingOperations {
+    fn as_ref(&self) -> &Self {
+        self
+    }
+}
+impl vmem::TableReadOps for GuestMappingOperations {
+    type TableAddr = u64;
+    fn entry_addr(addr: u64, offset: u64) -> u64 {
+        addr + offset
+    }
+    unsafe fn read_entry(&self, addr: u64) -> u64 {
+        let addr = self.phys_to_virt(addr); // panics if `addr` is below the scratch base GPA
+        unsafe { (addr as *mut u64).read_volatile() }
+    }
+    fn to_phys(addr: u64) -> u64 {
+        addr // identity: no conversion is applied
+    }
+    fn from_phys(addr: u64) -> u64 {
+        addr // identity: no conversion is applied
+    }
+    fn root_table(&self) -> u64 {
+        unsafe { mrs!(TTBR0_EL1) & !0xfff } // clears bits [11:0] only; NOTE(review): confirm the upper (ASID) bits are always zero here
+    }
+}
+
+impl vmem::TableOps for GuestMappingOperations {
+    // Currently, we don't actually move tables anywhere on amd64
+    // because of issues with guest PTs in IPAs that are mapped
+    // readonly in Stage 2 translation. However, this code all works
+    // and will be re-enabled as soon as there is improved
+    // architecture/hypervisor support. NOTE(review): wording shared with the amd64 file; confirm "amd64" is intended here.
+    type TableMovability = vmem::MayMoveTable;
+    unsafe fn alloc_table(&self) -> u64 {
+        let page_addr = unsafe { alloc_phys_pages(1) };
+        unsafe {
+            self.phys_to_virt(page_addr)
+                .write_bytes(0u8, vmem::PAGE_TABLE_SIZE);
+            // Make sure that the zero'ing writes are ordered with the
+            // subsequent write that will actually link this table
+            // into the hierarchy, so that the table walker can never
+            // read+cache a stale valid entry. See e.g. litmus test
+            // ROT.inv+dmbst in [1]
+            //
+            // [1] Ben Simner, Alasdair Armstrong, Jean
+            //     Pichon-Pharabod, Christopher Pulte, Richard
+            //     Grisenthwaite, and Peter Sewell. 2022. Relaxed
+            //     virtual memory [extended version]. In: Proceedings
+            //     of the 31st European Symposium on Programming,
+            //     ESOP 2022.
+            core::arch::asm!("dmb st");
+        }
+        page_addr
+    }
+    unsafe fn write_entry(&self, addr: u64, entry: u64) -> Option<u64> {
+        unsafe {
+            (self.phys_to_virt(addr) as *mut u64).write_volatile(entry);
+        }
+        None // this implementation always returns `None`
+    }
+    unsafe fn update_root(&self, new_root: u64) {
+        unsafe {
+            msr!(TTBR0_EL1, new_root); // NOTE(review): no ISB is visible here; confirm whether `msr!` or the caller synchronises
+        }
+    }
+}
+
+/// Assumption: `phys_base`, `virt_base` and `len` are all page-aligned
+/// # Safety
+/// This function modifies pages backing a virtual memory range which is inherently unsafe w.r.t.
+/// the Rust memory model.
+/// When using this function note:
+/// - No locking is performed before touching page table data structures,
+///   as such do not use concurrently with any other page table operations
+/// - TLB invalidation is not performed,
+///   if the ranges being mapped were not previously unmapped, TLB invalidation may need to be performed afterwards.
+pub unsafe fn map_region(phys_base: u64, virt_base: *mut u8, len: u64, kind: vmem::MappingKind) {
+    unsafe {
+        vmem::map(
+            &GuestMappingOperations::new(),
+            vmem::Mapping {
+                phys_base,
+                virt_base: virt_base as u64,
+                len,
+                kind,
+                user_accessible: false, // hard-coded: callers cannot request a user-accessible mapping here
+            },
+        );
+    }
+}
+
+pub fn virt_to_phys(gva: vmem::VirtAddr) -> impl Iterator<Item = vmem::Mapping> {
+    unsafe { vmem::virt_to_phys::<_>(GuestMappingOperations::new(), gva, 1) } // NOTE(review): the literal 1 is presumably a length; confirm its units
+}
+
+pub fn phys_to_virt(gpa: vmem::PhysAddr) -> Option<*mut u8> {
+    GuestMappingOperations::new().try_phys_to_virt(gpa) // `None` when `gpa` is below the scratch base GPA
+}
+
+pub mod barrier {
+    /// # Architecture-specific (aarch64) notes
+    ///
+    /// I_WZCBG from [1]:
+    /// > When a translation table entry that generates a Translation
+    /// > fault, Address size fault, or Access flag fault is changed to
+    /// > one that does not fault, all of the following apply to
+    /// > software:
+    /// > - TLB invalidation is not required because an entry that
+    /// >   generates one of the listed faults is never cached in a TLB.
+    /// > - A Context synchronization event is required to ensure that
+    /// >   the completed change to the translation table entry affects
+    /// >   subsequent instruction fetches.
+    ///
+    /// In theory, without FEAT_nTLBPA, there could be some subtlety
+    /// here if the physical memory location used for the descriptor
+    /// was previously used after the last TLBI to store a valid
+    /// descriptor. Hyperlight does not recycle page tables in a way
+    /// that would cause problems here.
+    ///
+    /// [1] Arm Architecture Reference Manual for A-profile architecture
+    ///         Chapter D8: The AArch64 Virtual Memory System Architecture
+    ///             §D8.17 TLB maintenance
+    #[inline(always)]
+    pub fn first_valid_same_ctx() {
+        unsafe {
+            core::arch::asm!(
+                "
+                dsb ish
+                isb
+            "
+            );
+        }
+    }
+}
diff --git a/src/hyperlight_guest_bin/src/arch/amd64/init.rs b/src/hyperlight_guest_bin/src/arch/amd64/init.rs
index 073bd3a2f..4dfa7e2f8 100644
--- a/src/hyperlight_guest_bin/src/arch/amd64/init.rs
+++ b/src/hyperlight_guest_bin/src/arch/amd64/init.rs
@@ -92,7 +92,7 @@ unsafe fn init_tss(pc: *mut ProcCtrl) {
         let tss_ptr = &raw mut (*pc).tss;
         // copy byte by byte to avoid alignment issues
         let ist1_ptr = &raw mut (*tss_ptr).ist1 as *mut [u8; 8];
-        let exn_stack = hyperlight_common::layout::MAX_GVA as u64
+        let exn_stack = hyperlight_common::layout::SCRATCH_TOP_GVA as u64
             - hyperlight_common::layout::SCRATCH_TOP_EXN_STACK_OFFSET
             + 1;
         ist1_ptr.write_volatile(exn_stack.to_ne_bytes());
@@ -104,28 +104,6 @@ unsafe fn init_tss(pc: *mut ProcCtrl) {
     }
 }
 
-/// To initialise the main stack, we just pre-emptively map the first
-/// page of it.
-unsafe fn init_stack() -> u64 {
-    use hyperlight_guest::layout::MAIN_STACK_TOP_GVA;
-    let stack_top_page_base = (MAIN_STACK_TOP_GVA - 1) & !0xfff;
-    unsafe {
-        use hyperlight_common::vmem::{BasicMapping, MappingKind, PAGE_SIZE};
-        crate::paging::map_region(
-            hyperlight_guest::prim_alloc::alloc_phys_pages(1),
-            stack_top_page_base as *mut u8,
-            PAGE_SIZE as u64,
-            MappingKind::Basic(BasicMapping {
-                readable: true,
-                writable: true,
-                executable: false,
-            }),
-        );
-        crate::paging::barrier::first_valid_same_ctx();
-    }
-    MAIN_STACK_TOP_GVA
-}
-
 /// Machine-specific initialisation; calls [`crate::generic_init`]
 /// once stack, CoW, etc have been set up.
 #[unsafe(no_mangle)]
@@ -138,7 +116,7 @@ pub extern "C" fn entrypoint(peb_address: u64, seed: u64, ops: u64, max_log_leve
         init_gdt(pc);
         init_tss(pc);
         init_idt(pc);
-        let stack_top = init_stack();
+        let stack_top = crate::init::init_stack();
 
         // Architecture early init is complete! We pivot now to
         // executing on the main stack, and jump into generic
diff --git a/src/hyperlight_guest_bin/src/arch/amd64/paging.rs b/src/hyperlight_guest_bin/src/arch/amd64/paging.rs
new file mode 100644
index 000000000..8af130eec
--- /dev/null
+++ b/src/hyperlight_guest_bin/src/arch/amd64/paging.rs
@@ -0,0 +1,202 @@
+/*
+Copyright 2025  The Hyperlight Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+use core::arch::asm;
+
+use hyperlight_common::vmem;
+use hyperlight_guest::prim_alloc::alloc_phys_pages;
+
+// TODO: This is not at all thread-safe atm
+// TODO: A lot of code in this file uses inline assembly to load and
+//       store page table entries. It would be nice to use pointer
+//       volatile read/writes instead, but unfortunately we have a PTE
+//       at physical address 0, which is currently identity-mapped at
+//       virtual address 0, and Rust raw pointer operations can't be
+//       used to read/write from address 0.
+
+#[derive(Copy, Clone)]
+struct GuestMappingOperations {
+    scratch_base_gpa: u64, // filled from `hyperlight_guest::layout::scratch_base_gpa()` in `new`
+    scratch_base_gva: u64, // filled from `hyperlight_guest::layout::scratch_base_gva()` in `new`
+}
+impl GuestMappingOperations {
+    fn new() -> Self {
+        Self {
+            scratch_base_gpa: hyperlight_guest::layout::scratch_base_gpa(),
+            scratch_base_gva: hyperlight_guest::layout::scratch_base_gva(),
+        }
+    }
+    fn try_phys_to_virt(&self, addr: u64) -> Option<*mut u8> {
+        if addr >= self.scratch_base_gpa {
+            Some((self.scratch_base_gva + (addr - self.scratch_base_gpa)) as *mut u8) // same offset from the scratch base, applied to the GVA base
+        } else {
+            None // addresses below the scratch base GPA get no translation
+        }
+    }
+    fn phys_to_virt(&self, addr: u64) -> *mut u8 {
+        self.try_phys_to_virt(addr)
+            .expect("phys_to_virt encountered snapshot non-PT page") // panics when `addr` is below the scratch base GPA
+    }
+}
+// Identity `AsRef` impl, for `vmem::virt_to_phys` (presumably required by its bounds on the ops argument; confirm)
+impl core::convert::AsRef<GuestMappingOperations> for GuestMappingOperations {
+    fn as_ref(&self) -> &Self {
+        self
+    }
+}
+impl vmem::TableReadOps for GuestMappingOperations {
+    type TableAddr = u64;
+    fn entry_addr(addr: u64, offset: u64) -> u64 {
+        addr + offset
+    }
+    unsafe fn read_entry(&self, addr: u64) -> u64 {
+        let addr = self.phys_to_virt(addr); // panics if `addr` is below the scratch base GPA
+        let ret: u64;
+        unsafe {
+            asm!("mov {}, qword ptr [{}]", out(reg) ret, in(reg) addr); // asm load rather than a pointer read: see the file-level TODO
+        }
+        ret
+    }
+    fn to_phys(addr: u64) -> u64 {
+        addr // identity: no conversion is applied
+    }
+    fn from_phys(addr: u64) -> u64 {
+        addr // identity: no conversion is applied
+    }
+    fn root_table(&self) -> u64 {
+        let pml4_base: u64;
+        unsafe {
+            asm!("mov {}, cr3", out(reg) pml4_base);
+        }
+        pml4_base & !0xfff // drops the low 12 bits of the cr3 value
+    }
+}
+
+impl vmem::TableOps for GuestMappingOperations {
+    // Currently, we don't actually move tables anywhere on amd64
+    // because of issues with guest PTs in IPAs that are mapped
+    // readonly in Stage 2 translation. However, this code all works
+    // and will be re-enabled as soon as there is improved
+    // architecture/hypervisor support.
+    type TableMovability = vmem::MayMoveTable;
+    unsafe fn alloc_table(&self) -> u64 {
+        let page_addr = unsafe { alloc_phys_pages(1) };
+        unsafe {
+            self.phys_to_virt(page_addr)
+                .write_bytes(0u8, vmem::PAGE_TABLE_SIZE)
+        };
+        page_addr
+    }
+    unsafe fn write_entry(&self, addr: u64, entry: u64) -> Option<u64> {
+        let addr = self.phys_to_virt(addr); // panics if `addr` is below the scratch base GPA
+        unsafe {
+            asm!("mov qword ptr [{}], {}", in(reg) addr, in(reg) entry); // asm store rather than a pointer write: see the file-level TODO
+        }
+        None // this implementation always returns `None`
+    }
+    unsafe fn update_root(&self, new_root: u64) {
+        unsafe {
+            core::arch::asm!("mov cr3, {}", in(reg) <Self as vmem::TableReadOps>::to_phys(new_root));
+        }
+    }
+}
+
+/// Assumption: `phys_base`, `virt_base` and `len` are all page-aligned
+/// # Safety
+/// This function modifies pages backing a virtual memory range which is inherently unsafe w.r.t.
+/// the Rust memory model.
+/// When using this function note:
+/// - No locking is performed before touching page table data structures,
+///   as such do not use concurrently with any other page table operations
+/// - TLB invalidation is not performed,
+///   if the ranges being mapped were not previously unmapped, TLB invalidation may need to be performed afterwards.
+pub unsafe fn map_region(phys_base: u64, virt_base: *mut u8, len: u64, kind: vmem::MappingKind) {
+    unsafe {
+        vmem::map(
+            &GuestMappingOperations::new(),
+            vmem::Mapping {
+                phys_base,
+                virt_base: virt_base as u64,
+                len,
+                kind,
+                user_accessible: false, // hard-coded: callers cannot request a user-accessible mapping here
+            },
+        );
+    }
+}
+
+pub fn virt_to_phys(gva: vmem::VirtAddr) -> impl Iterator<Item = vmem::Mapping> {
+    unsafe { vmem::virt_to_phys::<_>(GuestMappingOperations::new(), gva, 1) } // NOTE(review): the literal 1 is presumably a length; confirm its units
+}
+
+pub fn phys_to_virt(gpa: vmem::PhysAddr) -> Option<*mut u8> {
+    GuestMappingOperations::new().try_phys_to_virt(gpa) // `None` when `gpa` is below the scratch base GPA
+}
+
+/// Barriers that other code may need to use when updating page tables
+pub mod barrier {
+    /// Call this function when a virtual address has just been made
+    /// valid for the first time after the last tlb invalidate that
+    /// affected it, and it will be used for the first time in the
+    /// same execution context as has made the modification.
+    ///
+    /// On most architectures, TLBs will not cache invalid entries, so
+    /// this does not need to issue a TLB invalidation. However, it does need to
+    /// ensure coherency between the previous writes and any future
+    /// uses by a page table walker.
+    ///
+    /// # Architecture-specific (amd64) notes
+    ///
+    /// The exact details around page walk coherency on amd64 seem a
+    /// bit fuzzy. The Intel manual notes that a serialising
+    /// instruction is necessary specifically to synchronise table
+    /// walks performed during instruction fetch [1], but is
+    /// relatively quiet about other page walks. The AMD manual notes
+    /// [2] that "a table entry is allowed to be upgraded (by marking
+    /// it as present, or by removing its write, execute or supervisor
+    /// restrictions) without explicitly maintaining TLB coherency",
+    /// but only states that any upper-level TLB cache entries
+    /// will be flushed before re-walking to confirm the fault, which
+    /// does not clearly seem strong enough.
+    ///
+    /// In some limited testing, `mfence` typically seems to be
+    /// enough, but as it is not a serializing instruction on Intel
+    /// platforms, we assume it may not be quite good enough.  `cpuid`
+    /// is likely to be very slow, since we are definitely running
+    /// under a hypervisor (and often even nested). Currently, for
+    /// simplicity's sake, this just copies cr0 to itself, but other
+    /// options (including the `serialize` instruction where
+    /// available) could be worth exploring.
+    ///
+    /// [1] Intel 64 and IA-32 Architectures Software Developer's Manual, Volume 3: System Programming Guide
+    ///         Chapter 5: Paging
+    ///             §5.10: Caching Translation Information
+    ///                 §5.10.4: Invalidation of TLBs and Paging-Structure Caches
+    ///                     §5.10.4.3: Optional Invalidation
+    /// [2] AMD64 Architecture Programmer's Manual, Volume 2: System Programming
+    ///         Section 5: Page Translation and Protection
+    ///             §5.5: Translation-Lookaside Buffer
+    ///                 §5.5.3: TLB Management
+    #[inline(always)]
+    pub fn first_valid_same_ctx() {
+        unsafe {
+            core::arch::asm!("
+                mov rax, cr0
+                mov cr0, rax
+            ", out("rax") _); // rax is declared clobbered: it carries the cr0 value in between
+        }
+    }
+}
diff --git a/src/hyperlight_guest_bin/src/init.rs b/src/hyperlight_guest_bin/src/init.rs
new file mode 100644
index 000000000..824a51576
--- /dev/null
+++ b/src/hyperlight_guest_bin/src/init.rs
@@ -0,0 +1,38 @@
+/*
+Copyright 2025  The Hyperlight Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+ */
+
+/// Initialises the main stack by pre-emptively mapping only its topmost
+/// page, and returns `MAIN_STACK_TOP_GVA`. We assume the architecture-specific
+/// exception handler will allocate further pages on fault as necessary.
+pub(crate) unsafe fn init_stack() -> u64 {
+    use hyperlight_common::vmem::{BasicMapping, MappingKind, PAGE_SIZE};
+    use hyperlight_guest::layout::MAIN_STACK_TOP_GVA;
+    let stack_top_page_base = (MAIN_STACK_TOP_GVA - 1) & !(PAGE_SIZE as u64 - 1); // base of the page holding the byte just below the stack top
+    unsafe {
+        crate::paging::map_region(
+            hyperlight_guest::prim_alloc::alloc_phys_pages(1),
+            stack_top_page_base as *mut u8,
+            PAGE_SIZE as u64,
+            MappingKind::Basic(BasicMapping {
+                readable: true,
+                writable: true,
+                executable: false,
+            }),
+        );
+        crate::paging::barrier::first_valid_same_ctx(); // issued after the new mapping is written, before the caller uses the stack
+    }
+    MAIN_STACK_TOP_GVA
+}
diff --git a/src/hyperlight_guest_bin/src/lib.rs b/src/hyperlight_guest_bin/src/lib.rs
index 450b54930..6b8039fac 100644
--- a/src/hyperlight_guest_bin/src/lib.rs
+++ b/src/hyperlight_guest_bin/src/lib.rs
@@ -51,7 +51,6 @@ pub mod error;
 pub mod guest_logger;
 pub mod host_comm;
 pub mod memory;
-#[cfg(target_arch = "x86_64")]
 pub mod paging;
 
 /// Bridge between picolibc's POSIX expectations and the Hyperlight host.
@@ -59,6 +58,9 @@ pub mod paging;
 #[cfg(feature = "libc")]
 mod libc_stubs;
 
+/// Shared initialisation code used by multiple architectures
+mod init;
+
 /// Re-export the libc bindings from hyperlight-libc when the libc feature is enabled.
 #[cfg(feature = "libc")]
 pub use hyperlight_libc as libc;
diff --git a/src/hyperlight_guest_bin/src/paging.rs b/src/hyperlight_guest_bin/src/paging.rs
index 8af130eec..9b559f6c2 100644
--- a/src/hyperlight_guest_bin/src/paging.rs
+++ b/src/hyperlight_guest_bin/src/paging.rs
@@ -14,138 +14,11 @@ See the License for the specific language governing permissions and
 limitations under the License.
 */
 
-use core::arch::asm;
-
-use hyperlight_common::vmem;
-use hyperlight_guest::prim_alloc::alloc_phys_pages;
-
-// TODO: This is not at all thread-safe atm
-// TODO: A lot of code in this file uses inline assembly to load and
-//       store page table entries. It would be nice to use pointer
-//       volatile read/writes instead, but unfortunately we have a PTE
-//       at physical address 0, which is currently identity-mapped at
-//       virtual address 0, and Rust raw pointer operations can't be
-//       used to read/write from address 0.
-
-#[derive(Copy, Clone)]
-struct GuestMappingOperations {
-    scratch_base_gpa: u64,
-    scratch_base_gva: u64,
-}
-impl GuestMappingOperations {
-    fn new() -> Self {
-        Self {
-            scratch_base_gpa: hyperlight_guest::layout::scratch_base_gpa(),
-            scratch_base_gva: hyperlight_guest::layout::scratch_base_gva(),
-        }
-    }
-    fn try_phys_to_virt(&self, addr: u64) -> Option<*mut u8> {
-        if addr >= self.scratch_base_gpa {
-            Some((self.scratch_base_gva + (addr - self.scratch_base_gpa)) as *mut u8)
-        } else {
-            None
-        }
-    }
-    fn phys_to_virt(&self, addr: u64) -> *mut u8 {
-        self.try_phys_to_virt(addr)
-            .expect("phys_to_virt encountered snapshot non-PT page")
-    }
-}
-// for virt_to_phys
-impl core::convert::AsRef<GuestMappingOperations> for GuestMappingOperations {
-    fn as_ref(&self) -> &Self {
-        self
-    }
-}
-impl vmem::TableReadOps for GuestMappingOperations {
-    type TableAddr = u64;
-    fn entry_addr(addr: u64, offset: u64) -> u64 {
-        addr + offset
-    }
-    unsafe fn read_entry(&self, addr: u64) -> u64 {
-        let addr = self.phys_to_virt(addr);
-        let ret: u64;
-        unsafe {
-            asm!("mov {}, qword ptr [{}]", out(reg) ret, in(reg) addr);
-        }
-        ret
-    }
-    fn to_phys(addr: u64) -> u64 {
-        addr
-    }
-    fn from_phys(addr: u64) -> u64 {
-        addr
-    }
-    fn root_table(&self) -> u64 {
-        let pml4_base: u64;
-        unsafe {
-            asm!("mov {}, cr3", out(reg) pml4_base);
-        }
-        pml4_base & !0xfff
-    }
-}
-
-impl vmem::TableOps for GuestMappingOperations {
-    // Currently, we don't actually move tables anywhere on amd64
-    // because of issues with guest PTs in IPAs that are mapped
-    // readonly in Stage 2 translation. However, this code all works
-    // and will re-enabled as soon as there is improved
-    // architecture/hypervisor support.
-    type TableMovability = vmem::MayMoveTable;
-    unsafe fn alloc_table(&self) -> u64 {
-        let page_addr = unsafe { alloc_phys_pages(1) };
-        unsafe {
-            self.phys_to_virt(page_addr)
-                .write_bytes(0u8, vmem::PAGE_TABLE_SIZE)
-        };
-        page_addr
-    }
-    unsafe fn write_entry(&self, addr: u64, entry: u64) -> Option<u64> {
-        let addr = self.phys_to_virt(addr);
-        unsafe {
-            asm!("mov qword ptr [{}], {}", in(reg) addr, in(reg) entry);
-        }
-        None
-    }
-    unsafe fn update_root(&self, new_root: u64) {
-        unsafe {
-            core::arch::asm!("mov cr3, {}", in(reg) <Self as vmem::TableReadOps>::to_phys(new_root));
-        }
-    }
-}
-
-/// Assumption: all are page-aligned
-/// # Safety
-/// This function modifies pages backing a virtual memory range which is inherently unsafe w.r.t.
-/// the Rust memory model.
-/// When using this function note:
-/// - No locking is performed before touching page table data structures,
-///   as such do not use concurrently with any other page table operations
-/// - TLB invalidation is not performed,
-///   if previously-unmapped ranges are not being mapped, TLB invalidation may need to be performed afterwards.
-pub unsafe fn map_region(phys_base: u64, virt_base: *mut u8, len: u64, kind: vmem::MappingKind) {
-    unsafe {
-        vmem::map(
-            &GuestMappingOperations::new(),
-            vmem::Mapping {
-                phys_base,
-                virt_base: virt_base as u64,
-                len,
-                kind,
-                user_accessible: false,
-            },
-        );
-    }
-}
-
-pub fn virt_to_phys(gva: vmem::VirtAddr) -> impl Iterator<Item = vmem::Mapping> {
-    unsafe { vmem::virt_to_phys::<_>(GuestMappingOperations::new(), gva, 1) }
-}
-
-pub fn phys_to_virt(gpa: vmem::PhysAddr) -> Option<*mut u8> {
-    GuestMappingOperations::new().try_phys_to_virt(gpa)
-}
+#[cfg_attr(target_arch = "x86_64", path = "arch/amd64/paging.rs")]
+#[cfg_attr(target_arch = "aarch64", path = "arch/aarch64/paging.rs")]
+mod arch; // backing file is chosen per `target_arch` by the `path` attributes above
 
+pub use arch::{map_region, phys_to_virt, virt_to_phys};
 /// Barriers that other code may need to use when updating page tables
 pub mod barrier {
     /// Call this function when a virtual address has just been made
@@ -157,46 +30,7 @@ pub mod barrier {
     /// this does not need to issue a TLB. However, it does need to
     /// ensure coherency between the previous writes and any future
     /// uses by a page table walker.
-    ///
-    /// # Architecture-specific (amd64) notes
-    ///
-    /// The exact details around page walk coherency on amd64 seem a
-    /// bit fuzzy. The Intel manual notes that a serialising
-    /// instruction is necessary specifically to synchronise table
-    /// walks performed during instruction fetch [1], but is
-    /// relatively quiet about other page walks. The AMD manual notes
-    /// [2] that "a table entry is allowed to be upgraded (by marking
-    /// it as present, or by removing its write, execute or supervisor
-    /// restrictions) without explicitly maintaining TLB coherency",
-    /// but only states that TLB any upper-level TLB cache entries
-    /// will be flushed before re-walking to confirm the fault, which
-    /// does not clearly seem strong enough.
-    ///
-    /// In some limited testing, `mfence` typically seems to be
-    /// enough, but as it is not a serializing instruction on Intel
-    /// platforms, we assume it may not be quite good enough.  `cpuid`
-    /// is likely to be very slow, since we are definitely running
-    /// under a hypervisor (and often even nested). Currently, for
-    /// simplicity's sake, this just copies cr0 to itself, but other
-    /// options (including the `serialize` instruction where
-    /// available) could be worth exploring.
-    ///
-    /// [1] Intel 64 and IA-32 Architectures Software Developer's Manual, Volume 3: System Programming Guide
-    ///         Chapter 5: Paging
-    ///             §5.10: Caching Translation Information
-    ///                 §5.10.4: Invalidation of TLBs and Paging-Structure Caches
-    ///                     §5.10.4.3: Optional Invalidation
-    /// [2] AMD64 Architecture Programmer's Manual, Volume 2: System Programming
-    ///         Section 5: Page Translation and Protection
-    ///             §5.5: Translation-Lookaside Buffer
-    ///                 §5.5.3: TLB Management
-    #[inline(always)]
-    pub fn first_valid_same_ctx() {
-        unsafe {
-            core::arch::asm!("
-                mov rax, cr0
-                mov cr0, rax
-            ", out("rax") _);
-        }
-    }
+    pub use arch::first_valid_same_ctx; // `arch` is the alias declared below, not `super::arch` itself
+
+    use super::arch::barrier as arch; // local alias for the per-architecture `barrier` module
 }
diff --git a/src/hyperlight_host/src/hypervisor/hyperlight_vm/aarch64.rs b/src/hyperlight_host/src/hypervisor/hyperlight_vm/aarch64.rs
index 42dbb7aeb..8ce5ea8b3 100644
--- a/src/hyperlight_host/src/hypervisor/hyperlight_vm/aarch64.rs
+++ b/src/hyperlight_host/src/hypervisor/hyperlight_vm/aarch64.rs
@@ -17,6 +17,7 @@ limitations under the License.
 // TODO(aarch64): implement arch-specific HyperlightVm methods
 
 use std::sync::Arc;
+use std::sync::atomic::{AtomicBool, AtomicU8, AtomicU64};
 
 use super::{
     AccessPageTableError, CreateHyperlightVmError, DispatchGuestCallError, HyperlightVm,
@@ -24,9 +25,17 @@ use super::{
 };
 #[cfg(gdb)]
 use crate::hypervisor::gdb::{DebugCommChannel, DebugMsg, DebugResponse};
-use crate::hypervisor::regs::CommonSpecialRegisters;
-use crate::hypervisor::virtual_machine::RegisterError;
-use crate::mem::mgr::SandboxMemoryManager;
+use crate::hypervisor::hyperlight_vm::get_guest_log_filter;
+use crate::hypervisor::regs::{CommonFpu, CommonRegisters, CommonSpecialRegisters};
+#[cfg(kvm)]
+use crate::hypervisor::virtual_machine::kvm::KvmVm;
+#[cfg(kvm)]
+use crate::hypervisor::virtual_machine::{HypervisorType, VmError};
+use crate::hypervisor::virtual_machine::{
+    ResetVcpuError, VirtualMachine, get_available_hypervisor,
+};
+use crate::hypervisor::{InterruptHandleImpl, LinuxInterruptHandle};
+use crate::mem::mgr::{SandboxMemoryManager, SnapshotSharedMemory};
 use crate::mem::shared_mem::{GuestSharedMemory, HostSharedMemory};
 use crate::sandbox::SandboxConfiguration;
 use crate::sandbox::host_funcs::FunctionRegistry;
@@ -39,61 +48,180 @@ use crate::sandbox::uninitialized::SandboxRuntimeConfig;
 impl HyperlightVm {
     #[allow(clippy::too_many_arguments)]
     pub(crate) fn new(
-        _snapshot_mem: GuestSharedMemory,
-        _scratch_mem: GuestSharedMemory,
-        _root_pt_addr: u64,
-        _entrypoint: NextAction,
-        _rsp_gva: u64,
-        _config: &SandboxConfiguration,
+        snapshot_mem: SnapshotSharedMemory<GuestSharedMemory>, // handed to update_snapshot_mapping
+        scratch_mem: GuestSharedMemory, // handed to update_scratch_mapping
+        root_pt_addr: u64, // seeds the default special registers
+        entrypoint: NextAction, // stored as the VM's next action
+        rsp_gva: u64, // stored as the guest stack pointer
+        page_size: usize, // stored on the VM for later use
+        config: &SandboxConfiguration, // supplies the interrupt retry delay and signal offset
         #[cfg(gdb)] _gdb_conn: Option<DebugCommChannel<DebugResponse, DebugMsg>>,
         #[cfg(crashdump)] _rt_cfg: SandboxRuntimeConfig,
         #[cfg(feature = "mem_profile")] _trace_info: MemTraceInfo,
     ) -> std::result::Result<Self, CreateHyperlightVmError> {
-        unimplemented!("new")
+        // TODO: support gdb on aarch64 (the gdb/crashdump/mem_profile arguments are unused here)
+        type VmType = Box<dyn VirtualMachine>;
+        let vm: VmType = match get_available_hypervisor() { // one arm per compiled-in backend
+            #[cfg(kvm)]
+            Some(HypervisorType::Kvm) => Box::new(KvmVm::new().map_err(VmError::CreateVm)?),
+            // TODO: mshv support (until then a detected MSHV is reported as "no hypervisor")
+            #[cfg(mshv3)]
+            Some(HypervisorType::Mshv) => return Err(CreateHyperlightVmError::NoHypervisorFound),
+            None => return Err(CreateHyperlightVmError::NoHypervisorFound),
+        };
+        vm.set_sregs(&CommonSpecialRegisters::defaults(root_pt_addr)) // program the default special registers
+            .map_err(VmError::Register)?;
+        let interrupt_handle: Arc<dyn InterruptHandleImpl> = Arc::new(LinuxInterruptHandle {
+            state: AtomicU8::new(0),
+            tid: AtomicU64::new(unsafe { libc::pthread_self() as u64 }), // id of the thread calling `new`
+            retry_delay: config.get_interrupt_retry_delay(),
+            sig_rt_min_offset: config.get_interrupt_vcpu_sigrtmin_offset(),
+            dropped: AtomicBool::new(false),
+        });
+
+        let snapshot_slot = 0u32; // fixed slot number recorded for the snapshot region
+        let scratch_slot = 1u32; // fixed slot number recorded for the scratch region
+        let vm_can_reset_vcpu = vm.can_reset_vcpu(); // queried before `vm` is moved into the struct
+        let mut ret = Self {
+            vm,
+            entrypoint,
+            rsp_gva,
+            interrupt_handle,
+            page_size,
+
+            next_slot: scratch_slot + 1, // first slot number after the two fixed ones
+            freed_slots: Vec::new(),
+
+            snapshot_slot,
+            snapshot_memory: None, // presumably populated by update_snapshot_mapping below
+            scratch_slot,
+            scratch_memory: None, // presumably populated by update_scratch_mapping below
+
+            mmap_regions: Vec::new(),
+
+            vm_can_reset_vcpu,
+            pending_tlb_flush: false,
+        };
+        ret.update_snapshot_mapping(snapshot_mem)?; // any mapping failure aborts construction
+        ret.update_scratch_mapping(scratch_mem)?;
+        Ok(ret)
     }
 
     #[allow(clippy::too_many_arguments)]
     pub(crate) fn initialise(
         &mut self,
-        _peb_addr: crate::mem::ptr::RawPtr,
-        _seed: u64,
-        _page_size: u32,
-        _mem_mgr: &mut SandboxMemoryManager<HostSharedMemory>,
-        _host_funcs: &Arc<std::sync::Mutex<FunctionRegistry>>,
-        _guest_max_log_level: Option<tracing_core::LevelFilter>,
-        #[cfg(gdb)] _dbg_mem_access_fn: Arc<
+        peb_addr: crate::mem::ptr::RawPtr, // passed to the guest in x0
+        seed: u64, // passed to the guest in x1
+        mem_mgr: &mut SandboxMemoryManager<HostSharedMemory>, // forwarded to `run`
+        host_funcs: &Arc<std::sync::Mutex<FunctionRegistry>>, // forwarded to `run`
+        guest_max_log_level: Option<tracing_core::LevelFilter>, // converted to the log filter passed in x3
+        #[cfg(gdb)] dbg_mem_access_fn: Arc<
             std::sync::Mutex<SandboxMemoryManager<HostSharedMemory>>,
         >,
     ) -> Result<(), InitializeError> {
-        unimplemented!("initialise")
+        let NextAction::Initialise(initialise) = self.entrypoint else {
+            return Ok(()); // any other next action: nothing to run, report success
+        };
+        let mut x: [u64; 31] = [0; 31]; // all 31 general-purpose registers start at zero
+        x[0] = peb_addr.into();
+        x[1] = seed;
+        x[2] = self.page_size as u64;
+        x[3] = get_guest_log_filter(guest_max_log_level);
+        let regs = CommonRegisters {
+            pc: initialise, // begin executing at the initialise entrypoint
+            sp: self.rsp_gva,
+            x,
+            // start up with interrupts disabled in EL1t
+            pstate: 0b11 << 6 | 0b100,
+        };
+        self.vm.set_regs(&regs)?;
+
+        self.run(
+            mem_mgr,
+            host_funcs,
+            #[cfg(gdb)]
+            dbg_mem_access_fn,
+        )
+        .map_err(InitializeError::Run)?;
+
+        let regs = self.vm.regs()?; // read back the state the guest left behind
+        if !regs.sp.is_multiple_of(16) { // reject a stack pointer that is not 16-byte aligned
+            return Err(InitializeError::InvalidStackPointer(regs.sp));
+        }
+        self.rsp_gva = regs.sp; // later calls start from the stack pointer the guest finished with
+        self.entrypoint = NextAction::Call(regs.x[0]); // x0 on exit becomes the dispatch address
+
+        Ok(())
     }
 
     pub(crate) fn dispatch_call_from_host(
         &mut self,
-        _mem_mgr: &mut SandboxMemoryManager<HostSharedMemory>,
-        _host_funcs: &Arc<std::sync::Mutex<FunctionRegistry>>,
+        mem_mgr: &mut SandboxMemoryManager<HostSharedMemory>, // forwarded to `run`
+        host_funcs: &Arc<std::sync::Mutex<FunctionRegistry>>, // forwarded to `run`
         #[cfg(gdb)] _dbg_mem_access_fn: Arc<
             std::sync::Mutex<SandboxMemoryManager<HostSharedMemory>>,
         >,
     ) -> Result<(), DispatchGuestCallError> {
-        unimplemented!("dispatch_call_from_host")
+        let NextAction::Call(dispatch_func_addr) = self.entrypoint else {
+            return Err(DispatchGuestCallError::Uninitialized); // no dispatch address recorded yet
+        };
+        let mut regs = CommonRegisters {
+            pc: dispatch_func_addr,
+            sp: self.rsp_gva,
+            // start with interrupts disabled in EL1t
+            pstate: 0b1 << 21 | 0b11 << 6 | 0b100, // NOTE(review): bit 21 is set here but not in initialise; confirm intent
+            ..Default::default() // every general-purpose register starts at zero
+        };
+        if self.pending_tlb_flush { // enter 4 bytes (one instruction) past the dispatch address; presumably the guest's flush path - TODO confirm
+            regs.pc += 4;
+        }
+        self.vm
+            .set_regs(&regs)
+            .map_err(DispatchGuestCallError::SetupRegs)?;
+        self.vm
+            .set_fpu(&CommonFpu::default()) // floating-point state is reset to defaults for every call
+            .map_err(DispatchGuestCallError::SetupRegs)?;
+        let result = self
+            .run(
+                mem_mgr,
+                host_funcs,
+                #[cfg(gdb)]
+                _dbg_mem_access_fn,
+            )
+            .map_err(DispatchGuestCallError::Run);
+        self.pending_tlb_flush = false; // cleared whether or not the run succeeded
+        result
     }
 
     pub(crate) fn get_root_pt(&self) -> Result<u64, AccessPageTableError> {
-        unimplemented!("get_root_pt")
+        let sregs = self.vm.sregs()?; // read the current special registers from the VM
+        Ok(sregs.ttbr0_el1 & ((1 << 48) - 2)) // keep bits [47:1]; drops bit 0 and bits [63:48]
     }
 
     pub(crate) fn get_snapshot_sregs(
         &mut self,
     ) -> Result<CommonSpecialRegisters, AccessPageTableError> {
-        unimplemented!("get_snapshot_sregs")
+        let current_sregs = self.vm.sregs()?; // the special registers are returned unmodified
+        Ok(current_sregs)
     }
 
     pub(crate) fn reset_vcpu(
         &mut self,
-        _cr3: u64,
-        _sregs: &CommonSpecialRegisters,
-    ) -> std::result::Result<(), RegisterError> {
-        unimplemented!("reset_vcpu")
+        cr3: u64, // x86-style name kept for the shared signature; written to TTBR0_EL1 below
+        sregs: &CommonSpecialRegisters, // template for every other special register
+    ) -> std::result::Result<(), ResetVcpuError> {
+        self.pending_tlb_flush = true; // consumed by the next dispatch_call_from_host
+        debug_assert!( // only checked in debug builds
+            self.vm_can_reset_vcpu,
+            "No fallback path for vcpu reset on aarch64"
+        );
+        self.vm.reset_vcpu()?;
+        let mut sregs = *sregs; // work on a copy so the caller's value is untouched
+        sregs.ttbr0_el1 = cr3 & ((1 << 48) - 2); // same bits [47:1] mask as get_root_pt
+
+        self.vm
+            .set_sregs(&sregs)
+            .map_err(ResetVcpuError::Register)?;
+        Ok(())
     }
 }
diff --git a/src/hyperlight_host/src/hypervisor/hyperlight_vm/mod.rs b/src/hyperlight_host/src/hypervisor/hyperlight_vm/mod.rs
index 830b856c0..f4e8f983b 100644
--- a/src/hyperlight_host/src/hypervisor/hyperlight_vm/mod.rs
+++ b/src/hyperlight_host/src/hypervisor/hyperlight_vm/mod.rs
@@ -43,7 +43,7 @@ use crate::hypervisor::hyperlight_vm::x86_64::debug::ProcessDebugRequestError;
 #[cfg(not(gdb))]
 use crate::hypervisor::virtual_machine::VirtualMachine;
 use crate::hypervisor::virtual_machine::{
-    MapMemoryError, RegisterError, RunVcpuError, UnmapMemoryError, VmError, VmExit,
+    MapMemoryError, RegisterError, ResetVcpuError, RunVcpuError, UnmapMemoryError, VmError, VmExit,
 };
 use crate::hypervisor::{InterruptHandle, InterruptHandleImpl};
 use crate::mem::memory_region::{MemoryRegion, MemoryRegionFlags, MemoryRegionType};
@@ -344,7 +344,7 @@ pub enum HyperlightVmError {
     #[error("Map region error: {0}")]
     MapRegion(#[from] MapRegionError),
     #[error("Restore VM (vcpu) error: {0}")]
-    Restore(#[from] RegisterError),
+    Restore(#[from] ResetVcpuError),
     #[error("Unmap region error: {0}")]
     UnmapRegion(#[from] UnmapRegionError),
     #[error("Update region error: {0}")]
@@ -383,6 +383,7 @@ pub(crate) struct HyperlightVm {
 
     pub(super) mmap_regions: Vec<(u32, MemoryRegion)>, // Later mapped regions (slot number, region)
 
+    pub(self) vm_can_reset_vcpu: bool,
     pub(super) pending_tlb_flush: bool,
 
     #[cfg(gdb)]
diff --git a/src/hyperlight_host/src/hypervisor/hyperlight_vm/x86_64.rs b/src/hyperlight_host/src/hypervisor/hyperlight_vm/x86_64.rs
index 16ac55ad3..281696092 100644
--- a/src/hyperlight_host/src/hypervisor/hyperlight_vm/x86_64.rs
+++ b/src/hyperlight_host/src/hypervisor/hyperlight_vm/x86_64.rs
@@ -162,6 +162,7 @@ impl HyperlightVm {
 
             mmap_regions: Vec::new(),
 
+            vm_can_reset_vcpu: false,
             pending_tlb_flush: false,
 
             #[cfg(gdb)]
@@ -202,7 +203,6 @@ impl HyperlightVm {
         &mut self,
         peb_addr: RawPtr,
         seed: u64,
-        page_size: u32,
         mem_mgr: &mut SandboxMemoryManager<HostSharedMemory>,
         host_funcs: &Arc<Mutex<FunctionRegistry>>,
         guest_max_log_level: Option<LevelFilter>,
@@ -225,7 +225,7 @@ impl HyperlightVm {
             // function args
             rdi: peb_addr.into(),
             rsi: seed,
-            rdx: page_size.into(),
+            rdx: self.page_size as u64,
             rcx: get_guest_log_filter(guest_max_log_level),
             rflags: 1 << 1,
 
@@ -338,7 +338,7 @@ impl HyperlightVm {
         &mut self,
         cr3: u64,
         sregs: &CommonSpecialRegisters,
-    ) -> std::result::Result<(), RegisterError> {
+    ) -> std::result::Result<(), ResetVcpuError> {
         self.vm.set_regs(&CommonRegisters {
             rflags: 1 << 1, // Reserved bit always set
             ..Default::default()
@@ -346,7 +346,9 @@ impl HyperlightVm {
         self.vm.set_debug_regs(&CommonDebugRegs::default())?;
         self.vm.reset_xsave()?;
 
-        self.apply_sregs(cr3, sregs)
+        self.apply_sregs(cr3, sregs)?;
+
+        Ok(())
     }
 
     /// Apply special registers and mark TLB for flush.
@@ -1499,7 +1501,7 @@ mod tests {
         let (mut hshm, gshm) = mem_mgr.build().unwrap();
 
         let peb_address = gshm.layout.peb_address();
-        let stack_top_gva = hyperlight_common::layout::MAX_GVA as u64
+        let stack_top_gva = hyperlight_common::layout::SCRATCH_TOP_GVA as u64
             - hyperlight_common::layout::SCRATCH_TOP_EXN_STACK_OFFSET
             + 1;
         let mut vm = set_up_hypervisor_partition(
@@ -1525,7 +1527,6 @@ mod tests {
         vm.initialise(
             peb_addr,
             seed,
-            page_size,
             &mut hshm,
             &host_funcs,
             None,
@@ -2112,7 +2113,7 @@ mod tests {
 
             /// Get the stack top GVA, same as the regular codepath.
             fn stack_top_gva(&self) -> u64 {
-                hyperlight_common::layout::MAX_GVA as u64
+                hyperlight_common::layout::SCRATCH_TOP_GVA as u64
                     - hyperlight_common::layout::SCRATCH_TOP_EXN_STACK_OFFSET
                     + 1
             }
diff --git a/src/hyperlight_host/src/hypervisor/mod.rs b/src/hyperlight_host/src/hypervisor/mod.rs
index be1a15c22..2b33000ea 100644
--- a/src/hyperlight_host/src/hypervisor/mod.rs
+++ b/src/hyperlight_host/src/hypervisor/mod.rs
@@ -487,7 +487,7 @@ pub(crate) mod tests {
         let sandbox =
             UninitializedSandbox::new(GuestBinary::FilePath(filename.clone()), Some(config))?;
         let (mut mem_mgr, gshm) = sandbox.mgr.build().unwrap();
-        let exn_stack_top_gva = hyperlight_common::layout::MAX_GVA as u64
+        let exn_stack_top_gva = hyperlight_common::layout::SCRATCH_TOP_GVA as u64
             - hyperlight_common::layout::SCRATCH_TOP_EXN_STACK_OFFSET
             + 1;
         let mut vm = set_up_hypervisor_partition(
@@ -514,7 +514,6 @@ pub(crate) mod tests {
         vm.initialise(
             peb_addr,
             seed,
-            page_size,
             &mut mem_mgr,
             &host_funcs,
             guest_max_log_level,
diff --git a/src/hyperlight_host/src/hypervisor/regs/aarch64/common_fpu.rs b/src/hyperlight_host/src/hypervisor/regs/aarch64/common_fpu.rs
new file mode 100644
index 000000000..8cecc067c
--- /dev/null
+++ b/src/hyperlight_host/src/hypervisor/regs/aarch64/common_fpu.rs
@@ -0,0 +1,6 @@
+#[derive(Debug, Default, Copy, Clone, PartialEq)]
+pub(crate) struct CommonFpu { // aarch64 floating-point / vector register state
+    pub(crate) v: [u128; 32], // 32 vector registers, 128 bits each
+    pub(crate) fpsr: u32, // FPSR value (32 bits)
+    pub(crate) fpcr: u32, // FPCR value (32 bits)
+}
diff --git a/src/hyperlight_host/src/hypervisor/regs/aarch64/common_regs.rs b/src/hyperlight_host/src/hypervisor/regs/aarch64/common_regs.rs
new file mode 100644
index 000000000..e15dfc0c4
--- /dev/null
+++ b/src/hyperlight_host/src/hypervisor/regs/aarch64/common_regs.rs
@@ -0,0 +1,7 @@
+#[derive(Debug, Default, Copy, Clone, PartialEq)]
+pub(crate) struct CommonRegisters { // aarch64 general-purpose register state
+    pub(crate) x: [u64; 31], // 31 general-purpose registers, indexed from 0
+    pub(crate) sp: u64, // stack pointer
+    pub(crate) pc: u64, // program counter
+    pub(crate) pstate: u64, // processor state (PSTATE)
+}
diff --git a/src/hyperlight_host/src/hypervisor/regs/aarch64/fpu.rs b/src/hyperlight_host/src/hypervisor/regs/aarch64/fpu.rs
new file mode 100644
index 000000000..4c1757a36
--- /dev/null
+++ b/src/hyperlight_host/src/hypervisor/regs/aarch64/fpu.rs
@@ -0,0 +1,6 @@
+#[derive(Debug, Default, Copy, Clone, PartialEq)]
+pub(crate) struct CommonFpu { // NOTE(review): same definition as common_fpu.rs; this file is not declared in regs/aarch64/mod.rs - confirm it is needed
+    pub(crate) v: [u128; 32], // 32 vector registers, 128 bits each
+    pub(crate) fpsr: u32, // FPSR value (32 bits)
+    pub(crate) fpcr: u32, // FPCR value (32 bits)
+}
diff --git a/src/hyperlight_host/src/hypervisor/regs/aarch64/kvm_reg.rs b/src/hyperlight_host/src/hypervisor/regs/aarch64/kvm_reg.rs
new file mode 100644
index 000000000..c6d5a51e9
--- /dev/null
+++ b/src/hyperlight_host/src/hypervisor/regs/aarch64/kvm_reg.rs
@@ -0,0 +1,162 @@
+use kvm_bindings::{
+    KVM_REG_ARM64, KVM_REG_ARM64_SYSREG, KVM_REG_ARM64_SYSREG_CRM_MASK,
+    KVM_REG_ARM64_SYSREG_CRM_SHIFT, KVM_REG_ARM64_SYSREG_CRN_MASK, KVM_REG_ARM64_SYSREG_CRN_SHIFT,
+    KVM_REG_ARM64_SYSREG_OP0_MASK, KVM_REG_ARM64_SYSREG_OP0_SHIFT, KVM_REG_ARM64_SYSREG_OP1_MASK,
+    KVM_REG_ARM64_SYSREG_OP1_SHIFT, KVM_REG_ARM64_SYSREG_OP2_MASK, KVM_REG_ARM64_SYSREG_OP2_SHIFT,
+    KVM_REG_SIZE_U32, KVM_REG_SIZE_U64, KVM_REG_SIZE_U128,
+};
+use kvm_ioctls::VcpuFd;
+
+enum Size { // register width selector used when building KVM register ids
+    U32, // maps to KVM_REG_SIZE_U32
+    U64, // maps to KVM_REG_SIZE_U64
+    U128, // maps to KVM_REG_SIZE_U128
+}
+const fn size_kvm_bits(s: Size) -> u64 { // translate a Size into the matching KVM_REG_SIZE_* constant
+    match s {
+        Size::U32 => KVM_REG_SIZE_U32,
+        Size::U64 => KVM_REG_SIZE_U64,
+        Size::U128 => KVM_REG_SIZE_U128,
+    }
+}
+const fn kvm_sys_reg(op0: u8, op1: u8, crn: u8, crm: u8, op2: u8, s: Size) -> u64 { // build the KVM register id for a system register
+    KVM_REG_ARM64
+        | (KVM_REG_ARM64_SYSREG as u64) // marks the id as a system register
+        | (((op0 as u64) << KVM_REG_ARM64_SYSREG_OP0_SHIFT) & KVM_REG_ARM64_SYSREG_OP0_MASK as u64) // each field is shifted into place, then masked
+        | (((op1 as u64) << KVM_REG_ARM64_SYSREG_OP1_SHIFT) & KVM_REG_ARM64_SYSREG_OP1_MASK as u64)
+        | (((crn as u64) << KVM_REG_ARM64_SYSREG_CRN_SHIFT) & KVM_REG_ARM64_SYSREG_CRN_MASK as u64)
+        | (((crm as u64) << KVM_REG_ARM64_SYSREG_CRM_SHIFT) & KVM_REG_ARM64_SYSREG_CRM_MASK as u64)
+        | (((op2 as u64) << KVM_REG_ARM64_SYSREG_OP2_SHIFT) & KVM_REG_ARM64_SYSREG_OP2_MASK as u64)
+        | size_kvm_bits(s) // register width
+}
+macro_rules! decl_sys_reg { // declares a `pub const` register id computed by kvm_sys_reg
+    ($name:ident, $op0:expr, $op1:expr, $crn:expr, $crm:expr, $op2:expr, $size:ident) => {
+        pub const $name: u64 = kvm_sys_reg($op0, $op1, $crn, $crm, $op2, Size::$size); // $size names a Size variant
+    };
+}
+decl_sys_reg!(TTBR0_EL1, 0b11, 0b000, 0b0010, 0b0000, 0b000, U64); // arguments: op0, op1, CRn, CRm, op2, width
+decl_sys_reg!(TCR_EL1, 0b11, 0b000, 0b0010, 0b0000, 0b010, U64);
+decl_sys_reg!(MAIR_EL1, 0b11, 0b000, 0b1010, 0b0010, 0b000, U64);
+decl_sys_reg!(SCTLR_EL1, 0b11, 0b000, 0b0001, 0b0000, 0b000, U64);
+decl_sys_reg!(CPACR_EL1, 0b11, 0b000, 0b0001, 0b0000, 0b010, U64);
+decl_sys_reg!(VBAR_EL1, 0b11, 0b000, 0b1100, 0b0000, 0b000, U64);
+
+const fn kvm_core_reg(offset: u8, s: Size) -> u64 { // build the KVM register id for a core register at `offset`
+    KVM_REG_ARM64 | 0x10_0000u64 | offset as u64 | size_kvm_bits(s) // 0x10_0000: presumably KVM's core-register class bits - verify against the KVM API
+}
+macro_rules! decl_core_reg { // declares a `pub const` register id computed by kvm_core_reg
+    ($name:ident, $offset:expr, $size:ident) => {
+        pub const $name: u64 = kvm_core_reg($offset, Size::$size); // $size names a Size variant
+    };
+}
+decl_core_reg!(X0, 0x00, U64); // each 64-bit register sits 2 offset units after the previous one
+decl_core_reg!(X1, 0x02, U64);
+decl_core_reg!(X2, 0x04, U64);
+decl_core_reg!(X3, 0x06, U64);
+decl_core_reg!(X4, 0x08, U64);
+decl_core_reg!(X5, 0x0A, U64);
+decl_core_reg!(X6, 0x0C, U64);
+decl_core_reg!(X7, 0x0E, U64);
+decl_core_reg!(X8, 0x10, U64);
+decl_core_reg!(X9, 0x12, U64);
+decl_core_reg!(X10, 0x14, U64);
+decl_core_reg!(X11, 0x16, U64);
+decl_core_reg!(X12, 0x18, U64);
+decl_core_reg!(X13, 0x1A, U64);
+decl_core_reg!(X14, 0x1C, U64);
+decl_core_reg!(X15, 0x1E, U64);
+decl_core_reg!(X16, 0x20, U64);
+decl_core_reg!(X17, 0x22, U64);
+decl_core_reg!(X18, 0x24, U64);
+decl_core_reg!(X19, 0x26, U64);
+decl_core_reg!(X20, 0x28, U64);
+decl_core_reg!(X21, 0x2A, U64);
+decl_core_reg!(X22, 0x2C, U64);
+decl_core_reg!(X23, 0x2E, U64);
+decl_core_reg!(X24, 0x30, U64);
+decl_core_reg!(X25, 0x32, U64);
+decl_core_reg!(X26, 0x34, U64);
+decl_core_reg!(X27, 0x36, U64);
+decl_core_reg!(X28, 0x38, U64);
+decl_core_reg!(X29, 0x3A, U64);
+decl_core_reg!(X30, 0x3C, U64);
+decl_core_reg!(SP, 0x3E, U64); // stack pointer, directly after X30
+decl_core_reg!(PC, 0x40, U64); // program counter
+decl_core_reg!(PSTATE, 0x42, U64); // processor state
+decl_core_reg!(SP_EL1, 0x44, U64); // EL1 stack pointer
+// The other SPSRs are only for AArch32 compatibility, so no ids are declared for them.
+decl_core_reg!(V0, 0x54, U128); // each 128-bit register sits 4 offset units after the previous one
+decl_core_reg!(V1, 0x58, U128);
+decl_core_reg!(V2, 0x5c, U128);
+decl_core_reg!(V3, 0x60, U128);
+decl_core_reg!(V4, 0x64, U128);
+decl_core_reg!(V5, 0x68, U128);
+decl_core_reg!(V6, 0x6c, U128);
+decl_core_reg!(V7, 0x70, U128);
+decl_core_reg!(V8, 0x74, U128);
+decl_core_reg!(V9, 0x78, U128);
+decl_core_reg!(V10, 0x7c, U128);
+decl_core_reg!(V11, 0x80, U128);
+decl_core_reg!(V12, 0x84, U128);
+decl_core_reg!(V13, 0x88, U128);
+decl_core_reg!(V14, 0x8c, U128);
+decl_core_reg!(V15, 0x90, U128);
+decl_core_reg!(V16, 0x94, U128);
+decl_core_reg!(V17, 0x98, U128);
+decl_core_reg!(V18, 0x9c, U128);
+decl_core_reg!(V19, 0xa0, U128);
+decl_core_reg!(V20, 0xa4, U128);
+decl_core_reg!(V21, 0xa8, U128);
+decl_core_reg!(V22, 0xac, U128);
+decl_core_reg!(V23, 0xb0, U128);
+decl_core_reg!(V24, 0xb4, U128);
+decl_core_reg!(V25, 0xb8, U128);
+decl_core_reg!(V26, 0xbc, U128);
+decl_core_reg!(V27, 0xc0, U128);
+decl_core_reg!(V28, 0xc4, U128);
+decl_core_reg!(V29, 0xc8, U128);
+decl_core_reg!(V30, 0xcc, U128);
+decl_core_reg!(V31, 0xd0, U128);
+decl_core_reg!(FPSR, 0xd4, U32); // first 32-bit slot after V31 (0xd0 + 4)
+decl_core_reg!(FPCR, 0xd5, U32); // next 32-bit slot; sharing 0xd4 would alias FPSR
+
+pub(crate) fn get_reg_bytes<const N: usize, E>(
+    fd: &VcpuFd,
+    id: u64,
+    err: impl Fn(kvm_ioctls::Error) -> E,
+) -> Result<[u8; N], E> {
+    let mut raw = [0u8; N]; // N-byte buffer that receives the raw register contents
+    fd.get_one_reg(id, &mut raw).map_err(err)?; // failures are converted with the caller's `err`
+    Ok(raw)
+}
+macro_rules! get_reg { // read register `$reg` from `$fd` and decode it as the integer type `$t`
+    ($fd:expr, $err:expr, $reg:ident, $t:ident) => {
+        $crate::hypervisor::regs::kvm_reg::get_reg_bytes::<{ core::mem::size_of::<$t>() }, _>(
+            $fd,
+            $crate::hypervisor::regs::kvm_reg::$reg, // `$reg` names one of the id constants in this module
+            $err, // maps a kvm_ioctls::Error into the caller's error type
+        )
+        .map($t::from_ne_bytes) // native-endian bytes -> `$t`
+    };
+}
+pub(crate) use get_reg;
+pub(crate) fn set_reg_bytes<const N: usize, E>(
+    fd: &VcpuFd,
+    err: impl Fn(kvm_ioctls::Error) -> E,
+    id: u64,
+    bytes: [u8; N],
+) -> Result<(), E> {
+    // Only success or failure matters: whatever set_one_reg returns on success is dropped.
+    fd.set_one_reg(id, &bytes).map(drop).map_err(err)
+}
+macro_rules! set_reg { // write `$val` to register `$reg` on `$fd`; `$t` is the register's integer type
+    ($fd:expr, $err:expr, $reg:ident, $t:ident, $val:expr) => {
+        $crate::hypervisor::regs::kvm_reg::set_reg_bytes::<{ core::mem::size_of::<$t>() }, _>(
+            $fd,
+            $err, // maps a kvm_ioctls::Error into the caller's error type
+            $crate::hypervisor::regs::kvm_reg::$reg, // `$reg` names one of the id constants in this module
+            $val.to_ne_bytes(), // value -> native-endian bytes
+        )
+    };
+}
+pub(crate) use set_reg;
diff --git a/src/hyperlight_host/src/hypervisor/regs/aarch64/mod.rs b/src/hyperlight_host/src/hypervisor/regs/aarch64/mod.rs
index 8f91c634d..2f7331dbf 100644
--- a/src/hyperlight_host/src/hypervisor/regs/aarch64/mod.rs
+++ b/src/hyperlight_host/src/hypervisor/regs/aarch64/mod.rs
@@ -16,22 +16,19 @@ limitations under the License.
 
 // TODO(aarch64): implement real register definitions
 
-#[derive(Debug, Default, Copy, Clone, PartialEq)]
-pub(crate) struct CommonRegisters {
-    _placeholder: u64,
-}
+mod common_regs;
+pub(crate) use common_regs::*;
 
-#[derive(Debug, Default, Copy, Clone, PartialEq)]
-pub(crate) struct CommonSpecialRegisters {
-    _placeholder: u64,
-}
+mod special_regs;
+pub(crate) use special_regs::*;
 
-#[derive(Debug, Default, Copy, Clone, PartialEq)]
-pub(crate) struct CommonFpu {
-    _placeholder: u64,
-}
+mod common_fpu;
+pub(crate) use common_fpu::*;
 
 #[derive(Debug, Default, Copy, Clone, PartialEq)]
 pub(crate) struct CommonDebugRegs {
     _placeholder: u64,
 }
+
+#[cfg(kvm)]
+pub(crate) mod kvm_reg;
diff --git a/src/hyperlight_host/src/hypervisor/regs/aarch64/special_regs.rs b/src/hyperlight_host/src/hypervisor/regs/aarch64/special_regs.rs
new file mode 100644
index 000000000..159d544ec
--- /dev/null
+++ b/src/hyperlight_host/src/hypervisor/regs/aarch64/special_regs.rs
@@ -0,0 +1,46 @@
+#[derive(Debug, Default, Copy, Clone, PartialEq)]
+pub(crate) struct CommonSpecialRegisters { // aarch64 EL1 system-register state tracked per VM
+    pub(crate) ttbr0_el1: u64, // TTBR0_EL1: translation table base
+    // todo: handle ttbr1 as well
+    pub(crate) tcr_el1: u64, // TCR_EL1 value
+    pub(crate) mair_el1: u64, // MAIR_EL1 value
+    pub(crate) sctlr_el1: u64, // SCTLR_EL1 value
+    pub(crate) cpacr_el1: u64, // CPACR_EL1 value
+    pub(crate) vbar_el1: u64, // VBAR_EL1 value
+    pub(crate) sp_el1: u64, // SP_EL1 value
+}
+
+pub(crate) const TCR_EL1_PS_48: u64 = 0b101u64 << 32; // 48-bit physical address size field
+pub(crate) const TCR_EL1_TG0_4K: u64 = 0b00u64 << 14; // TG0: 4 KiB granule is encoded as 0b00
+pub(crate) const TCR_EL1_TG1_4K: u64 = 0b10u64 << 30; // TG1: 4 KiB granule is encoded as 0b10 (0b00 is reserved for TG1)
+#[allow(clippy::identity_op)]
+pub(crate) const TCR_EL1_T0SZ_48: u64 = 16u64 << 0; // T0SZ = 16, i.e. 64 - 16 = 48-bit region
+pub(crate) const TCR_EL1_T1SZ_48: u64 = 16u64 << 16; // T1SZ = 16, i.e. 64 - 16 = 48-bit region
+
+pub(crate) const MAIR_NORMAL_OWT_NT_AA: u64 = 0b10111011; // one 8-bit MAIR attribute encoding
+pub(crate) const MAIR_ITEM_WIDTH: u8 = 8; // width in bits of one MAIR attribute slot
+
+pub(crate) const SCTLR_EL1_RES1: u64 = 0b11u64 << 28 | 0b11u64 << 22 | 0b1u64 << 20 | 0b1u64 << 11;
+pub(crate) const SCTLR_EL1_M: u64 = 0b1u64 << 0; // bit 0 (M)
+pub(crate) const SCTLR_EL1_C: u64 = 0b1u64 << 2; // bit 2 (C)
+
+pub(crate) const CPACR_EL1_FPEN_NO_TRAP: u64 = 0b11 << 20; // FPEN = 0b11
+
+impl CommonSpecialRegisters {
+    pub(crate) fn defaults(root_pt_addr: u64) -> Self { // initial register values for a VM whose page tables start at `root_pt_addr`
+        CommonSpecialRegisters {
+            ttbr0_el1: root_pt_addr & !0xfff, // low 12 bits of the address are cleared
+            tcr_el1: TCR_EL1_PS_48 // OR of the TCR_EL1_* constants defined in this file
+                | TCR_EL1_TG0_4K
+                | TCR_EL1_TG1_4K
+                | TCR_EL1_T0SZ_48
+                | TCR_EL1_T1SZ_48,
+            mair_el1: MAIR_NORMAL_OWT_NT_AA // placed in the attribute slot selected by ATTR_INDEX_NORMAL
+                << (MAIR_ITEM_WIDTH * hyperlight_common::vmem::ATTR_INDEX_NORMAL),
+            sctlr_el1: SCTLR_EL1_RES1 | SCTLR_EL1_M | SCTLR_EL1_C, // RES1 bits plus the M and C bits
+            cpacr_el1: CPACR_EL1_FPEN_NO_TRAP,
+            vbar_el1: 0, // left at zero here
+            sp_el1: 0, // left at zero here
+        }
+    }
+}
diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/aarch64.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/aarch64.rs
index 39ecb775d..d20b6fd8b 100644
--- a/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/aarch64.rs
+++ b/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/aarch64.rs
@@ -16,25 +16,532 @@ limitations under the License.
 
 // TODO(aarch64): implement KVM backend
 
+use std::sync::LazyLock;
+
+use hyperlight_common::outb::VmAction;
+use kvm_bindings::{
+    KVM_CAP_ARM_NISV_TO_USER, KVM_EXIT_ARM_NISV, KVMIO, kvm_enable_cap, kvm_userspace_memory_region,
+};
+use kvm_ioctls::{Kvm, VcpuExit, VcpuFd, VmFd};
 use tracing::{Span, instrument};
 
-use crate::hypervisor::virtual_machine::CreateVmError;
+use crate::hypervisor::regs::{CommonFpu, CommonRegisters, CommonSpecialRegisters};
+use crate::hypervisor::virtual_machine::{
+    CreateVmError, HypervisorError, MapMemoryError, RegisterError, ResetVcpuError, RunVcpuError,
+    UnmapMemoryError, VirtualMachine, VmExit,
+};
+
+static KVM: LazyLock<std::result::Result<Kvm, CreateVmError>> = // opened once on first use; the outcome (handle or error) is cached
+    LazyLock::new(|| Kvm::new().map_err(|e| CreateVmError::HypervisorNotAvailable(e.into())));
 
 /// Return `true` if the KVM API is available
 #[instrument(skip_all, parent = Span::current(), level = "Trace")]
 pub(crate) fn is_hypervisor_present() -> bool {
-    // TODO(aarch64): implement KVM detection
-    false
+    if let Ok(kvm) = KVM.as_ref() { // KVM could be opened
+        let api_version = kvm.get_api_version();
+        api_version == 12 // only API version 12 is accepted
+    } else {
+        false // KVM could not be opened at all
+    }
 }
 
 /// A KVM implementation of a single-vcpu VM
 #[derive(Debug)]
 pub(crate) struct KvmVm {
-    _placeholder: (),
+    vm_fd: VmFd, // handle to the VM; used for memory regions and the preferred vcpu target
+    vcpu_fd: VcpuFd, // handle to the VM's vcpu (created with id 0)
 }
 
 impl KvmVm {
+    pub(self) fn vcpu_init(&mut self) -> Result<(), HypervisorError> { // initialise the vcpu with the VM's preferred target
+        let mut kvi = kvm_bindings::kvm_vcpu_init::default();
+        self.vm_fd.get_preferred_target(&mut kvi)?; // fills `kvi` with the preferred target
+        self.vcpu_fd.vcpu_init(&kvi)?; // applies it to the vcpu
+        Ok(())
+    }
     pub(crate) fn new() -> std::result::Result<Self, CreateVmError> {
-        unimplemented!("KvmVm::new")
+        let hv = KVM.as_ref().map_err(|e| e.clone())?; // reuse the process-wide KVM handle, or report why it is missing
+        let vm_fd = hv
+            .create_vm_with_type(0)
+            .map_err(|e| CreateVmError::CreateVmFd(e.into()))?;
+        if vm_fd.check_extension_raw(KVM_CAP_ARM_NISV_TO_USER as u64) != 0 {
+            // Available since Linux 5.5. Needed for the workaround
+            // described below for KVM mis-behaviour when a cache
+            // maintenance operation is applied to a VA that is paged
+            // out at Stage 2.
+            //
+            // When this cap is not available, there is a (small)
+            // chance that self-modifying code inside the VM will
+            // cause [`run_vcpu`] to fail, ultimately poisoning the
+            // sandbox. With this capability, the relevant code will
+            // instead be retried.
+            let cap: kvm_enable_cap = kvm_enable_cap {
+                cap: KVM_CAP_ARM_NISV_TO_USER,
+                ..Default::default()
+            };
+            unsafe { // `vm_fd` and `cap` both outlive the ioctl call
+                vmm_sys_util::ioctl_iow_nr!(KVM_ENABLE_CAP, KVMIO, 0xa3, kvm_enable_cap);
+                vmm_sys_util::ioctl::ioctl_with_ref(&vm_fd, KVM_ENABLE_CAP(), &cap); // NOTE(review): return value is ignored; confirm best-effort is intended
+            }
+        }
+
+        let vcpu_fd = vm_fd
+            .create_vcpu(0) // the single vcpu gets id 0
+            .map_err(|e| CreateVmError::CreateVcpuFd(e.into()))?;
+
+        let mut to_ret = Self { vm_fd, vcpu_fd };
+        to_ret
+            .vcpu_init()
+            .map_err(CreateVmError::SetPartitionProperty)?; // vcpu init failures are reported as SetPartitionProperty
+        Ok(to_ret)
     }
+
+    fn run_immediate_exit(&mut self) -> Result<(), Result<HypervisorError, String>> { // Err(Ok(_)): KVM error; Err(Err(_)): unexpected successful exit
+        self.vcpu_fd.set_kvm_immediate_exit(1u8); // set the immediate_exit flag for the runs below
+        let ret = loop {
+            let r = self.vcpu_fd.run();
+            if let Err(e) = r {
+                match e.errno() {
+                    libc::EINTR => break Ok(()), // the only outcome treated as success
+                    libc::EAGAIN => continue, // try again
+                    _ => break Err(Ok(e.into())), // any other errno is a hypervisor error
+                }
+            } else {
+                break Err(Err(format!( // a normal vcpu exit is not expected here
+                    "KVM run for state quiescence exited without EINTR: {:?}",
+                    r
+                )));
+            }
+        };
+        self.vcpu_fd.set_kvm_immediate_exit(0u8); // always clear the flag again before returning
+        ret
+    }
+}
+
+impl VirtualMachine for KvmVm {
+    unsafe fn map_memory(
+        &mut self,
+        (slot, region): (u32, &crate::mem::memory_region::MemoryRegion),
+    ) -> std::result::Result<(), crate::hypervisor::virtual_machine::MapMemoryError> {
+        // Convert the region to KVM's description, overriding only the slot number.
+        let mapping = kvm_userspace_memory_region { slot, ..region.into() };
+        unsafe { self.vm_fd.set_user_memory_region(mapping) }
+            .map_err(|errno| MapMemoryError::Hypervisor(errno.into()))
+    }
+
+    fn unmap_memory(
+        &mut self,
+        (slot, region): (u32, &crate::mem::memory_region::MemoryRegion),
+    ) -> std::result::Result<(), crate::hypervisor::virtual_machine::UnmapMemoryError> {
+        // Passing memory_size == 0 deletes the slot (https://docs.kernel.org/virt/kvm/api.html).
+        let cleared = kvm_userspace_memory_region {
+            slot,
+            memory_size: 0,
+            ..region.into()
+        };
+        unsafe { self.vm_fd.set_user_memory_region(cleared) }
+            .map_err(|errno| UnmapMemoryError::Hypervisor(errno.into()))
+    }
+
+    fn run_vcpu(
+        &mut self,
+        #[cfg(feature = "trace_guest")] tc: &mut SandboxTraceContext,
+    ) -> std::result::Result<
+        crate::hypervisor::virtual_machine::VmExit,
+        crate::hypervisor::virtual_machine::RunVcpuError,
+    > {
+        let exit = loop {
+            let mut exit = self.vcpu_fd.run();
+            if let Ok(VcpuExit::Unsupported(KVM_EXIT_ARM_NISV)) = exit {
+                // [`VcpuExit`] borrows the [`Vcpu`] which produced
+                // it, but that lifetime isn't used in this case. End
+                // the borrow early by re-constructing the value while
+                // preserving the possibility for more tests to be
+                // inserted after this one.
+                exit = Ok(VcpuExit::Unsupported(KVM_EXIT_ARM_NISV));
+                // If a readonly-at-stage-2 page is paged out at stage
+                // 2, KVM does not correctly handle the page fault due
+                // to Stage 2 translation that occurs when cache
+                // maintenance operations must resolve the page
+                // address in order to execute. KVM incorrectly treats
+                // the fault as an indication that the guest is making
+                // an MMIO access the details of which are not
+                // captured in NISV.
+                //
+                // Guest code tries to reduce the chance of this
+                // happening by making a data access shortly before
+                // the cache cleaning instructions. However, this is
+                // possibly racy, since KVM could page out the
+                // relevant Stage 2 translation in between the data
+                // access and the cache maintenance operation. In
+                // order to account for this case, we detect it and
+                // cooperate with code inside the VM to re-fault-in
+                // the page and re-try the cache maintenance operation
+                // in question.
+                //
+                // The calling convention for this is: any cache
+                // maintenance operation should be executed with the
+                // Zero flag cleared. If it fails for this reason,
+                // Hyperlight will increment PC to the next
+                // instruction as usual, but set the Zero flag. The
+                // guest should detect this and attempt to fault in
+                // the page and re-try the operation.
+                use crate::hypervisor::regs::kvm_reg::{get_reg, set_reg};
+                let pc = get_reg!(
+                    &self.vcpu_fd,
+                    |e| { RunVcpuError::Unknown(e.into()) },
+                    PC,
+                    u64
+                )?;
+                let pstate = get_reg!(
+                    &self.vcpu_fd,
+                    |e| { RunVcpuError::Unknown(e.into()) },
+                    PSTATE,
+                    u64
+                )?;
+
+                const Z_BIT: u64 = 1 << 30;
+                // Because we got here from the NISV mmio exit path,
+                // we know that ESR_EL2.EC codes for a Data Abort, and
+                // we can assume the relevant encoding of ESR_EL2.ISS
+                const ESR_EL2_ISS_CM: u64 = 1 << 8;
+
+                let esr_iss = unsafe {
+                    // SAFETY: KVM_EXIT_ARM_NISV implies this is the arm_nisv variant.
+                    self.vcpu_fd.get_kvm_run().__bindgen_anon_1.arm_nisv.esr_iss
+                };
+                if esr_iss & ESR_EL2_ISS_CM != 0 && pstate & Z_BIT == 0 {
+                    // if ESR_EL2.ISS.CM is set, the abort was caused
+                    // by a Cache Maintenance instruction. Assume that
+                    // any Cache Maintenance instruction in the VM is
+                    // part of a Hyperlight-aware sequence and can
+                    // deal with it.
+                    set_reg!(
+                        &self.vcpu_fd,
+                        |e| { RunVcpuError::Unknown(e.into()) },
+                        PSTATE,
+                        u64,
+                        pstate | Z_BIT
+                    )?;
+                    set_reg!(
+                        &self.vcpu_fd,
+                        |e| { RunVcpuError::Unknown(e.into()) },
+                        PC,
+                        u64,
+                        pc + 4
+                    )?;
+                    continue;
+                }
+            }
+            break exit;
+        };
+        match exit {
+            Ok(VcpuExit::MmioWrite(addr, data)) => {
+                let io_page_gpa = const { hyperlight_common::layout::io_page().unwrap().0 };
+                if addr > io_page_gpa
+                    && let off = (addr - io_page_gpa) as usize
+                    && off < hyperlight_common::vmem::PAGE_SIZE
+                {
+                    let port = off / core::mem::size_of::<u64>();
+                    if port == VmAction::Halt as usize {
+                        // As per [1]:
+                        // > For KVM_EXIT_IO [...] the corresponding operations are complete
+                        // > (and guest state is consistent) only after userspace has re-entered
+                        // > the kernel with KVM_RUN. The kernel side will first finish
+                        // > incomplete operations and then check for pending signals.
+                        // >
+                        // > The pending state of the operation is not preserved in state which
+                        // > is visible to userspace, thus userspace should ensure that the
+                        // > operation is completed before performing a live
+                        // > migration. Userspace can re-enter the guest with an unmasked signal
+                        // > pending or with the immediate_exit field set to complete pending
+                        // > operations without allowing any further instructions to be
+                        // > executed.
+                        //
+                        // On AArch64, the incomplete operation state includes incrementing the
+                        // program counter past the faulting I/O instruction. Since a halt exit
+                        // is used to logically end a thread of execution, we will likely start
+                        // executing from somewhere else again after, in which case such a
+                        // program counter increment would be undesirable. Therefore, in the hlt
+                        // case, re-enter the kernel with immediate_exit set right away to clear
+                        // that state.
+                        //
+                        // We assume that this pattern is not required in any other case,
+                        // because any error that prevents the guest code from fully unwinding
+                        // its stack and running "to completion" (i.e. to a halt exit) should
+                        // poison the sandbox, and the vcpu reset on sandbox reset needed to
+                        // un-poison it will take care of clearing the necessary state.
+                        self.run_immediate_exit()
+                            .map_err(|e| RunVcpuError::FlushMmioPending(format!("{:?}", e)))?;
+                        Ok(VmExit::Halt())
+                    } else {
+                        Ok(VmExit::IoOut(port as u16, data.to_vec()))
+                    }
+                } else {
+                    Ok(VmExit::MmioWrite(addr))
+                }
+            }
+            Ok(VcpuExit::MmioRead(addr, _)) => Ok(VmExit::MmioRead(addr)),
+            Err(e) => match e.errno() {
+                libc::EINTR => Ok(VmExit::Cancelled()),
+                libc::EAGAIN => Ok(VmExit::Retry()),
+                _ => Err(RunVcpuError::Unknown(e.into())),
+            },
+            Ok(other) => Ok(VmExit::Unknown(format!(
+                "Unknown KVM VCPU exit: {:?}",
+                other
+            ))),
+        }
+    }
+
+    /// Reads the general-purpose register state (X0-X30, SP, PC and PSTATE) from the vCPU, one
+    /// `get_reg!` access per register; any KVM failure is reported as `RegisterError::GetRegs`.
+    fn regs(&self) -> std::result::Result<CommonRegisters, RegisterError> {
+        use crate::hypervisor::regs::kvm_reg::get_reg;
+        let err = |e: kvm_ioctls::Error| RegisterError::GetRegs(e.into());
+        Ok(CommonRegisters {
+            x: [
+                get_reg!(&self.vcpu_fd, err, X0, u64)?,
+                get_reg!(&self.vcpu_fd, err, X1, u64)?,
+                get_reg!(&self.vcpu_fd, err, X2, u64)?,
+                get_reg!(&self.vcpu_fd, err, X3, u64)?,
+                get_reg!(&self.vcpu_fd, err, X4, u64)?,
+                get_reg!(&self.vcpu_fd, err, X5, u64)?,
+                get_reg!(&self.vcpu_fd, err, X6, u64)?,
+                get_reg!(&self.vcpu_fd, err, X7, u64)?,
+                get_reg!(&self.vcpu_fd, err, X8, u64)?,
+                get_reg!(&self.vcpu_fd, err, X9, u64)?,
+                get_reg!(&self.vcpu_fd, err, X10, u64)?,
+                get_reg!(&self.vcpu_fd, err, X11, u64)?,
+                get_reg!(&self.vcpu_fd, err, X12, u64)?,
+                get_reg!(&self.vcpu_fd, err, X13, u64)?,
+                get_reg!(&self.vcpu_fd, err, X14, u64)?,
+                get_reg!(&self.vcpu_fd, err, X15, u64)?,
+                get_reg!(&self.vcpu_fd, err, X16, u64)?,
+                get_reg!(&self.vcpu_fd, err, X17, u64)?,
+                get_reg!(&self.vcpu_fd, err, X18, u64)?,
+                get_reg!(&self.vcpu_fd, err, X19, u64)?,
+                get_reg!(&self.vcpu_fd, err, X20, u64)?,
+                get_reg!(&self.vcpu_fd, err, X21, u64)?,
+                get_reg!(&self.vcpu_fd, err, X22, u64)?,
+                get_reg!(&self.vcpu_fd, err, X23, u64)?,
+                get_reg!(&self.vcpu_fd, err, X24, u64)?,
+                get_reg!(&self.vcpu_fd, err, X25, u64)?,
+                get_reg!(&self.vcpu_fd, err, X26, u64)?,
+                get_reg!(&self.vcpu_fd, err, X27, u64)?,
+                get_reg!(&self.vcpu_fd, err, X28, u64)?,
+                get_reg!(&self.vcpu_fd, err, X29, u64)?,
+                get_reg!(&self.vcpu_fd, err, X30, u64)?,
+            ],
+            sp: get_reg!(&self.vcpu_fd, err, SP, u64)?,
+            pc: get_reg!(&self.vcpu_fd, err, PC, u64)?,
+            pstate: get_reg!(&self.vcpu_fd, err, PSTATE, u64)?,
+        })
+    }
+
+    /// Writes X0-X30, SP, PC and PSTATE from `regs` to the vCPU, one `set_reg!` access per
+    /// register; any KVM failure is reported as `RegisterError::SetRegs`.
+    fn set_regs(&self, regs: &CommonRegisters) -> std::result::Result<(), RegisterError> {
+        use crate::hypervisor::regs::kvm_reg::set_reg;
+        let err = |e: kvm_ioctls::Error| RegisterError::SetRegs(e.into());
+        set_reg!(&self.vcpu_fd, err, X0, u64, regs.x[0])?;
+        set_reg!(&self.vcpu_fd, err, X1, u64, regs.x[1])?;
+        set_reg!(&self.vcpu_fd, err, X2, u64, regs.x[2])?;
+        set_reg!(&self.vcpu_fd, err, X3, u64, regs.x[3])?;
+        set_reg!(&self.vcpu_fd, err, X4, u64, regs.x[4])?;
+        set_reg!(&self.vcpu_fd, err, X5, u64, regs.x[5])?;
+        set_reg!(&self.vcpu_fd, err, X6, u64, regs.x[6])?;
+        set_reg!(&self.vcpu_fd, err, X7, u64, regs.x[7])?;
+        set_reg!(&self.vcpu_fd, err, X8, u64, regs.x[8])?;
+        set_reg!(&self.vcpu_fd, err, X9, u64, regs.x[9])?;
+        set_reg!(&self.vcpu_fd, err, X10, u64, regs.x[10])?;
+        set_reg!(&self.vcpu_fd, err, X11, u64, regs.x[11])?;
+        set_reg!(&self.vcpu_fd, err, X12, u64, regs.x[12])?;
+        set_reg!(&self.vcpu_fd, err, X13, u64, regs.x[13])?;
+        set_reg!(&self.vcpu_fd, err, X14, u64, regs.x[14])?;
+        set_reg!(&self.vcpu_fd, err, X15, u64, regs.x[15])?;
+        set_reg!(&self.vcpu_fd, err, X16, u64, regs.x[16])?;
+        set_reg!(&self.vcpu_fd, err, X17, u64, regs.x[17])?;
+        set_reg!(&self.vcpu_fd, err, X18, u64, regs.x[18])?;
+        set_reg!(&self.vcpu_fd, err, X19, u64, regs.x[19])?;
+        set_reg!(&self.vcpu_fd, err, X20, u64, regs.x[20])?;
+        set_reg!(&self.vcpu_fd, err, X21, u64, regs.x[21])?;
+        set_reg!(&self.vcpu_fd, err, X22, u64, regs.x[22])?;
+        set_reg!(&self.vcpu_fd, err, X23, u64, regs.x[23])?;
+        set_reg!(&self.vcpu_fd, err, X24, u64, regs.x[24])?;
+        set_reg!(&self.vcpu_fd, err, X25, u64, regs.x[25])?;
+        set_reg!(&self.vcpu_fd, err, X26, u64, regs.x[26])?;
+        set_reg!(&self.vcpu_fd, err, X27, u64, regs.x[27])?;
+        set_reg!(&self.vcpu_fd, err, X28, u64, regs.x[28])?;
+        set_reg!(&self.vcpu_fd, err, X29, u64, regs.x[29])?;
+        set_reg!(&self.vcpu_fd, err, X30, u64, regs.x[30])?;
+        set_reg!(&self.vcpu_fd, err, SP, u64, regs.sp)?;
+        set_reg!(&self.vcpu_fd, err, PC, u64, regs.pc)?;
+        set_reg!(&self.vcpu_fd, err, PSTATE, u64, regs.pstate)?;
+
+        Ok(())
+    }
+
+    /// Reads the SIMD/FP state from the vCPU: V0-V31 as 128-bit values, then FPSR and FPCR as
+    /// 32-bit values. The first failing access aborts the read with `RegisterError::GetFpu`.
+    fn fpu(&self) -> Result<CommonFpu, RegisterError> {
+        use crate::hypervisor::regs::CommonFpu;
+        use crate::hypervisor::regs::kvm_reg::get_reg;
+        let to_err = |e: kvm_ioctls::Error| RegisterError::GetFpu(e.into());
+        Ok(CommonFpu {
+            v: [
+                get_reg!(&self.vcpu_fd, to_err, V0, u128)?,
+                get_reg!(&self.vcpu_fd, to_err, V1, u128)?,
+                get_reg!(&self.vcpu_fd, to_err, V2, u128)?,
+                get_reg!(&self.vcpu_fd, to_err, V3, u128)?,
+                get_reg!(&self.vcpu_fd, to_err, V4, u128)?,
+                get_reg!(&self.vcpu_fd, to_err, V5, u128)?,
+                get_reg!(&self.vcpu_fd, to_err, V6, u128)?,
+                get_reg!(&self.vcpu_fd, to_err, V7, u128)?,
+                get_reg!(&self.vcpu_fd, to_err, V8, u128)?,
+                get_reg!(&self.vcpu_fd, to_err, V9, u128)?,
+                get_reg!(&self.vcpu_fd, to_err, V10, u128)?,
+                get_reg!(&self.vcpu_fd, to_err, V11, u128)?,
+                get_reg!(&self.vcpu_fd, to_err, V12, u128)?,
+                get_reg!(&self.vcpu_fd, to_err, V13, u128)?,
+                get_reg!(&self.vcpu_fd, to_err, V14, u128)?,
+                get_reg!(&self.vcpu_fd, to_err, V15, u128)?,
+                get_reg!(&self.vcpu_fd, to_err, V16, u128)?,
+                get_reg!(&self.vcpu_fd, to_err, V17, u128)?,
+                get_reg!(&self.vcpu_fd, to_err, V18, u128)?,
+                get_reg!(&self.vcpu_fd, to_err, V19, u128)?,
+                get_reg!(&self.vcpu_fd, to_err, V20, u128)?,
+                get_reg!(&self.vcpu_fd, to_err, V21, u128)?,
+                get_reg!(&self.vcpu_fd, to_err, V22, u128)?,
+                get_reg!(&self.vcpu_fd, to_err, V23, u128)?,
+                get_reg!(&self.vcpu_fd, to_err, V24, u128)?,
+                get_reg!(&self.vcpu_fd, to_err, V25, u128)?,
+                get_reg!(&self.vcpu_fd, to_err, V26, u128)?,
+                get_reg!(&self.vcpu_fd, to_err, V27, u128)?,
+                get_reg!(&self.vcpu_fd, to_err, V28, u128)?,
+                get_reg!(&self.vcpu_fd, to_err, V29, u128)?,
+                get_reg!(&self.vcpu_fd, to_err, V30, u128)?,
+                get_reg!(&self.vcpu_fd, to_err, V31, u128)?,
+            ],
+            fpsr: get_reg!(&self.vcpu_fd, to_err, FPSR, u32)?,
+            fpcr: get_reg!(&self.vcpu_fd, to_err, FPCR, u32)?,
+        })
+    }
+
+    /// Writes the SIMD/FP state in `fpu` to the vCPU: V0-V31 (128-bit), then FPSR and FPCR
+    /// (32-bit). The first failing access aborts the write with `RegisterError::SetFpu`.
+    fn set_fpu(&self, fpu: &CommonFpu) -> Result<(), RegisterError> {
+        use crate::hypervisor::regs::kvm_reg::set_reg;
+        let to_err = |e: kvm_ioctls::Error| RegisterError::SetFpu(e.into());
+        set_reg!(&self.vcpu_fd, to_err, V0, u128, fpu.v[0])?;
+        set_reg!(&self.vcpu_fd, to_err, V1, u128, fpu.v[1])?;
+        set_reg!(&self.vcpu_fd, to_err, V2, u128, fpu.v[2])?;
+        set_reg!(&self.vcpu_fd, to_err, V3, u128, fpu.v[3])?;
+        set_reg!(&self.vcpu_fd, to_err, V4, u128, fpu.v[4])?;
+        set_reg!(&self.vcpu_fd, to_err, V5, u128, fpu.v[5])?;
+        set_reg!(&self.vcpu_fd, to_err, V6, u128, fpu.v[6])?;
+        set_reg!(&self.vcpu_fd, to_err, V7, u128, fpu.v[7])?;
+        set_reg!(&self.vcpu_fd, to_err, V8, u128, fpu.v[8])?;
+        set_reg!(&self.vcpu_fd, to_err, V9, u128, fpu.v[9])?;
+        set_reg!(&self.vcpu_fd, to_err, V10, u128, fpu.v[10])?;
+        set_reg!(&self.vcpu_fd, to_err, V11, u128, fpu.v[11])?;
+        set_reg!(&self.vcpu_fd, to_err, V12, u128, fpu.v[12])?;
+        set_reg!(&self.vcpu_fd, to_err, V13, u128, fpu.v[13])?;
+        set_reg!(&self.vcpu_fd, to_err, V14, u128, fpu.v[14])?;
+        set_reg!(&self.vcpu_fd, to_err, V15, u128, fpu.v[15])?;
+        set_reg!(&self.vcpu_fd, to_err, V16, u128, fpu.v[16])?;
+        set_reg!(&self.vcpu_fd, to_err, V17, u128, fpu.v[17])?;
+        set_reg!(&self.vcpu_fd, to_err, V18, u128, fpu.v[18])?;
+        set_reg!(&self.vcpu_fd, to_err, V19, u128, fpu.v[19])?;
+        set_reg!(&self.vcpu_fd, to_err, V20, u128, fpu.v[20])?;
+        set_reg!(&self.vcpu_fd, to_err, V21, u128, fpu.v[21])?;
+        set_reg!(&self.vcpu_fd, to_err, V22, u128, fpu.v[22])?;
+        set_reg!(&self.vcpu_fd, to_err, V23, u128, fpu.v[23])?;
+        set_reg!(&self.vcpu_fd, to_err, V24, u128, fpu.v[24])?;
+        set_reg!(&self.vcpu_fd, to_err, V25, u128, fpu.v[25])?;
+        set_reg!(&self.vcpu_fd, to_err, V26, u128, fpu.v[26])?;
+        set_reg!(&self.vcpu_fd, to_err, V27, u128, fpu.v[27])?;
+        set_reg!(&self.vcpu_fd, to_err, V28, u128, fpu.v[28])?;
+        set_reg!(&self.vcpu_fd, to_err, V29, u128, fpu.v[29])?;
+        set_reg!(&self.vcpu_fd, to_err, V30, u128, fpu.v[30])?;
+        set_reg!(&self.vcpu_fd, to_err, V31, u128, fpu.v[31])?;
+        set_reg!(&self.vcpu_fd, to_err, FPSR, u32, fpu.fpsr)?;
+        set_reg!(&self.vcpu_fd, to_err, FPCR, u32, fpu.fpcr)?;
+        Ok(())
+    }
+
+    /// Reads the EL1 system registers tracked in `CommonSpecialRegisters` (TTBR0, TCR, MAIR,
+    /// SCTLR, CPACR, VBAR and SP_EL1); failures surface as `RegisterError::GetSregs`.
+    fn sregs(&self) -> Result<CommonSpecialRegisters, RegisterError> {
+        use crate::hypervisor::regs::kvm_reg::get_reg;
+        let to_err = |e: kvm_ioctls::Error| RegisterError::GetSregs(e.into());
+        Ok(CommonSpecialRegisters {
+            ttbr0_el1: get_reg!(&self.vcpu_fd, to_err, TTBR0_EL1, u64)?,
+            tcr_el1: get_reg!(&self.vcpu_fd, to_err, TCR_EL1, u64)?,
+            mair_el1: get_reg!(&self.vcpu_fd, to_err, MAIR_EL1, u64)?,
+            sctlr_el1: get_reg!(&self.vcpu_fd, to_err, SCTLR_EL1, u64)?,
+            cpacr_el1: get_reg!(&self.vcpu_fd, to_err, CPACR_EL1, u64)?,
+            vbar_el1: get_reg!(&self.vcpu_fd, to_err, VBAR_EL1, u64)?,
+            sp_el1: get_reg!(&self.vcpu_fd, to_err, SP_EL1, u64)?,
+        })
+    }
+
+    /// Writes every field of `sregs` to its EL1 system register (TTBR0, TCR, MAIR, SCTLR,
+    /// CPACR, VBAR, SP_EL1), in that order; failures surface as `RegisterError::SetSregs`.
+    fn set_sregs(&self, sregs: &CommonSpecialRegisters) -> Result<(), RegisterError> {
+        use crate::hypervisor::regs::kvm_reg::set_reg;
+        let to_err = |e: kvm_ioctls::Error| RegisterError::SetSregs(e.into());
+        set_reg!(&self.vcpu_fd, to_err, TTBR0_EL1, u64, sregs.ttbr0_el1)?;
+        set_reg!(&self.vcpu_fd, to_err, TCR_EL1, u64, sregs.tcr_el1)?;
+        set_reg!(&self.vcpu_fd, to_err, MAIR_EL1, u64, sregs.mair_el1)?;
+        set_reg!(&self.vcpu_fd, to_err, SCTLR_EL1, u64, sregs.sctlr_el1)?;
+        set_reg!(&self.vcpu_fd, to_err, CPACR_EL1, u64, sregs.cpacr_el1)?;
+        set_reg!(&self.vcpu_fd, to_err, VBAR_EL1, u64, sregs.vbar_el1)?;
+        set_reg!(&self.vcpu_fd, to_err, SP_EL1, u64, sregs.sp_el1)?;
+        Ok(())
+    }
+
+    /// Debug register read-back is not implemented on aarch64 yet; calling this panics.
+    fn debug_regs(&self) -> Result<crate::hypervisor::regs::CommonDebugRegs, RegisterError> {
+        todo!("aarch64 debug register access is not implemented yet")
+    }
+
+    /// Debug register writes are not implemented on aarch64 yet; calling this panics.
+    fn set_debug_regs(
+        &self,
+        _drs: &crate::hypervisor::regs::CommonDebugRegs,
+    ) -> std::result::Result<(), RegisterError> {
+        todo!("aarch64 debug register access is not implemented yet")
+    }
+
+    // XSAVE is x86-only state, so the XSAVE hooks below are panicking stubs on aarch64.
+    fn xsave(&self) -> std::result::Result<Vec<u8>, RegisterError> {
+        unimplemented!("aarch64 does not support XSAVE operations")
+    }
+    fn reset_xsave(&self) -> std::result::Result<(), RegisterError> {
+        unimplemented!("aarch64 does not support XSAVE operations")
+    }
+
+    #[cfg(test)]
+    fn set_xsave(&self, _xsave: &[u32]) -> std::result::Result<(), RegisterError> {
+        unimplemented!("aarch64 does not support XSAVE operations")
+    }
+
+    /// This backend implements single-operation vCPU reset, so it always reports `true`.
+    fn can_reset_vcpu(&self) -> bool {
+        true
+    }
+    /// Resets the vCPU: an immediate-exit run, `vcpu_init`, then another immediate-exit run.
+    fn reset_vcpu(&mut self) -> Result<(), ResetVcpuError> {
+        // `run_immediate_exit` reports failure as a nested `Result`; collapse either arm into
+        // the matching `ResetVcpuError` variant.
+        let collapse = |nested: Result<_, _>| match nested {
+            Ok(hv_err) => ResetVcpuError::Hypervisor(hv_err),
+            Err(msg) => ResetVcpuError::Unknown(msg),
+        };
+        self.run_immediate_exit().map_err(collapse)?;
+        self.vcpu_init().map_err(ResetVcpuError::Hypervisor)?;
+        self.run_immediate_exit().map_err(collapse)?;
+        Ok(())
     }
 }
diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/x86_64.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/x86_64.rs
index db68dfdd0..fdaa1ab12 100644
--- a/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/x86_64.rs
+++ b/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/x86_64.rs
@@ -175,7 +175,7 @@ impl KvmVm {
                 == CPUID_FUNCTION_PROCESSOR_CAPACITY_PARAMETERS_AND_EXTENDED_FEATURE_IDENTIFICATION
             {
                 entry.eax &= !0xff;
-                entry.eax |= hyperlight_common::layout::MAX_GPA.ilog2() + 1;
+                entry.eax |= hyperlight_common::layout::SCRATCH_TOP_GPA.ilog2() + 1;
             }
         }
         vcpu_fd
diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/mod.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/mod.rs
index ecb19a09f..55a5b5e0f 100644
--- a/src/hyperlight_host/src/hypervisor/virtual_machine/mod.rs
+++ b/src/hyperlight_host/src/hypervisor/virtual_machine/mod.rs
@@ -107,7 +107,7 @@ pub(crate) enum HypervisorType {
 /// Minimum XSAVE buffer size: 512 bytes legacy region + 64 bytes header.
 /// Only used by MSHV and WHP which use compacted XSAVE format and need to
 /// validate buffer size before accessing XCOMP_BV.
-#[cfg(any(mshv3, target_os = "windows"))]
+#[cfg(all(target_arch = "x86_64", any(mshv3, target_os = "windows")))]
 pub(crate) const XSAVE_MIN_SIZE: usize = 576;
 
 /// Standard XSAVE buffer size (4KB) used by KVM and MSHV.
@@ -202,6 +202,9 @@ pub enum RunVcpuError {
     IncrementRip(HypervisorError),
     #[error("Parse GPA access info failed")]
     ParseGpaAccessInfo,
+    #[cfg(target_arch = "aarch64")]
+    #[error("Flush MMIO pending state failed: {0}")]
+    FlushMmioPending(String),
     #[error("Unknown error: {0}")]
     Unknown(HypervisorError),
 }
@@ -246,6 +249,18 @@ pub enum RegisterError {
     ConversionFailed(String),
 }
 
+#[derive(Debug, Clone, thiserror::Error)]
+pub enum ResetVcpuError {
+    #[error("Single-operation vcpu reset not supported on this hypervisor")]
+    NotSupported,
+    #[error("Hypervisor operation failed: {0}")]
+    Hypervisor(HypervisorError),
+    #[error("Register operation failed: {0}")]
+    Register(#[from] RegisterError),
+    #[error("Operation failed: {0}")]
+    Unknown(String),
+}
+
 /// Map memory error
 #[derive(Debug, Clone, thiserror::Error)]
 pub enum MapMemoryError {
@@ -353,6 +368,13 @@ pub(crate) trait VirtualMachine: Debug + Send {
     #[cfg(not(feature = "i686-guest"))]
     fn set_xsave(&self, xsave: &[u32]) -> std::result::Result<(), RegisterError>;
 
+    /// Single-operation vCPU reset
+    fn can_reset_vcpu(&self) -> bool {
+        false
+    }
+    fn reset_vcpu(&mut self) -> std::result::Result<(), ResetVcpuError> {
+        Err(ResetVcpuError::NotSupported)
+    }
     /// Get partition handle
     #[cfg(target_os = "windows")]
     fn partition_handle(&self) -> windows::Win32::System::Hypervisor::WHV_PARTITION_HANDLE;
diff --git a/src/hyperlight_host/src/mem/mgr.rs b/src/hyperlight_host/src/mem/mgr.rs
index 9e5d843d1..3c696a789 100644
--- a/src/hyperlight_host/src/mem/mgr.rs
+++ b/src/hyperlight_host/src/mem/mgr.rs
@@ -650,7 +650,7 @@ impl SandboxMemoryManager<HostSharedMemory> {
     ) -> Result<Vec<CrashDumpRegion>> {
         use crate::sandbox::snapshot::SharedMemoryPageTableBuffer;
 
-        let len = hyperlight_common::layout::MAX_GVA;
+        let len = hyperlight_common::layout::SCRATCH_TOP_GVA;
 
         let regions = self.shared_mem.with_contents(|snapshot| {
             self.scratch_mem.with_contents(|scratch| {
diff --git a/src/hyperlight_host/src/sandbox/initialized_multi_use.rs b/src/hyperlight_host/src/sandbox/initialized_multi_use.rs
index 241622cab..fb8f28ecd 100644
--- a/src/hyperlight_host/src/sandbox/initialized_multi_use.rs
+++ b/src/hyperlight_host/src/sandbox/initialized_multi_use.rs
@@ -1215,12 +1215,12 @@ mod tests {
         match res.unwrap_err() {
             HyperlightError::GuestAborted(_, msg) => {
                 // msg should indicate we got an invalid opcode exception
+                #[cfg(target_arch = "x86_64")]
                 assert!(msg.contains("InvalidOpcode"));
+                #[cfg(target_arch = "aarch64")]
+                assert!(msg.contains("0x2000000"));
             }
-            e => panic!(
-                "Expected HyperlightError::GuestExecutionError but got {:?}",
-                e
-            ),
+            e => panic!("Expected HyperlightError::GuestAborted but got {:?}", e),
         }
     }
 
@@ -1306,7 +1306,10 @@ mod tests {
         .evolve()
         .unwrap();
 
+        #[cfg(target_arch = "x86_64")]
         let expected = &[0x90, 0x90, 0x90, 0xC3]; // NOOP slide to RET
+        #[cfg(target_arch = "aarch64")]
+        let expected = &[0x1f, 0x20, 0x03, 0xd5, 0xc0, 0x03, 0x5f, 0xd6];
         let map_mem = page_aligned_memory(expected);
         let guest_base = 0x1_0000_0000; // Arbitrary guest base address
 
@@ -1488,8 +1491,9 @@ mod tests {
         let dr0_initial: u64 = sandbox.call("GetDr0", ()).unwrap();
         assert_eq!(dr0_initial, 0, "DR0 should initially be 0");
 
-        // Dirty DR0 by setting it to a known non-zero value
-        const DIRTY_VALUE: u64 = 0xDEAD_BEEF_CAFE_BABE;
+        // Dirty DR0 by setting it to a known non-zero value, avoiding
+        // bits that are reserved in aarch64 DBGBVR0_EL1
+        const DIRTY_VALUE: u64 = 0xFFFF_FEDC_7654_3210;
         sandbox.call::<()>("SetDr0", DIRTY_VALUE).unwrap();
         let dr0_dirty: u64 = sandbox.call("GetDr0", ()).unwrap();
         assert_eq!(
diff --git a/src/hyperlight_host/src/sandbox/snapshot/mod.rs b/src/hyperlight_host/src/sandbox/snapshot/mod.rs
index e4c7b1133..8579cdaa0 100644
--- a/src/hyperlight_host/src/sandbox/snapshot/mod.rs
+++ b/src/hyperlight_host/src/sandbox/snapshot/mod.rs
@@ -17,7 +17,7 @@ limitations under the License.
 use std::collections::{BTreeMap, HashMap};
 use std::sync::atomic::{AtomicU64, Ordering};
 
-use hyperlight_common::layout::{scratch_base_gpa, scratch_base_gva};
+use hyperlight_common::layout::{io_page, scratch_base_gpa, scratch_base_gva};
 use hyperlight_common::vmem;
 use hyperlight_common::vmem::{
     BasicMapping, CowMapping, Mapping, MappingKind, PAGE_SIZE, SpaceAwareMapping, SpaceId, TableOps,
@@ -296,6 +296,21 @@ unsafe fn guest_page<'a>(
 }
 
 fn map_specials(pt_buf: &GuestPageTableBuffer, scratch_size: usize) {
+    if let Some((phys_base, virt_base)) = io_page() {
+        // Map the IO page
+        let mapping = Mapping {
+            phys_base,
+            virt_base,
+            len: PAGE_SIZE as u64,
+            kind: MappingKind::Basic(BasicMapping {
+                readable: true,
+                writable: true,
+                executable: false,
+            }),
+            user_accessible: false,
+        };
+        unsafe { vmem::map(pt_buf, mapping) };
+    }
     // Map the scratch region
     let mapping = Mapping {
         phys_base: scratch_base_gpa(scratch_size),
@@ -405,7 +420,7 @@ impl Snapshot {
         layout.set_pt_size(pt_bytes.len())?;
         memory.extend(&pt_bytes);
 
-        let exn_stack_top_gva = hyperlight_common::layout::MAX_GVA as u64
+        let exn_stack_top_gva = hyperlight_common::layout::SCRATCH_TOP_GVA as u64
             - hyperlight_common::layout::SCRATCH_TOP_EXN_STACK_OFFSET
             + 1;
 
@@ -473,7 +488,7 @@ impl Snapshot {
                         &op,
                         root_pt_gpas,
                         0,
-                        hyperlight_common::layout::MAX_GVA as u64,
+                        hyperlight_common::layout::SCRATCH_TOP_GVA as u64,
                     )
                 };
 
diff --git a/src/hyperlight_host/src/sandbox/uninitialized_evolve.rs b/src/hyperlight_host/src/sandbox/uninitialized_evolve.rs
index c037af06e..8f5f2e763 100644
--- a/src/hyperlight_host/src/sandbox/uninitialized_evolve.rs
+++ b/src/hyperlight_host/src/sandbox/uninitialized_evolve.rs
@@ -122,7 +122,6 @@ pub(super) fn evolve_impl_multi_use(u_sbox: UninitializedSandbox) -> Result<Mult
     vm.initialise(
         peb_addr,
         seed,
-        page_size,
         &mut hshm,
         &u_sbox.host_funcs,
         u_sbox.max_guest_log_level,
diff --git a/src/hyperlight_host/tests/integration_test.rs b/src/hyperlight_host/tests/integration_test.rs
index cc7b7587d..da06eeace 100644
--- a/src/hyperlight_host/tests/integration_test.rs
+++ b/src/hyperlight_host/tests/integration_test.rs
@@ -711,7 +711,12 @@ fn execute_on_heap() {
 
         #[cfg(not(feature = "executable_heap"))]
         assert!(
-            result.unwrap_err().to_string().contains("PageFault"),
+            result.unwrap_err().to_string().contains(
+                #[cfg(target_arch = "x86_64")]
+                "PageFault",
+                #[cfg(target_arch = "aarch64")]
+                "Exception Syndrome: 0x86",
+            ),
             "should get page fault"
         );
     });
@@ -1644,6 +1649,7 @@ fn interrupt_infinite_moving_loop_stress_test() {
 }
 
 #[test]
+#[cfg(target_arch = "x86_64")]
 fn exception_handler_installation_and_validation() {
     with_rust_sandbox(|mut sandbox| {
         // Verify handler count starts at 0
@@ -1692,9 +1698,14 @@ fn fill_heap_and_cause_exception() {
 
                 // Verify the message was properly formatted (proves no-allocation path worked)
                 // Exception vector 6 is #UD (Invalid Opcode from ud2 instruction)
+                #[cfg(target_arch = "x86_64")]
+                let vector = "Exception vector: 6";
+                #[cfg(target_arch = "aarch64")]
+                let vector = "Exception vector: CurrentSP0 Synchronous";
                 assert!(
-                    message.contains("Exception vector: 6"),
-                    "Message should contain 'Exception vector: 6'\nFull error: {:?}",
+                    message.contains(vector),
+                    "Message should contain '{}'\nFull error: {:?}",
+                    vector,
                     err
                 );
                 assert!(
@@ -1702,11 +1713,18 @@ fn fill_heap_and_cause_exception() {
                     "Message should contain 'Faulting Instruction:'\nFull error: {:?}",
                     err
                 );
+                #[cfg(target_arch = "x86_64")]
                 assert!(
                     message.contains("Stack Pointer:"),
                     "Message should contain 'Stack Pointer:'\nFull error: {:?}",
                     err
                 );
+                #[cfg(target_arch = "aarch64")]
+                assert!(
+                    message.contains("Exception Syndrome:"),
+                    "Message should contain 'Exception Syndrome:'\nFull error: {:?}",
+                    err
+                );
             }
             _ => panic!("Expected GuestAborted error, got: {:?}", err),
         }
diff --git a/src/hyperlight_libc/build.rs b/src/hyperlight_libc/build.rs
index d80c66de5..495e20d5e 100644
--- a/src/hyperlight_libc/build.rs
+++ b/src/hyperlight_libc/build.rs
@@ -23,7 +23,9 @@ use std::{env, fs};
 use anyhow::{Context, Result, bail};
 use bindgen::Formatter::Prettyplease;
 use bindgen::RustEdition::Edition2021;
-use build_files::{LIBC_FILES, LIBC_FILES_X86, LIBM_FILES, LIBM_FILES_X86};
+use build_files::{
+    LIBC_FILES, LIBC_FILES_AARCH64, LIBC_FILES_X86, LIBM_FILES, LIBM_FILES_AARCH64, LIBM_FILES_X86,
+};
 
 fn copy_includes<P: AsRef<Path>, Q: AsRef<Path> + std::fmt::Debug>(
     include_dir: P,
@@ -121,6 +123,10 @@ fn cc_build(picolibc_dir: &PathBuf, target: &str) -> Result<cc::Build> {
             build.include(picolibc_dir.join("libm/machine/x86"));
             build.include(picolibc_dir.join("libc/machine/x86"));
         }
+        "aarch64" => {
+            build.include(picolibc_dir.join("libc/machine/aarch64"));
+            build.include(picolibc_dir.join("libm/machine/aarch64"));
+        }
         arch => {
             bail!("Unsupported target architecture: {arch}");
         }
@@ -139,6 +145,7 @@ fn add_libc(build: &mut cc::Build, picolibc_dir: &Path, target: &str) -> Result<
     let base = LIBC_FILES.iter();
     let files = match target {
         "x86" | "x86_64" => base.chain(LIBC_FILES_X86.iter()),
+        "aarch64" => base.chain(LIBC_FILES_AARCH64.iter()),
         arch => bail!("Unsupported target architecture: {arch}"),
     };
 
@@ -156,6 +163,7 @@ fn add_libm(build: &mut cc::Build, picolibc_dir: &Path, target: &str) -> Result<
     let base = LIBM_FILES.iter();
     let files = match target {
         "x86" | "x86_64" => base.chain(LIBM_FILES_X86.iter()),
+        "aarch64" => base.chain(LIBM_FILES_AARCH64.iter()),
         arch => bail!("Unsupported target architecture: {arch}"),
     };
 
diff --git a/src/hyperlight_libc/build_files.rs b/src/hyperlight_libc/build_files.rs
index ab4bf3535..23133071b 100644
--- a/src/hyperlight_libc/build_files.rs
+++ b/src/hyperlight_libc/build_files.rs
@@ -659,6 +659,29 @@ pub(crate) const LIBC_FILES_X86: &[&str] = &[
     "machine/x86/tls.c",
 ];
 
+pub(crate) const LIBC_FILES_AARCH64: &[&str] = &[
+    "machine/aarch64/interrupt.c",
+    "machine/aarch64/interrupt_vector.S",
+    "machine/aarch64/memchr.S",
+    "machine/aarch64/memcmp.S",
+    "machine/aarch64/memcpy.S",
+    "machine/aarch64/memmove.S",
+    "machine/aarch64/memrchr.S",
+    "machine/aarch64/memset.S",
+    "machine/aarch64/rawmemchr.S",
+    "machine/aarch64/setjmp.S",
+    "machine/aarch64/stpcpy.S",
+    "machine/aarch64/strchr.S",
+    "machine/aarch64/strchrnul.S",
+    "machine/aarch64/strcmp.S",
+    "machine/aarch64/strcpy.S",
+    "machine/aarch64/strlen.S",
+    "machine/aarch64/strncmp.S",
+    "machine/aarch64/strnlen.S",
+    "machine/aarch64/strrchr.S",
+    "machine/aarch64/tls.c",
+];
+
 pub(crate) const LIBM_FILES: &[&str] = &[
     "common/copysignl.c",
     "common/exp10l.c",
@@ -942,3 +965,5 @@ pub(crate) const LIBM_FILES: &[&str] = &[
 ];
 
 pub(crate) const LIBM_FILES_X86: &[&str] = &["machine/x86/fenv.c"];
+
+pub(crate) const LIBM_FILES_AARCH64: &[&str] = &[];
diff --git a/src/tests/rust_guests/dummyguest/src/main.rs b/src/tests/rust_guests/dummyguest/src/main.rs
index 924bb686c..91a62edc3 100644
--- a/src/tests/rust_guests/dummyguest/src/main.rs
+++ b/src/tests/rust_guests/dummyguest/src/main.rs
@@ -33,6 +33,7 @@ fn halt() {
     // VmAction::Halt = 108; using raw constant to avoid pulling in
     // anyhow (via hyperlight_common's TryFrom impl) which requires alloc.
     unsafe {
+        #[cfg(target_arch = "x86_64")]
         asm!(
             "out dx, eax",
             "cli",
@@ -40,18 +41,28 @@ fn halt() {
             in("dx") 108u16,
             in("eax") 0u32,
         );
+        #[cfg(target_arch = "aarch64")]
+        asm!(
+            "str {val}, [{addr}]",
+            val = in(reg) 0, addr = in(reg) 0xffff_ffff_e000u64 + 108 * 8,
+        );
     }
 }
 
 fn mmio_read() {
     unsafe {
+        #[cfg(target_arch = "x86_64")]
         asm!("mov al, [0x8000]");
+
+        let mut out: u8;
+        #[cfg(target_arch = "aarch64")]
+        asm!("ldr {0:x}, [{1}]", out(reg) out, in(reg) 0x8000);
     }
 }
 
 #[allow(non_snake_case)]
 #[no_mangle]
-pub extern "win64" fn entrypoint(a: i64, b: i64, c: i32) -> i32 {
+pub extern "C" fn entrypoint(a: i64, b: i64, c: i32) -> i32 {
     if a != 0x230000 || b != 1234567890 || c != 4096 {
         mmio_read();
     }
diff --git a/src/tests/rust_guests/simpleguest/src/main.rs b/src/tests/rust_guests/simpleguest/src/main.rs
index b6844a716..d039519da 100644
--- a/src/tests/rust_guests/simpleguest/src/main.rs
+++ b/src/tests/rust_guests/simpleguest/src/main.rs
@@ -45,6 +45,7 @@ use hyperlight_common::log_level::GuestLogFilter;
 use hyperlight_common::vmem::{BasicMapping, MappingKind};
 use hyperlight_guest::error::{HyperlightGuestError, Result};
 use hyperlight_guest::exit::{abort_with_code, abort_with_code_and_message};
+#[cfg(target_arch = "x86_64")]
 use hyperlight_guest_bin::exception::arch::{Context, ExceptionInfo};
 use hyperlight_guest_bin::guest_function::definition::{GuestFunc, GuestFunctionDefinition};
 use hyperlight_guest_bin::guest_function::register::register_function;
@@ -83,6 +84,7 @@ fn echo_double(value: f64) -> f64 {
 
 // Test exception handler that validates stack layout and records invocation
 // It is designed to interact with the trigger_int3 breakpoint exception function below
+#[cfg(target_arch = "x86_64")]
 fn test_exception_handler(
     exception_number: u64,
     _exception_info: *mut ExceptionInfo,
@@ -131,6 +133,7 @@ fn test_exception_handler(
 
 /// Install handler for a specific vector
 #[guest_function("InstallHandler")]
+#[cfg(target_arch = "x86_64")]
 fn install_handler(vector: i32) {
     hyperlight_guest_bin::exception::arch::HANDLERS[vector as usize]
         .store(test_exception_handler as usize as u64, Ordering::Release);
@@ -145,6 +148,7 @@ fn get_exception_handler_call_count() -> i32 {
 
 /// Trigger an INT3 breakpoint exception (vector 3)
 #[guest_function("TriggerInt3")]
+#[cfg(target_arch = "x86_64")]
 fn trigger_int3() -> i32 {
     // Set up test value in R9 before triggering exception
     let test_value: u64 = TEST_R9_VALUE;
@@ -346,7 +350,7 @@ fn fill_heap_and_cause_exception() {
     }
 
     // trigger an undefined instruction exception
-    unsafe { core::arch::asm!("ud2") };
+    trigger_exception();
 }
 
 #[guest_function("ExhaustHeap")]
@@ -447,7 +451,13 @@ fn test_guest_panic(message: String) {
 fn execute_on_heap() -> String {
     unsafe {
         // NO-OP followed by RET
-        let heap_memory = Box::new([0x90u8, 0xC3]);
+        let mut heap_memory = Box::new(
+            #[cfg(target_arch = "x86_64")]
+            [0x90u8, 0xC3],
+            #[cfg(target_arch = "aarch64")]
+            [0x1f, 0x20, 0x03, 0xd5, 0xc0, 0x03, 0x5f, 0xd6],
+        );
+        dicachesync(heap_memory.as_mut_ptr(), heap_memory.len());
         let heap_fn: fn() = core::mem::transmute(Box::into_raw(heap_memory));
         heap_fn();
         black_box(heap_fn); // avoid optimization when running in release mode
@@ -482,13 +492,21 @@ fn log_message(message: String, level: i32) {
 #[guest_function("TriggerException")]
 fn trigger_exception() {
     // trigger an undefined instruction exception
-    unsafe { core::arch::asm!("ud2") };
+    unsafe {
+        // x86_64 encoding of an undefined instruction.
+        #[cfg(target_arch = "x86_64")]
+        core::arch::asm!("ud2");
+        // aarch64 encoding of an undefined instruction.
+        #[cfg(target_arch = "aarch64")]
+        core::arch::asm!("udf #0");
+    }
 }
 
 /// Execute an OUT instruction with an arbitrary port and value.
 /// This is used to test that invalid OUT ports cause errors.
 #[guest_function("OutbWithPort")]
 fn outb_with_port(port: u32, value: u32) {
+    #[cfg(target_arch = "x86_64")]
     unsafe {
         core::arch::asm!(
             "out dx, eax",
@@ -497,6 +515,12 @@ fn outb_with_port(port: u32, value: u32) {
             options(preserves_flags, nomem, nostack)
         );
     }
+    #[cfg(target_arch = "aarch64")]
+    unsafe {
+        (hyperlight_common::layout::io_page().unwrap().1 as *mut u64)
+            .wrapping_add(port as usize)
+            .write_volatile(value as u64);
+    }
 }
 
 // =============================================================================
@@ -513,6 +537,7 @@ static TIMER_IRQ_COUNT: AtomicU32 = AtomicU32::new(0);
 // for the atomic counter update, and sends a non-specific EOI to the master PIC.
 //
 // NOTE: global_asm! on x86_64 in Rust defaults to Intel syntax.
+#[cfg(target_arch = "x86_64")]
 core::arch::global_asm!(
     ".globl _timer_irq_handler",
     "_timer_irq_handler:",
@@ -551,6 +576,7 @@ struct IdtPtr {
 /// - `max_spin`:  maximum busy-wait iterations before giving up
 ///
 /// Returns the number of timer interrupts received.
+#[cfg(target_arch = "x86_64")]
 #[guest_function("TestTimerInterrupts")]
 fn test_timer_interrupts(period_us: i32, max_spin: i32) -> i32 {
     // Reset counter
@@ -700,6 +726,7 @@ fn call_given_paramless_hostfunc_that_returns_i64(hostfuncname: String) -> Resul
 }
 
 #[guest_function("UseSSE2Registers")]
+#[cfg(target_arch = "x86_64")]
 fn use_sse2_registers() {
     let val: f32 = 1.2f32;
     unsafe { core::arch::asm!("movss xmm1, DWORD PTR [{0}]", in(reg) &val) };
@@ -707,13 +734,27 @@ fn use_sse2_registers() {
 
 #[guest_function("SetDr0")]
 fn set_dr0(value: u64) {
-    unsafe { core::arch::asm!("mov dr0, {}", in(reg) value) };
+    unsafe {
+        // x86_64 target register: DR0.
+        #[cfg(target_arch = "x86_64")]
+        core::arch::asm!("mov dr0, {}", in(reg) value);
+        // aarch64 target register: DBGBVR0_EL1.
+        #[cfg(target_arch = "aarch64")]
+        core::arch::asm!("msr dbgbvr0_el1, {}", in(reg) value);
+    }
 }
 
 #[guest_function("GetDr0")]
 fn get_dr0() -> u64 {
     let value: u64;
-    unsafe { core::arch::asm!("mov {}, dr0", out(reg) value) };
+    unsafe {
+        // x86_64 source register: DR0.
+        #[cfg(target_arch = "x86_64")]
+        core::arch::asm!("mov {}, dr0", out(reg) value);
+        // aarch64 source register: DBGBVR0_EL1.
+        #[cfg(target_arch = "aarch64")]
+        core::arch::asm!("mrs {}, dbgbvr0_el1", out(reg) value);
+    }
     value
 }
 
@@ -808,6 +849,60 @@ fn write_mapped_buffer(base: u64, len: u64) -> bool {
     true
 }
 
+/// Make `_len` bytes at `_base` coherent for instruction fetch (no-op off aarch64).
+fn dicachesync(_base: *mut u8, _len: usize) {
+    #[cfg(target_arch = "aarch64")]
+    unsafe {
+        let ctr_el0: u64;
+        core::arch::asm!("mrs {}, ctr_el0", out(reg) ctr_el0);
+        // Line sizes in bytes; explicitly u64 so the full register is defined.
+        let iminline: u64 = 4 << (ctr_el0 & 0xf);
+        let dminline: u64 = 4 << ((ctr_el0 >> 16) & 0xf);
+        // See the comment in the `KVM_EXIT_ARM_NISV` case of `run_vcpu` in
+        // src/hyperlight_host/src/hypervisor/virtual_machine/kvm.rs for why
+        // this cache maintenance sequence is so complex. Each loop starts at
+        // `addr` rounded down to its line size so that an unaligned range
+        // still has its final, partially covered line maintained.
+        core::arch::asm!("
+            ldr xzr, [{addr}]
+            msr nzcv, xzr
+            b 2f
+
+        0:  ldr xzr, [{tmp}]
+            msr nzcv, xzr
+            b 3f
+        1:  ldr xzr, [{tmp}]
+            msr nzcv, xzr
+            b 4f
+
+        2:  sub {tmp}, {dminline}, #1
+            bic {tmp}, {addr}, {tmp}
+        3:  dc cvau, {tmp}
+            b.eq 0b
+            add {tmp}, {tmp}, {dminline}
+            cmp {tmp}, {max}
+            b.lo 3b
+
+            dsb ish
+            sub {tmp}, {iminline}, #1
+            bic {tmp}, {addr}, {tmp}
+        4:  ic ivau, {tmp}
+            b.eq 1b
+            add {tmp}, {tmp}, {iminline}
+            cmp {tmp}, {max}
+            b.lo 4b
+
+            dsb ish
+            isb
+        ",
+            iminline = in(reg) iminline,
+            dminline = in(reg) dminline,
+            addr = in(reg) _base as usize,
+            max = in(reg) _base as usize + _len,
+            tmp = out(reg) _);
+    }
+}
+
 #[guest_function("ExecMappedBuffer")]
 fn exec_mapped_buffer(base: u64, len: u64) -> bool {
     let base = base as usize as *mut u8;
@@ -831,6 +926,9 @@ fn exec_mapped_buffer(base: u64, len: u64) -> bool {
 
     // Should be safe as long as data is something like a NOOP followed by a RET
     let func: fn() = unsafe { core::mem::transmute(data.as_ptr()) };
+
+    dicachesync(base, len);
+
     func();
 
     true
@@ -987,15 +1085,8 @@ fn corrupt_output_size_prefix() -> i32 {
         buf[8..12].copy_from_slice(&0xFFFF_FFFBu32.to_le_bytes());
         buf[12..16].copy_from_slice(&[0u8; 4]);
         buf[16..24].copy_from_slice(&8_u64.to_le_bytes());
-
-        core::arch::asm!(
-            "out dx, eax",
-            "cli",
-            "hlt",
-            in("dx") hyperlight_common::outb::VmAction::Halt as u16,
-            in("eax") 0u32,
-            options(noreturn),
-        );
+        outb_with_port(hyperlight_common::outb::VmAction::Halt as u32, 0u32);
+        unreachable!();
     }
 }
 
@@ -1010,15 +1101,8 @@ fn corrupt_output_back_pointer() -> i32 {
         buf[0..8].copy_from_slice(&24_u64.to_le_bytes());
         buf[8..16].copy_from_slice(&[0u8; 8]);
         buf[16..24].copy_from_slice(&0xDEAD_u64.to_le_bytes());
-
-        core::arch::asm!(
-            "out dx, eax",
-            "cli",
-            "hlt",
-            in("dx") hyperlight_common::outb::VmAction::Halt as u16,
-            in("eax") 0u32,
-            options(noreturn),
-        );
+        outb_with_port(hyperlight_common::outb::VmAction::Halt as u32, 0u32);
+        unreachable!();
     }
 }