diff --git a/crates/osmodifier/src/grub_cfg.rs b/crates/osmodifier/src/grub_cfg.rs index ade45dca97..90f6248be4 100644 --- a/crates/osmodifier/src/grub_cfg.rs +++ b/crates/osmodifier/src/grub_cfg.rs @@ -18,6 +18,10 @@ use crate::OsModifierContext; /// Possible grub.cfg locations, tried in order. const GRUB_CFG_PATHS: &[&str] = &["/boot/grub2/grub.cfg", "/boot/grub/grub.cfg"]; +/// BLS (Boot Loader Spec) entry directory. Fedora-based distros (including +/// AZL4) store kernel boot entries here instead of inline in grub.cfg. +const BLS_ENTRIES_DIR: &str = "/boot/loader/entries"; + /// Extract boot arguments from the generated grub.cfg. /// /// Returns a tuple of (args_to_sync, optional_root_device). @@ -37,7 +41,14 @@ pub fn extract_boot_args_from_grub_cfg( // Find the non-recovery linux command lines. // Go expects exactly one; error otherwise. - let linux_lines = find_non_recovery_linux_lines(&content)?; + let linux_lines = match find_non_recovery_linux_lines(&content) { + Ok(lines) => lines, + Err(_) if content.contains("blscfg") => { + debug!("grub.cfg uses BLS (blscfg); reading boot args from BLS entries"); + extract_options_from_bls_entries(ctx)? + } + Err(e) => return Err(e), + }; if linux_lines.len() != 1 { bail!( "expected 1 non-recovery linux line, found {}", @@ -94,6 +105,77 @@ fn find_grub_cfg(ctx: &OsModifierContext) -> Result { bail!("Could not find grub.cfg at any of: {:?}", GRUB_CFG_PATHS) } +/// Read boot arguments from BLS (Boot Loader Spec) entries. +/// +/// Scans `{root}/boot/loader/entries/*.conf`, skips entries whose title +/// contains "rescue" or "recovery" (case-insensitive), and returns the +/// `options` line from the first valid entry (sorted lexically, matching +/// grub's ordering). +fn extract_options_from_bls_entries(ctx: &OsModifierContext) -> Result, Error> { + let entries_dir = ctx.path(BLS_ENTRIES_DIR); + let mut conf_files: Vec = fs::read_dir(&entries_dir) + .with_context(|| format!("Failed to read BLS entries dir '{}'", entries_dir.display()))? + .filter_map(|e| e.ok()) + .map(|e| e.path()) + .filter(|p| p.extension().is_some_and(|ext| ext == "conf")) + .collect(); + + conf_files.sort(); + + for conf_path in &conf_files { + let content = fs::read_to_string(conf_path) + .with_context(|| format!("Failed to read BLS entry '{}'", conf_path.display()))?; + + let mut title = None; + let mut options = None; + + for line in content.lines() { + // BLS entries may be indented; trim leading whitespace and require a + // whitespace delimiter after the key so we do not match keys like + // "titlebar" or pick up an empty value. + let line = line.trim_start(); + if let Some(value) = line + .strip_prefix("title") + .filter(|v| v.starts_with(char::is_whitespace)) + { + title = Some(value.trim().to_string()); + } else if let Some(value) = line + .strip_prefix("options") + .filter(|v| v.starts_with(char::is_whitespace)) + { + options = Some(value.trim().to_string()); + } + } + + // Skip recovery/rescue entries. + if let Some(ref t) = title { + let lower = t.to_lowercase(); + if lower.contains("rescue") || lower.contains("recovery") { + trace!( + "Skipping BLS rescue/recovery entry: {}", + conf_path.display() + ); + continue; + } + } + + if let Some(opts) = options { + debug!( + "Using BLS entry '{}': options = {opts}", + conf_path.display() + ); + // Return as a synthetic "linux" line: prepend a dummy kernel path + // so the downstream parser (which skips the first token) works. + return Ok(vec![format!("/boot/vmlinuz {opts}")]); + } + } + + bail!( + "no non-recovery BLS entry found in '{}'", + entries_dir.display() + ) +} + /// Return the first whitespace-delimited word from a line, or None if the /// line is empty / whitespace-only. fn first_word(line: &str) -> Option<&str> { @@ -757,4 +839,120 @@ mod tests { assert_eq!(count_braces("menuentry 'title {x}' {"), (1, 0)); assert_eq!(count_braces(r#"menuentry "title {x}" {"#), (1, 0)); } + + // ======================= BLS entry support ======================= + + #[test] + fn test_extract_bls_fallback() { + let tmp = tempdir().unwrap(); + + // Write a BLS-style grub.cfg (contains blscfg, no inline linux lines) + let grub_dir = tmp.path().join("boot/grub2"); + std::fs::create_dir_all(&grub_dir).unwrap(); + std::fs::write( + grub_dir.join("grub.cfg"), + indoc::indoc! {r#" + set timeout=5 + load_env -f /boot/grub2/grubenv + blscfg + "#}, + ) + .unwrap(); + + // Write a BLS entry + let bls_dir = tmp.path().join("boot/loader/entries"); + std::fs::create_dir_all(&bls_dir).unwrap(); + std::fs::write( + bls_dir.join("azl4.conf"), + indoc::indoc! {r#" + title Azure Linux 4.0 (6.6.60) + version 6.6.60 + linux /boot/vmlinuz-6.6.60 + initrd /boot/initramfs-6.6.60.img + options root=/dev/sda2 ro selinux=1 rd.overlayfs=lower,upper,work,/dev/sda5 + "#}, + ) + .unwrap(); + + let ctx = OsModifierContext { + root: tmp.path().to_path_buf(), + }; + + let (args, root_device) = extract_boot_args_from_grub_cfg(&ctx).unwrap(); + assert_eq!(root_device, Some("/dev/sda2".to_string())); + assert!(args.contains(&"selinux=1".to_string())); + } + + #[test] + fn test_extract_bls_skips_recovery() { + let tmp = tempdir().unwrap(); + + let grub_dir = tmp.path().join("boot/grub2"); + std::fs::create_dir_all(&grub_dir).unwrap(); + std::fs::write(grub_dir.join("grub.cfg"), "set timeout=5\nblscfg\n").unwrap(); + + let bls_dir = tmp.path().join("boot/loader/entries"); + std::fs::create_dir_all(&bls_dir).unwrap(); + + // Rescue entry (should be skipped) + std::fs::write( + bls_dir.join("rescue.conf"), + indoc::indoc! {r#" + title Azure Linux 4.0 rescue + version 6.6.60 + linux /boot/vmlinuz-6.6.60 + initrd /boot/initramfs-6.6.60.img + options root=/dev/sda2 ro single + "#}, + ) + .unwrap(); + + // Normal entry (should be used) + std::fs::write( + bls_dir.join("zzz-normal.conf"), + indoc::indoc! {r#" + title Azure Linux 4.0 (6.6.60) + version 6.6.60 + linux /boot/vmlinuz-6.6.60 + initrd /boot/initramfs-6.6.60.img + options root=/dev/sda2 ro selinux=1 + "#}, + ) + .unwrap(); + + let ctx = OsModifierContext { + root: tmp.path().to_path_buf(), + }; + + let (args, root_device) = extract_boot_args_from_grub_cfg(&ctx).unwrap(); + assert_eq!(root_device, Some("/dev/sda2".to_string())); + assert!(args.contains(&"selinux=1".to_string())); + // "single" from rescue entry should NOT appear + assert!(!args.iter().any(|a| a.contains("single"))); + } + + #[test] + fn test_extract_bls_no_entries() { + let tmp = tempdir().unwrap(); + + let grub_dir = tmp.path().join("boot/grub2"); + std::fs::create_dir_all(&grub_dir).unwrap(); + std::fs::write(grub_dir.join("grub.cfg"), "set timeout=5\nblscfg\n").unwrap(); + + // Empty BLS entries dir + let bls_dir = tmp.path().join("boot/loader/entries"); + std::fs::create_dir_all(&bls_dir).unwrap(); + + let ctx = OsModifierContext { + root: tmp.path().to_path_buf(), + }; + + let result = extract_boot_args_from_grub_cfg(&ctx); + assert!(result.is_err()); + let err_msg = result.unwrap_err().to_string(); + assert!( + err_msg.contains("no non-recovery BLS entry found"), + "Error should mention no BLS entries, got: {err_msg}" + ); + } } diff --git a/crates/osutils/src/grub.rs b/crates/osutils/src/grub.rs index 92782bbf78..352064bee5 100644 --- a/crates/osutils/src/grub.rs +++ b/crates/osutils/src/grub.rs @@ -231,9 +231,18 @@ impl GrubConfig { } /// Update the search command in the GRUB config. + /// + /// Three variants of the GRUB stub `search` line exist in practice: + /// + /// 1. The upstream legacy form: `search -n -u -s` + /// 2. AZL3 / standard form: `search --no-floppy --fs-uuid --set=root ` + /// 3. AZL4 / Fedora-based form: `search --fs-uuid --set=root ` + /// (`--no-floppy` is a Mariner-specific convention; Fedora's grub2 + /// scripts don't emit it, and it's redundant on EFI machines.) pub fn update_search(&mut self, uuid: &Uuid) -> Result<(), Error> { let re = Regex::new(r"(?m)^(\s*)search -n -u [\w-]+ -s$").unwrap(); let re2 = Regex::new(r"(?m)^(\s*)search --no-floppy --fs-uuid --set=root [\w-]+$").unwrap(); + let re3 = Regex::new(r"(?m)^(\s*)search --fs-uuid --set=root [\w-]+$").unwrap(); if re.is_match(&self.contents) { self.contents = re @@ -246,6 +255,13 @@ impl GrubConfig { &format!("${{1}}search --no-floppy --fs-uuid --set=root {uuid}"), ) .to_string(); + } else if re3.is_match(&self.contents) { + self.contents = re3 + .replace( + &self.contents, + &format!("${{1}}search --fs-uuid --set=root {uuid}"), + ) + .to_string(); } else { bail!( "Unable to find search command in '{}'", @@ -953,6 +969,52 @@ mod tests { .unwrap(); } + #[test] + fn test_update_search_azl3_form() { + // AZL3 stubs use `search --no-floppy --fs-uuid --set=root `. + let mut grub_config = GrubConfig { + path: PathBuf::new(), + contents: indoc::indoc! { r#" + set timeout=0 + search --no-floppy --fs-uuid --set=root deadbeef-cafe-babe-0000-111122223333 + "# } + .to_owned(), + linux_command_line: None, + }; + + let new_uuid = Uuid::parse_str("9e6a9d2c-b7fe-4359-ac45-18b505e29d8c").unwrap(); + grub_config.update_search(&new_uuid).unwrap(); + + assert!(grub_config.contents.contains(&format!( + "search --no-floppy --fs-uuid --set=root {new_uuid}" + ))); + assert!(!grub_config.contents.contains("deadbeef")); + } + + #[test] + fn test_update_search_azl4_form() { + // AZL4 (Fedora-based) stubs omit --no-floppy. + let mut grub_config = GrubConfig { + path: PathBuf::new(), + contents: indoc::indoc! { r#" + set timeout=0 + search --fs-uuid --set=root deadbeef-cafe-babe-0000-111122223333 + "# } + .to_owned(), + linux_command_line: None, + }; + + let new_uuid = Uuid::parse_str("9e6a9d2c-b7fe-4359-ac45-18b505e29d8c").unwrap(); + grub_config.update_search(&new_uuid).unwrap(); + + assert!(grub_config + .contents + .contains(&format!("search --fs-uuid --set=root {new_uuid}"))); + assert!(!grub_config.contents.contains("deadbeef")); + // Must not accidentally insert --no-floppy. + assert!(!grub_config.contents.contains("--no-floppy")); + } + #[test] fn test_update_rootdevice() { // Define original GRUB config contents on target machine diff --git a/crates/osutils/src/mkinitrd.rs b/crates/osutils/src/mkinitrd.rs index c6ab3d2e10..d01831826f 100644 --- a/crates/osutils/src/mkinitrd.rs +++ b/crates/osutils/src/mkinitrd.rs @@ -118,6 +118,8 @@ mod functional_test { fn test_regenerate_initrd() { let pattern = if osrelease::is_azl3().unwrap() { "/boot/initramfs-*.azl3.img" + } else if osrelease::is_azl4().unwrap() { + "/boot/initramfs-*.azl4.img" } else { "/boot/initrd.img-*" }; diff --git a/crates/osutils/src/osrelease.rs b/crates/osutils/src/osrelease.rs index e51926e745..c39981c6f7 100644 --- a/crates/osutils/src/osrelease.rs +++ b/crates/osutils/src/osrelease.rs @@ -31,6 +31,11 @@ pub fn is_azl3() -> Result { Ok(OsRelease::read()?.get_distro().is_azl3()) } +/// Returns whether the host is running Azure Linux 4. +pub fn is_azl4() -> Result { + Ok(OsRelease::read()?.get_distro().is_azl4()) +} + /// Represents the contents of the /etc/os-release file. /// /// See @@ -146,6 +151,8 @@ impl OsRelease { AzureLinuxRelease::AzL2 } else if v.starts_with("3.") { AzureLinuxRelease::AzL3 + } else if v.starts_with("4.") { + AzureLinuxRelease::AzL4 } else { trace!("Unknown Azure Linux release: {v}"); AzureLinuxRelease::Other @@ -342,6 +349,10 @@ impl Distro { self == &Distro::AzureLinux(AzureLinuxRelease::AzL3) } + pub fn is_azl4(&self) -> bool { + self == &Distro::AzureLinux(AzureLinuxRelease::AzL4) + } + pub fn is_acl(&self) -> bool { self == &Distro::AzureContainerLinux } @@ -354,6 +365,7 @@ pub enum AzureLinuxRelease { Other, AzL2, AzL3, + AzL4, } #[cfg(test)] @@ -429,6 +441,41 @@ mod tests { ); } + #[test] + fn test_parse_azl4() { + let data = indoc::indoc! { + r#" + NAME="Azure Linux" + VERSION="4.0 (Four Alpha2)" + RELEASE_TYPE=development + ID=azurelinux + ID_LIKE=fedora + VERSION_ID="4.0" + VERSION_CODENAME="" + PRETTY_NAME="Azure Linux 4.0 (Four Alpha2)" + ANSI_COLOR="0;38;2;60;110;180" + LOGO=azurelinux-logo-icon + CPE_NAME="cpe:/o:azurelinuxproject:azurelinux:4.0" + DEFAULT_HOSTNAME="azurelinux" + HOME_URL="https://aka.ms/azurelinux" + DOCUMENTATION_URL="https://aka.ms/azurelinux" + SUPPORT_URL="https://aka.ms/azurelinux" + BUG_REPORT_URL="https://aka.ms/azurelinux" + SUPPORT_END=2026-05-15 + "#, + }; + + let os_release = OsRelease::parse(data); + assert_eq!(os_release.id, Some("azurelinux".to_string())); + assert_eq!(os_release.version_id, Some("4.0".to_string())); + assert_eq!(os_release.id_like, Some("fedora".to_string())); + assert_eq!(os_release.release_type, Some("development".to_string())); + assert_eq!( + os_release.get_distro(), + Distro::AzureLinux(AzureLinuxRelease::AzL4) + ); + } + #[test] fn test_parse_extension_release() { let data = indoc::indoc! { diff --git a/crates/osutils/src/sfdisk.rs b/crates/osutils/src/sfdisk.rs index 81eef21c71..f40276ad95 100644 --- a/crates/osutils/src/sfdisk.rs +++ b/crates/osutils/src/sfdisk.rs @@ -197,6 +197,61 @@ pub fn get_disk_uuid(disk: &Path) -> Result, Error> { Ok(Some(uuid)) } +/// Sets the disk-id (GPT header DiskGUID) of the given disk via sfdisk. +/// +/// `uuid` must parse as a valid GUID; this is checked before invoking +/// sfdisk so an accidental flag-like string (e.g. `--foo`) is rejected +/// here rather than mis-interpreted by sfdisk as an option. +/// +/// `--no-reread` + `--no-tell-kernel` are passed because the typical +/// caller is `trident offline-initialize` inside MIC's chroot, where +/// the disk's partitions are bind-mounted into the chroot. Requesting +/// `BLKRRPART` on a disk with mounted partitions returns EBUSY; we +/// only care about updating the on-disk GPT here. +pub fn set_disk_uuid(disk: &Path, uuid: &str) -> Result<(), Error> { + uuid::Uuid::parse_str(uuid) + .with_context(|| format!("'{uuid}' is not a valid GUID for sfdisk --disk-id"))?; + Dependency::Sfdisk + .cmd() + .arg("--no-reread") + .arg("--no-tell-kernel") + .arg("--disk-id") + .arg(disk) + .arg(uuid) + .run_and_check() + .context(format!( + "Failed to set disk-id on {} to {uuid}", + disk.display() + ))?; + Ok(()) +} + +/// Sets the GPT partition UUID for a specific partition by number on the +/// given disk. +/// +/// `uuid` is validated as a GUID first to avoid sfdisk mis-interpreting +/// a flag-like argument. `--no-reread` / `--no-tell-kernel` mirror +/// [`set_disk_uuid`] for safety inside MIC chroots with mounted +/// partitions. +pub fn set_part_uuid(disk: &Path, partition_number: usize, uuid: &str) -> Result<(), Error> { + uuid::Uuid::parse_str(uuid) + .with_context(|| format!("'{uuid}' is not a valid GUID for sfdisk --part-uuid"))?; + Dependency::Sfdisk + .cmd() + .arg("--no-reread") + .arg("--no-tell-kernel") + .arg("--part-uuid") + .arg(disk) + .arg(partition_number.to_string()) + .arg(uuid) + .run_and_check() + .context(format!( + "Failed to set partition UUID on {} partition {partition_number} to {uuid}", + disk.display() + ))?; + Ok(()) +} + #[cfg(test)] mod tests { use super::*; diff --git a/crates/osutils/src/testutils/osrelease.rs b/crates/osutils/src/testutils/osrelease.rs index 6feff02bc6..27a2e5b173 100644 --- a/crates/osutils/src/testutils/osrelease.rs +++ b/crates/osutils/src/testutils/osrelease.rs @@ -38,11 +38,36 @@ const AZURE_LINUX_3_OS_RELEASE: &str = indoc::indoc! { "#, }; +/// Azure Linux 4.0 sample os-release file. +const AZURE_LINUX_4_OS_RELEASE: &str = indoc::indoc! { + r#" + NAME="Azure Linux" + VERSION="4.0 (Cloud Variant Beta)" + RELEASE_TYPE=development + ID=azurelinux + ID_LIKE=fedora + VERSION_ID="4.0" + VERSION_CODENAME="" + PRETTY_NAME="Azure Linux 4.0 (Cloud Variant Beta)" + ANSI_COLOR="0;38;2;60;110;180" + LOGO=azurelinux-logo-icon + CPE_NAME="cpe:/o:azurelinuxproject:azurelinux:4.0" + DEFAULT_HOSTNAME="azurelinux" + HOME_URL="https://aka.ms/azurelinux" + DOCUMENTATION_URL="https://aka.ms/azurelinux" + SUPPORT_URL="https://aka.ms/azurelinux" + BUG_REPORT_URL="https://aka.ms/azurelinux" + VARIANT="Cloud Variant" + VARIANT_ID=cloud + "#, +}; + /// Creates a mock /etc/os-release file with the given Azure Linux release. pub fn make_mock_os_release(root_path: &Path, azl_release: AzureLinuxRelease) -> Result<(), Error> { let os_release_content = match azl_release { AzureLinuxRelease::AzL2 => AZURE_LINUX_2_OS_RELEASE, AzureLinuxRelease::AzL3 => AZURE_LINUX_3_OS_RELEASE, + AzureLinuxRelease::AzL4 => AZURE_LINUX_4_OS_RELEASE, AzureLinuxRelease::Other => bail!("Unsupported Azure Linux release 'other'"), }; diff --git a/crates/trident/src/engine/boot/grub.rs b/crates/trident/src/engine/boot/grub.rs index b345f5c315..06949a6afb 100644 --- a/crates/trident/src/engine/boot/grub.rs +++ b/crates/trident/src/engine/boot/grub.rs @@ -63,9 +63,10 @@ pub(super) fn update_configs(ctx: &EngineContext) -> Result<(), Error> { let boot_grub_config_path = Path::new(ROOT_MOUNT_POINT_PATH).join(GRUB2_CONFIG_RELATIVE_PATH); // Update GRUB config on the boot device (volume holding /boot) - match ctx.host_os_release.get_distro() { - Distro::AzureLinux(AzureLinuxRelease::AzL3) => { - update_grub_config_azl3(ctx, &root_device_path, &boot_grub_config_path)?; + // Use the *image* distro (the OS being installed), not the host (MOS ISO). + match ctx.image_distro() { + Distro::AzureLinux(AzureLinuxRelease::AzL3 | AzureLinuxRelease::AzL4) => { + update_grub_config(ctx, &root_device_path, &boot_grub_config_path)?; } d => bail!("Unsupported distro for GRUB config update: {d:?}"), @@ -85,8 +86,8 @@ pub(super) fn update_configs(ctx: &EngineContext) -> Result<(), Error> { )) } -/// Updates the GRUB config for Azure Linux 3.0 using OS modifier. -fn update_grub_config_azl3( +/// Updates the GRUB config for Azure Linux (3.0 and 4.0) using OS modifier. +fn update_grub_config( ctx: &EngineContext, root_device_path: &Path, boot_grub_config_path: &Path, @@ -103,7 +104,7 @@ fn update_grub_config_azl3( .context("Failed to disable default cloud-init network config")?; } - debug!("Updating GRUB config for Azure Linux 3.0 with OS modifier"); + debug!("Updating GRUB config with OS modifier"); // OS modifier will read values of verity, selinux, root device, and overlay from original GRUB config // stamp them into /etc/default/grub and regenerate the GRUB config using grub-mkconfig. diff --git a/crates/trident/src/engine/context/mod.rs b/crates/trident/src/engine/context/mod.rs index 7481c55035..8a21e733c2 100644 --- a/crates/trident/src/engine/context/mod.rs +++ b/crates/trident/src/engine/context/mod.rs @@ -450,8 +450,20 @@ impl EngineContext { } /// Retrieves the distribution of the OS image. + /// + /// Prefers the image's own os-release (e.g., from the COSI being installed). + /// Falls back to the host os-release only when no image is mounted + /// (functional tests, runtime operations outside an install flow). + /// + /// If an image IS present but its distro is unrecognized, the image's + /// distro is returned as-is (Distro::Other) so callers can bail + /// explicitly rather than silently using the host's distro. pub(crate) fn image_distro(&self) -> Distro { - self.image_os_release().get_distro() + if self.image.is_some() { + self.image_os_release().get_distro() + } else { + self.host_os_release.get_distro() + } } } diff --git a/crates/trident/src/init/offline/mod.rs b/crates/trident/src/init/offline/mod.rs index cdbeee23d6..2d0d77c124 100644 --- a/crates/trident/src/init/offline/mod.rs +++ b/crates/trident/src/init/offline/mod.rs @@ -11,7 +11,7 @@ use anyhow::{bail, Error}; use log::{debug, info, trace, warn}; use maplit::hashmap; -use osutils::lsblk; +use osutils::{lsblk, sfdisk}; use sysdefs::partition_types::DiscoverablePartitionType; use trident_api::{ config::{ @@ -256,22 +256,128 @@ fn generate_host_status( .structured(ExecutionEnvironmentMisconfigurationError::PrismChrootEnvironment) .message("Failed to find root device in lsblk output")?; - let disk_uuid = lsblk_device + let disk_uuid = match lsblk_device .ptuuid .clone() .and_then(|ptuuid| ptuuid.as_uuid()) - .structured(ExecutionEnvironmentMisconfigurationError::PrismChrootEnvironment) - .message("No UUID found for root device")?; + { + Some(uuid) => uuid, + None => { + // lsblk didn't surface a PTUUID. This can happen in chroot + // environments (e.g. image-customizer / MIC) where the + // exposed loop device has partition children but the GPT + // disk-id either isn't set on the partition table or isn't + // populated by lsblk's PTUUID column. Fall back to sfdisk + // (which reads the GPT directly), and if that also reports + // no disk-id, mint one and persist it so the resulting + // image carries it forward to runtime. + let disk_dev_path = PathBuf::from("/dev").join(&lsblk_device.name); + warn!( + "PTUUID not reported by lsblk for {}; falling back to sfdisk", + disk_dev_path.display() + ); + let from_sfdisk = sfdisk::get_disk_uuid(&disk_dev_path) + .structured(ExecutionEnvironmentMisconfigurationError::PrismChrootEnvironment) + .message("Failed to read GPT disk-id via sfdisk")? + .and_then(|u| u.as_uuid()); + match from_sfdisk { + Some(uuid) => uuid, + None => { + let new_uuid = uuid::Uuid::new_v4(); + warn!( + "No GPT disk-id present on {}; assigning {}", + disk_dev_path.display(), + new_uuid + ); + sfdisk::set_disk_uuid(&disk_dev_path, &new_uuid.to_string()) + .structured( + ExecutionEnvironmentMisconfigurationError::PrismChrootEnvironment, + ) + .message(format!( + "Failed to assign GPT disk-id on {}", + disk_dev_path.display() + ))?; + new_uuid + } + } + } + }; lsblk_device.children.sort_by_key(|p| p.partn); - for (i, part) in lsblk_device.children.iter().enumerate() { - if part.part_uuid.is_none() { - return Err(TridentError::new( - ExecutionEnvironmentMisconfigurationError::PrismChrootEnvironment, - )) - .message(format!("No part UUID found for partition {}", i + 1)); + // Compute disk_dev_path once for partition-UUID fallback below. + let disk_dev_path = PathBuf::from("/dev").join(&lsblk_device.name); + + // For each partition, ensure we have a usable PARTUUID. Mirror the + // disk-id fallback above: prefer lsblk, then sfdisk, then mint a + // fresh one and persist it via sfdisk. Some chroot environments + // don't surface PARTUUID via lsblk --output-all and may also leave + // the value unset on the underlying GPT. + for (i, part) in lsblk_device.children.iter_mut().enumerate() { + if part.part_uuid.as_ref().and_then(|u| u.as_uuid()).is_some() { + continue; } + let partn = part.partn.unwrap_or((i + 1) as u32) as usize; + warn!( + "PARTUUID not reported by lsblk for partition {} on {}; falling back to sfdisk", + partn, + disk_dev_path.display() + ); + // Re-read the disk via sfdisk -J to find any UUID already present + // on this partition (sfdisk reads the GPT directly). + let sf_info = sfdisk::SfDisk::get_info(&disk_dev_path) + .structured(ExecutionEnvironmentMisconfigurationError::PrismChrootEnvironment) + .message(format!( + "Failed to read GPT info via sfdisk for {}", + disk_dev_path.display() + ))?; + if let Some(existing) = sf_info + .partitions + .iter() + .find(|p| p.number == partn) + .and_then(|p| p.id.as_uuid()) + { + // Reuse the UUID sfdisk read straight from the GPT for this + // partition rather than generating a new one. + part.part_uuid = Some(existing.to_string().into()); + continue; + } + + let new_uuid = uuid::Uuid::new_v4(); + warn!( + "Partition {} on {} has no PARTUUID; assigning {}", + partn, + disk_dev_path.display(), + new_uuid + ); + sfdisk::set_part_uuid(&disk_dev_path, partn, &new_uuid.to_string()) + .structured(ExecutionEnvironmentMisconfigurationError::PrismChrootEnvironment) + .message(format!( + "Failed to assign PARTUUID on partition {} of {}", + partn, + disk_dev_path.display() + ))?; + + // Re-read via sfdisk to confirm the write landed and to re-fetch the + // UUID straight from the GPT rather than trusting our in-memory copy. + let written_uuid = sfdisk::SfDisk::get_info(&disk_dev_path) + .structured(ExecutionEnvironmentMisconfigurationError::PrismChrootEnvironment) + .message(format!( + "Failed to re-read GPT info via sfdisk for {} after writing partition UUID", + disk_dev_path.display() + ))? + .partitions + .iter() + .find(|p| p.number == partn) + .and_then(|p| p.id.as_uuid()) + .ok_or_else(|| { + TridentError::new(ExecutionEnvironmentMisconfigurationError::PrismChrootEnvironment) + }) + .message(format!( + "sfdisk reported no PARTUUID for partition {} after writing {}", + partn, new_uuid + ))?; + part.part_uuid = Some(written_uuid.to_string().into()); } // Get partition paths created from combining Prism history and lsblk output. @@ -494,12 +600,21 @@ pub fn execute( trace!("Prism history contents:\n{history_file}"); + // Note: `disk` is the *runtime* device path that will be written + // into the datastore (e.g. /dev/sda). At build time inside Prism's + // chroot, this path generally does not exist because the disk is + // exposed as a loop device (the actual build-time device is + // auto-detected below by walking lsblk for the mount at "/"). + // Older code asserted that `disk` exist at build time, but that + // check tested the wrong invariant and broke AZL4 image builds + // where MIC does not bind a /dev/sda node into the chroot. let disk_path = Path::new(disk); if !disk_path.exists() { - return Err(TridentError::new( - ExecutionEnvironmentMisconfigurationError::PrismChrootEnvironment, - )) - .message(format!("Prism chroot environment doesn't contain {disk}")); + debug!( + "Runtime disk path {} not present in build environment; \ + this is expected when running inside MIC's chroot.", + disk_path.display() + ); } let history: Vec = diff --git a/crates/trident/src/subsystems/esp.rs b/crates/trident/src/subsystems/esp.rs index d09e3a8694..75c012887f 100644 --- a/crates/trident/src/subsystems/esp.rs +++ b/crates/trident/src/subsystems/esp.rs @@ -6,7 +6,7 @@ use std::{ }; use anyhow::{bail, ensure, Context, Error}; -use log::{debug, trace}; +use log::{debug, trace, warn}; use reqwest::Url; use tempfile::{NamedTempFile, TempDir}; @@ -313,8 +313,12 @@ fn copy_file_artifacts( )?; } } - } else { - // In non-UKI mode, bail if grub_noprefix.efi is not found in the image. + } else if ctx.image_distro().is_azl3() { + // AZL3 ships two GRUB variants: grub2-efi-binary (prefix-relative + // config lookup) and grub2-efi-binary-noprefix (root-device-relative + // config lookup). Trident's A/B update path requires the noprefix + // variant. If the image shipped the wrong one, fail early rather + // than producing an unbootable machine. ensure!( grub_noprefix || ctx @@ -581,7 +585,6 @@ fn copy_boot_files( esp_dir: &Path, boot_files: Vec, ) -> Result { - // Track whether grub-noprefix.efi is used let mut no_prefix = false; // Copy the specified files from temp_mount_path to esp_dir_path for boot_file in boot_files.iter() { @@ -628,6 +631,69 @@ fn copy_boot_files( Ok(no_prefix) } +/// Search EFI vendor directories for a specific binary. +/// +/// UEFI convention: each OS vendor installs its bootloader under +/// `EFI//` (e.g., `EFI/fedora/`, `EFI/azurelinux/`). +/// This function searches all subdirectories of the EFI directory +/// for the specified binary, skipping the BOOT fallback directory. +/// +/// Vendor dirs are iterated in sorted (lexicographic) order so the +/// selection is reproducible across builds when more than one vendor +/// directory contains a candidate. `read_dir` order alone is +/// filesystem-dependent (ext4 returns hash order, FAT returns +/// directory-entry order), which would produce irreproducible ESP +/// images on cross-builds and break attestation/PCR lock for the +/// selected bootloader. +fn find_efi_binary_in_vendor_dirs(efi_dir: &Path, binary_name: &str) -> Option { + let entries = match std::fs::read_dir(efi_dir) { + Ok(e) => e, + Err(e) => { + debug!("Cannot read EFI directory '{}': {}", efi_dir.display(), e); + return None; + } + }; + + // Materialize entries first so we can sort, and so a per-entry + // iterator error is logged instead of silently dropped. + let mut paths: Vec = Vec::new(); + for entry in entries { + match entry { + Ok(e) => paths.push(e.path()), + Err(e) => warn!( + "Failed to read entry under EFI directory '{}': {}", + efi_dir.display(), + e + ), + } + } + paths.sort(); + + for path in paths { + if !path.is_dir() { + continue; + } + + // Skip the BOOT directory (already checked by the caller) + if let Some(name) = path.file_name().and_then(|n| n.to_str()) { + if name.eq_ignore_ascii_case("BOOT") { + continue; + } + } + + let candidate = path.join(binary_name); + if candidate.exists() && candidate.is_file() { + debug!( + "Found GRUB EFI executable in vendor directory: '{}'", + candidate.display() + ); + return Some(candidate); + } + } + + None +} + /// Generates a list of filepaths to the boot files that need to be copied to implement file-based /// update of ESP, relative to the mounted directory. /// @@ -665,24 +731,35 @@ fn generate_boot_filepaths(temp_mount_dir: &Path, is_uki: bool) -> Result