diff --git a/nexus/src/app/instance.rs b/nexus/src/app/instance.rs index d0b77f6ff3f..ab82d598213 100644 --- a/nexus/src/app/instance.rs +++ b/nexus/src/app/instance.rs @@ -1112,20 +1112,6 @@ impl super::Nexus { ) .await?; - // Update multicast member state for this instance to "Left" and clear - // `sled_id` - only if multicast is enabled - if self.multicast_enabled() { - self.db_datastore - .multicast_group_members_detach_by_instance( - opctx, - InstanceUuid::from_untyped_uuid(authz_instance.id()), - ) - .await?; - } - - // Activate multicast reconciler to handle switch-level changes - self.background_tasks.task_multicast_reconciler.activate(); - if let Err(e) = self .instance_request_state( opctx, @@ -1137,23 +1123,24 @@ impl super::Nexus { { if let (InstanceStateChangeError::SledAgent(inner), Some(vmm)) = (&e, state.vmm()) + && let Some(reason) = inner.vmm_failure_reason() { - if let Some(reason) = inner.vmm_failure_reason() { - let _ = self - .mark_vmm_failed( - opctx, - authz_instance, - vmm, - inner, - reason, - ) - .await; - } + let _ = self + .mark_vmm_failed(opctx, authz_instance, vmm, inner, reason) + .await; } return Err(e); } + // Idempotent stop: with no active VMM, the instance-update saga will + // not fire (no terminal transition to drive it), so nudge the + // reconciler to converge any stale "Joined" rows now rather than wait + // a full reconciler tick. + if state.vmm().is_none() && self.multicast_enabled() { + self.background_tasks.task_multicast_reconciler.activate(); + } + self.db_datastore .instance_fetch_with_vmm(opctx, &authz_instance) .await diff --git a/nexus/src/app/sagas/instance_update/mod.rs b/nexus/src/app/sagas/instance_update/mod.rs index 6de40e3fc04..76b232d68e1 100644 --- a/nexus/src/app/sagas/instance_update/mod.rs +++ b/nexus/src/app/sagas/instance_update/mod.rs @@ -1267,6 +1267,30 @@ async fn siu_commit_instance_updates( } } + // Detach all multicast members once the active VMM has reached a terminal + // state, which avoids tearing down M2P/forwarding while the guest is still + // running on its sled. Covers graceful stop and failure paths alike. + if update.deprovision.is_some() && nexus.multicast_enabled() { + if let Err(e) = osagactx + .datastore() + .multicast_group_members_detach_by_instance( + &opctx, + InstanceUuid::from_untyped_uuid(instance_id), + ) + .await + { + info!(log, + "instance update: failed to detach multicast members on deprovision, next reconciler pass will retry"; + "instance_id" => %instance_id, + "error" => ?e); + } else { + info!(log, + "instance update: detached multicast members on deprovision"; + "instance_id" => %instance_id); + nexus.background_tasks.task_multicast_reconciler.activate(); + } + } + Ok(()) }