From 7af0a7fcf6e255eab5a78a3d8f30e1301839e22a Mon Sep 17 00:00:00 2001 From: Zeeshan Lakhani Date: Thu, 7 May 2026 16:01:11 +0000 Subject: [PATCH 1/2] [multicast,instance,test-flake] reorder instance_stop and send stop before tearing down multicast member state Handles (closes) https://github.com/oxidecomputer/omicron/issues/9711 (was fixed downstream in a PR'ed branch). --- nexus/src/app/instance.rs | 37 ++++++++++++++++++------------------- 1 file changed, 18 insertions(+), 19 deletions(-) diff --git a/nexus/src/app/instance.rs b/nexus/src/app/instance.rs index d24a401b317..37715330384 100644 --- a/nexus/src/app/instance.rs +++ b/nexus/src/app/instance.rs @@ -1097,20 +1097,6 @@ impl super::Nexus { ) .await?; - // Update multicast member state for this instance to "Left" and clear - // `sled_id` - only if multicast is enabled - if self.multicast_enabled() { - self.db_datastore - .multicast_group_members_detach_by_instance( - opctx, - InstanceUuid::from_untyped_uuid(authz_instance.id()), - ) - .await?; - } - - // Activate multicast reconciler to handle switch-level changes - self.background_tasks.task_multicast_reconciler.activate(); - if let Err(e) = self .instance_request_state( opctx, @@ -1122,17 +1108,30 @@ impl super::Nexus { { if let (InstanceStateChangeError::SledAgent(inner), Some(vmm)) = (&e, state.vmm()) + && inner.vmm_gone() { - if inner.vmm_gone() { - let _ = self - .mark_vmm_failed(opctx, authz_instance, vmm, inner) - .await; - } + let _ = self + .mark_vmm_failed(opctx, authz_instance, vmm, inner) + .await; } return Err(e); } + // Detach multicast members (state -> "Left", clear `sled_id`) only + // after sled-agent has acknowledged the Stop request. Doing it + // before the request would tear down M2P/forwarding for a guest + // that is still running if the request fails. + if self.multicast_enabled() { + self.db_datastore + .multicast_group_members_detach_by_instance( + opctx, + InstanceUuid::from_untyped_uuid(authz_instance.id()), + ) + .await?; + self.background_tasks.task_multicast_reconciler.activate(); + } + self.db_datastore .instance_fetch_with_vmm(opctx, &authz_instance) .await From 1fe3a3c4bcb9a7555e374770a7c6d368802e3e1c Mon Sep 17 00:00:00 2001 From: Zeeshan Lakhani Date: Fri, 22 May 2026 15:29:42 +0000 Subject: [PATCH 2/2] [review] move multicast member detach into instance_update saga This addresses a review comment on the synchronous `multicast_group_members_detach_by_instance` call introduced by the prior reorder, as a transient DB failure would 500 the caller for a stop that already succeeded at the sled, while short-circuiting past the reconciler activation. The state would be self-healing via the reconciler, but the visible failure for a successful op would be a regression. Instead, we move the detach into the `instance_update` saga's `siu_commit_instance_updates` action, gated on `update.deprovision.is_some()`, where the saga signals an instance has reached no-active-VMM state. That saga already orchestrates terminal-VMM cleanup, so the detach fits naturally there. As a side effect, this also covers guest-initiated shutdown and sled-agent-reported failure paths that the `instance_stop` callsite hadn't covered. And, we still have the reconciler to check through things. This change also includes a reconciler nudge in `instance_stop` in the case where the saga does not fire if there were no terminal VMM transition to drive it in the first place (instead of waiting for the full reconciler pass on next tick). --- nexus/src/app/instance.rs | 16 +++++---------- nexus/src/app/sagas/instance_update/mod.rs | 24 ++++++++++++++++++++++ 2 files changed, 29 insertions(+), 11 deletions(-) diff --git a/nexus/src/app/instance.rs b/nexus/src/app/instance.rs index b34f449e1b6..ab82d598213 100644 --- a/nexus/src/app/instance.rs +++ b/nexus/src/app/instance.rs @@ -1133,17 +1133,11 @@ impl super::Nexus { return Err(e); } - // Detach multicast members (state -> "Left", clear `sled_id`) only - // after sled-agent has acknowledged the Stop request. Doing it - // before the request would tear down M2P/forwarding for a guest - // that is still running if the request fails. - if self.multicast_enabled() { - self.db_datastore - .multicast_group_members_detach_by_instance( - opctx, - InstanceUuid::from_untyped_uuid(authz_instance.id()), - ) - .await?; + // Idempotent stop: with no active VMM, the instance-update saga will + // not fire (no terminal transition to drive it), so nudge the + // reconciler to converge any stale "Joined" rows now rather than wait + // a full reconciler tick. + if state.vmm().is_none() && self.multicast_enabled() { self.background_tasks.task_multicast_reconciler.activate(); } diff --git a/nexus/src/app/sagas/instance_update/mod.rs b/nexus/src/app/sagas/instance_update/mod.rs index 6de40e3fc04..76b232d68e1 100644 --- a/nexus/src/app/sagas/instance_update/mod.rs +++ b/nexus/src/app/sagas/instance_update/mod.rs @@ -1267,6 +1267,30 @@ async fn siu_commit_instance_updates( } } + // Detach all multicast members once the active VMM has reached a terminal + // state, which avoids tearing down M2P/forwarding while the guest is still + // running on its sled. Covers graceful stop and failure paths alike. + if update.deprovision.is_some() && nexus.multicast_enabled() { + if let Err(e) = osagactx + .datastore() + .multicast_group_members_detach_by_instance( + &opctx, + InstanceUuid::from_untyped_uuid(instance_id), + ) + .await + { + info!(log, + "instance update: failed to detach multicast members on deprovision, next reconciler pass will retry"; + "instance_id" => %instance_id, + "error" => ?e); + } else { + info!(log, + "instance update: detached multicast members on deprovision"; + "instance_id" => %instance_id); + nexus.background_tasks.task_multicast_reconciler.activate(); + } + } + Ok(()) }