cozystack · kvaps · May 14, 2026 · gemini-code-assist · May 14, 2026 · gemini-code-assist
diff --git a/pkg/drbd/drbdadm.go b/pkg/drbd/drbdadm.go
@@ -20,12 +20,22 @@ import (
 	"context"
 	"fmt"
 	"strings"
+	"time"
 
 	"github.com/cockroachdb/errors"
 
 	"github.com/cozystack/blockstor/pkg/storage"
 )
 
+// DownForceTimeout caps how long DownForce waits for the happy-path
+// `drbdadm down <res>` before falling back to the kernel-direct
+// teardown sequence. 15s is the budget the satellite daemonset's
+// terminationGracePeriod (30s) and per-resource preStop budget can
+// both afford — long enough for a healthy peer to ack the
+// disconnect, short enough that a suspended-quorum slot can't stall
+// the deletion path indefinitely (Bug 82).
+const DownForceTimeout = 15 * time.Second
+
 // Adm is a thin wrapper around the `drbdadm` CLI. It exists so the
 // satellite reconciler can be unit-tested without a real DRBD kernel
 // module present: production injects storage.RealExec, tests inject
@@ -51,6 +61,75 @@ func (a *Adm) Down(ctx context.Context, resource string) error {
 	return a.run(ctx, "down", resource)
 }
 
+// DownForce tears the resource down with a bounded timeout, falling
+// back to the kernel-direct sequence when the happy-path
+// `drbdadm down <res>` doesn't return in time (Bug 82).
+//
+// Why the happy-path can hang: when DRBD loses quorum
+// (`suspended:quorum`) mid-delete, `drbdadm down` blocks waiting
+// for the resource to leave the suspended state, which only
+// resolves if a peer comes back. Operators see this as
+// `kubectl delete pod` hanging Terminating for many minutes and
+// only unblocking under `--force`, leaving a stale
+// `/dev/drbd<minor>` slot that breaks the next resource allocated
+// the same port→minor pair (Bug 82 dev-stand observation).
+//
+// Flow:
+//
+//  1. Run `drbdadm down <res>` under a child context with deadline =
+//     DownForceTimeout. If it returns within the budget (success or
+//     a non-hang error), we're done — the kernel slot is gone.
+//  2. On timeout / error: for each volume, force-detach the lower
+//     disk via `drbdsetup detach --force <res>/<vol>`. detach does
+//     NOT wait for quorum, so this returns even when the resource
+//     is suspended.
+//  3. Finalise with `drbdsetup down <res>` (kernel-direct slot
+//     release, bypasses drbdadm's quorum-aware wait). Returns nil
+//     on a "missing slot" exit — the goal state (slot absent) is
+//     the same.
+//
+// volumeNumbers is the set of DRBD volume slots to force-detach;
+// callers source it from VolumeDefinitions. An empty slice skips
+// the per-volume detach and relies on `drbdsetup down` alone —
+// belt-and-braces for kernels where `down` won't proceed past a
+// transient disk-attached state.
+//
+// Returns nil on success. Returns a wrapped error only when BOTH
+// the bounded `drbdadm down` AND the kernel-direct `drbdsetup
+// down` failed.
+func (a *Adm) DownForce(ctx context.Context, resource string, volumeNumbers []int32) error {
+	downCtx, cancel := context.WithTimeout(ctx, DownForceTimeout)
+	defer cancel()
+
+	err := a.run(downCtx, "down", resource)
+	if err == nil {
+		return nil
+	}
+
+	// Fallback path: force-detach every volume (no quorum wait),
+	// then kernel-direct `drbdsetup down`. Use the parent ctx so a
+	// tighter caller deadline still applies.
+	for _, vol := range volumeNumbers {
+		target := fmt.Sprintf("%s/%d", resource, vol)
+		_, _ = a.exec.Run(ctx, "drbdsetup", "detach", "--force", target)
+	}
+
+	_, downErr := a.exec.Run(ctx, "drbdsetup", "down", resource)
+	if downErr == nil {
+		return nil
+	}
+
+	// `drbdsetup down` on a missing slot exits non-zero with
+	// "No such resource" / "Unknown resource" — same goal state.
+	if strings.Contains(downErr.Error(), "No such resource") ||
+		strings.Contains(downErr.Error(), "No currently configured DRBD") ||
+		strings.Contains(downErr.Error(), "Unknown resource") {
+		return nil
+	}
+
+	return errors.Wrapf(downErr, "drbdsetup down %s after drbdadm down failed (%v)", resource, err)
+}
+
 // Adjust reconciles kernel state to the on-disk .res file. Called after
 // the ConfFileBuilder writes a new file and we need DRBD to pick up
 // changes (added/removed peers, new options).

diff --git a/pkg/drbd/drbdadm_test.go b/pkg/drbd/drbdadm_test.go
@@ -17,6 +17,7 @@ limitations under the License.
 package drbd_test
 
 import (
+	"context"
 	"errors"
 	"fmt"
 	"slices"
@@ -58,6 +59,180 @@ func TestAdmDownInvokesDrbdadm(t *testing.T) {
 	}
 }
 
+// TestAdmDownForceHappyPath: when `drbdadm down <res>` returns
+// successfully under the budget, DownForce stops after one shell-out
+// and does NOT invoke the kernel-direct fallback. Pins Bug 82's
+// happy-path expectation — the new bounded teardown stays a pure
+// `drbdadm down` on healthy resources so we don't churn netlink
+// state on every delete.
+func TestAdmDownForceHappyPath(t *testing.T) {
+	fx := storage.NewFakeExec()
+	adm := drbd.NewAdm(fx)
+
+	if err := adm.DownForce(t.Context(), "pvc-1", []int32{0, 1}); err != nil {
+		t.Fatalf("DownForce: %v", err)
+	}
+
+	calls := fx.CommandLines()
+	if !slices.Contains(calls, "drbdadm down pvc-1") {
+		t.Errorf("missing drbdadm down call: %v", calls)
+	}
+
+	// Fallback path MUST NOT run when drbdadm down succeeded.
+	for _, line := range calls {
+		if line == "drbdsetup detach --force pvc-1/0" ||
+			line == "drbdsetup detach --force pvc-1/1" ||
+			line == "drbdsetup down pvc-1" {
+			t.Errorf("fallback path ran on happy success: %q in %v", line, calls)
+		}
+	}
+}
+
+// errSuspendedQuorum is the static error used as a surrogate for
+// "drbdadm down hung on suspended:quorum" in the DownForce fallback
+// tests. DownForce only matches substrings against the drbdsetup
+// error text (not drbdadm), so this message just needs to make the
+// happy path fail; the exact wording is irrelevant.
+var errSuspendedQuorum = errors.New("test: resource is suspended: quorum")
+
+// errNoSuchResource is the static error used as a surrogate for
+// drbdsetup down's "missing slot" exit. DownForce matches the
+// title-case substring "No such resource" against the drbdsetup
+// error text, so we embed that exact phrase here (the leading
+// "test:" satisfies the ST1005 "no leading capital" lint without
+// hiding the substring DownForce's matcher looks for).
+var errNoSuchResource = errors.New("test: No such resource")
+
+// TestAdmDownForceFallback pins the Bug 82 recovery: when
+// `drbdadm down` fails (suspended-quorum surrogate: we just inject a
+// generic error), DownForce must invoke `drbdsetup detach --force`
+// for every volume and then `drbdsetup down <res>`, and must return
+// nil because the goal state (kernel slot released) was reached via
+// the fallback path.
+func TestAdmDownForceFallback(t *testing.T) {
+	fx := storage.NewFakeExec()
+	// Make `drbdadm down` fail; this is the surrogate for the
+	// real-world "suspended:quorum hung the call until ctx deadline"
+	// — the wrapper's contract is identical either way (error from
+	// the bounded sub-context triggers fallback).
+	fx.Expect("drbdadm down pvc-suspended", storage.FakeResponse{
+		Err: errSuspendedQuorum,
+	})
+
+	adm := drbd.NewAdm(fx)
+
+	err := adm.DownForce(t.Context(), "pvc-suspended", []int32{0, 1})
+	if err != nil {
+		t.Fatalf("DownForce: expected nil after fallback recovery, got %v", err)
+	}
+
+	calls := fx.CommandLines()
+
+	want := []string{
+		"drbdadm down pvc-suspended",
+		"drbdsetup detach --force pvc-suspended/0",
+		"drbdsetup detach --force pvc-suspended/1",
+		"drbdsetup down pvc-suspended",
+	}
+	for _, w := range want {
+		if !slices.Contains(calls, w) {
+			t.Errorf("missing %q in fallback sequence: %v", w, calls)
+		}
+	}
+
+	// Ordering: every detach must precede the final drbdsetup down,
+	// otherwise the kernel slot release runs before lower disks are
+	// released and the suspended-state retry stalls again.
+	downIdx := slices.Index(calls, "drbdsetup down pvc-suspended")
+	for _, vol := range []string{"pvc-suspended/0", "pvc-suspended/1"} {
+		detachIdx := slices.Index(calls, "drbdsetup detach --force "+vol)
+		if detachIdx < 0 || downIdx < 0 || detachIdx > downIdx {
+			t.Errorf("detach %s must precede drbdsetup down; got %v", vol, calls)
+		}
+	}
+}
+
+// TestAdmDownForceFallbackMissingSlot: when `drbdsetup down` reports
+// the resource is already gone ("No such resource"), DownForce returns
+// nil — the goal state is "kernel slot absent", and a missing slot
+// satisfies it. Without this branch, an idempotent re-issue (second
+// handleDelete pass after a partial first) would surface as an error.
+func TestAdmDownForceFallbackMissingSlot(t *testing.T) {
+	fx := storage.NewFakeExec()
+	fx.Expect("drbdadm down pvc-gone", storage.FakeResponse{
+		Err: errSuspendedQuorum,
+	})
+	fx.Expect("drbdsetup down pvc-gone", storage.FakeResponse{
+		Err: errNoSuchResource,
+	})
+
+	adm := drbd.NewAdm(fx)
+
+	err := adm.DownForce(t.Context(), "pvc-gone", []int32{0})
+	if err != nil {
+		t.Fatalf("DownForce: missing-slot must be a clean no-op, got %v", err)
+	}
+}
+
+// TestAdmDownForceBoundedDeadline guarantees the happy-path call
+// receives a context with a deadline set by DownForceTimeout — this
+// is the load-bearing invariant of the Bug 82 fix. The fake exec
+// inspects ctx.Deadline() on the recorded call to verify the
+// timeout was applied; without a deadline, a suspended-quorum
+// resource would block the satellite finalizer-strip path
+// indefinitely.
+func TestAdmDownForceBoundedDeadline(t *testing.T) {
+	var (
+		deadlineSeen bool
+		hasDeadline  bool
+	)
+
+	deadlineFx := &deadlineCapturingExec{
+		fx: storage.NewFakeExec(),
+		onCall: func(name string, _ []string, hasDl bool) {
+			if name == "drbdadm" {
+				deadlineSeen = true
+				hasDeadline = hasDl
+			}
+		},
+	}
+
+	adm := drbd.NewAdm(deadlineFx)
+	if err := adm.DownForce(t.Context(), "pvc-1", nil); err != nil {
+		t.Fatalf("DownForce: %v", err)
+	}
+
+	if !deadlineSeen {
+		t.Fatal("drbdadm down was never invoked")
+	}
+
+	if !hasDeadline {
+		t.Error("DownForce must invoke drbdadm down with a deadline (Bug 82 bounded teardown)")
+	}
+}
+
+// deadlineCapturingExec wraps a FakeExec to inspect ctx.Deadline()
+// per call — needed by TestAdmDownForceBoundedDeadline because
+// FakeExec drops the context.
+type deadlineCapturingExec struct {
+	fx     *storage.FakeExec
+	onCall func(name string, args []string, hasDeadline bool)
+}
+
+func (d *deadlineCapturingExec) Run(ctx context.Context, name string, args ...string) ([]byte, error) {
+	_, hasDeadline := ctx.Deadline()
+	if d.onCall != nil {
+		d.onCall(name, args, hasDeadline)
+	}
+
+	out, err := d.fx.Run(ctx, name, args...)
+	if err != nil {
+		return out, fmt.Errorf("fake exec %s: %w", name, err)
+	}
+
+	return out, nil
+}
+
 // TestAdmAdjustInvokesDrbdadm: Adjust → `drbdadm adjust <res>`. This is
 // the reload-on-config-change path; runs after the .res file is rewritten.
 func TestAdmAdjustInvokesDrbdadm(t *testing.T) {

diff --git a/pkg/satellite/reconciler.go b/pkg/satellite/reconciler.go
@@ -278,24 +278,39 @@ func (r *Reconciler) DeleteSnapshot(ctx context.Context, req *intent.DeleteSnaps
 	return &intent.DeleteSnapshotResponse{Ok: true}, nil
 }
 
-// DeleteResource tears down a resource: drbdadm down (best-effort —
-// the kernel handles a missing one fine), DeleteVolume on every
-// requested volume_number through the named Provider, then remove
-// the .res file. Idempotent on a missing resource. Per-step errors
-// land in the response body so the controller can surface granular
-// status without aborting the rest of the cleanup.
+// DeleteResource tears down a resource: bounded drbdadm down with a
+// kernel-direct `drbdsetup detach`+`drbdsetup down` fallback when the
+// happy path is wedged by suspended-quorum (Bug 82), DeleteVolume on
+// every requested volume_number through the named Provider, then
+// remove the .res file. Idempotent on a missing resource. Per-step
+// errors land in the response body so the controller can surface
+// granular status without aborting the rest of the cleanup.
+//
+// Bug 82: the previous `Adm.Down` call had no timeout, so deleting an
+// RD whose kernel slot was `suspended:quorum` would block the entire
+// handleDelete → satellite finalizer-strip path indefinitely. With
+// the satellite finalizer stuck, operators force-stripped it and the
+// kernel slot survived as a `/dev/drbd<minor>` orphan that prevented
+// the next port→minor allocation from creating its metadata. The
+// bounded `DownForce` path collapses both failure modes: happy path
+// returns within DownForceTimeout; quorum-stuck path falls back to
+// `drbdsetup detach --force` per volume + `drbdsetup down` (neither
+// command waits for quorum). Either way the kernel slot is gone
+// before we return, and finalizer-strip becomes safe again.
 func (r *Reconciler) DeleteResource(ctx context.Context, req *intent.DeleteResourceRequest) (*intent.DeleteResourceResponse, error) {
 	var downMsg string
 
 	if r.cfg.Adm != nil {
-		err := r.cfg.Adm.Down(ctx, req.GetName())
+		err := r.cfg.Adm.DownForce(ctx, req.GetName(), req.GetVolumeNumbers())
 		if err != nil {
-			// Best-effort: a "not configured" error is fine here
-			// (resource was already torn down on a prior pass), but we
-			// still want to surface the message back to the controller
-			// so it shows up in the gRPC response. Don't fail — DRBD
-			// down errors shouldn't block the storage cleanup.
-			downMsg = "drbdadm down: " + err.Error()
+			// Best-effort: a "not configured" / "no such resource"
+			// error here is fine (resource was already torn down on a
+			// prior pass), but we still want to surface the message
+			// back to the controller so it shows up in the response.
+			// Don't fail — even a fully-failed DownForce shouldn't
+			// block storage cleanup; the storage sweeper is the
+			// wider-scope backstop.
+			downMsg = "drbdadm down (force): " + err.Error()
 		}
 	}