diff --git a/internal/scheduling/reservations/commitments/committed_resource_controller_test.go b/internal/scheduling/reservations/commitments/committed_resource_controller_test.go index d43912ee2..5843bb3d9 100644 --- a/internal/scheduling/reservations/commitments/committed_resource_controller_test.go +++ b/internal/scheduling/reservations/commitments/committed_resource_controller_test.go @@ -126,6 +126,17 @@ func newCRTestClient(scheme *runtime.Scheme, objects ...client.Object) client.Cl } return []string{cr.Spec.ProjectID} }). + WithIndex(&v1alpha1.Reservation{}, idxReservationByAllocationVMUUID, func(obj client.Object) []string { + res, ok := obj.(*v1alpha1.Reservation) + if !ok || res.Spec.CommittedResourceReservation == nil { + return nil + } + uuids := make([]string, 0, len(res.Spec.CommittedResourceReservation.Allocations)) + for vmUUID := range res.Spec.CommittedResourceReservation.Allocations { + uuids = append(uuids, vmUUID) + } + return uuids + }). Build() } diff --git a/internal/scheduling/reservations/commitments/field_index.go b/internal/scheduling/reservations/commitments/field_index.go index 1f3733615..1237d0da5 100644 --- a/internal/scheduling/reservations/commitments/field_index.go +++ b/internal/scheduling/reservations/commitments/field_index.go @@ -18,15 +18,17 @@ const idxCommittedResourceByUUID = "spec.commitmentUUID" const idxCommittedResourceByProjectID = "spec.projectID" const idxReservationByCommitmentUUID = "spec.committedResourceReservation.commitmentUUID" const idxProjectQuotaByProjectID = "spec.projectID" +const idxReservationByAllocationVMUUID = "spec.committedResourceReservation.allocations" // once guards ensure each field index is registered exactly once. // Both CommittedResourceController and UsageReconciler call indexCommittedResourceByUUID; // the underlying cache returns "indexer conflict" on double registration. var ( - onceIndexCRByUUID sync.Once - onceIndexCRByProjectID sync.Once - onceIndexReservationByUUID sync.Once - onceIndexPQByProjectID sync.Once + onceIndexCRByUUID sync.Once + onceIndexCRByProjectID sync.Once + onceIndexReservationByUUID sync.Once + onceIndexPQByProjectID sync.Once + onceIndexReservationByAllocationVMID sync.Once ) // indexCommittedResourceByUUID registers the index used by UsageReconciler to look up @@ -128,3 +130,34 @@ func indexProjectQuotaByProjectID(ctx context.Context, mcl *multicluster.Client) }) return err } + +// indexReservationByAllocationVMUUID registers an index over all VM UUIDs present in +// Spec.CommittedResourceReservation.Allocations. This allows the reservation controller +// to efficiently find all other Reservation CRDs carrying a specific VM UUID without +// scanning every reservation in the cluster. +func indexReservationByAllocationVMUUID(ctx context.Context, mcl *multicluster.Client) (err error) { + onceIndexReservationByAllocationVMID.Do(func() { + log := logf.FromContext(ctx) + err = mcl.IndexField(ctx, + &v1alpha1.Reservation{}, + &v1alpha1.ReservationList{}, + idxReservationByAllocationVMUUID, + func(obj client.Object) []string { + res, ok := obj.(*v1alpha1.Reservation) + if !ok { + log.Error(errors.New("unexpected type"), "expected Reservation", "object", obj) + return nil + } + if res.Spec.CommittedResourceReservation == nil { + return nil + } + uuids := make([]string, 0, len(res.Spec.CommittedResourceReservation.Allocations)) + for vmUUID := range res.Spec.CommittedResourceReservation.Allocations { + uuids = append(uuids, vmUUID) + } + return uuids + }, + ) + }) + return err +} diff --git a/internal/scheduling/reservations/commitments/integration_test.go b/internal/scheduling/reservations/commitments/integration_test.go index 857cf2734..8c51a8162 100644 --- a/internal/scheduling/reservations/commitments/integration_test.go +++ b/internal/scheduling/reservations/commitments/integration_test.go @@ -359,6 +359,17 @@ func newIntgEnv(t *testing.T, initialObjects []client.Object, schedulerFn http.H } return []string{cr.Spec.CommitmentUUID} }). + WithIndex(&v1alpha1.Reservation{}, idxReservationByAllocationVMUUID, func(obj client.Object) []string { + res, ok := obj.(*v1alpha1.Reservation) + if !ok || res.Spec.CommittedResourceReservation == nil { + return nil + } + uuids := make([]string, 0, len(res.Spec.CommittedResourceReservation.Allocations)) + for vmUUID := range res.Spec.CommittedResourceReservation.Allocations { + uuids = append(uuids, vmUUID) + } + return uuids + }). Build() schedulerSrv := httptest.NewServer(schedulerFn) diff --git a/internal/scheduling/reservations/commitments/reservation_controller.go b/internal/scheduling/reservations/commitments/reservation_controller.go index 7924c7fb7..f75b152d2 100644 --- a/internal/scheduling/reservations/commitments/reservation_controller.go +++ b/internal/scheduling/reservations/commitments/reservation_controller.go @@ -395,6 +395,13 @@ func (r *CommitmentReservationController) reconcileAllocations(ctx context.Conte res.Status.CommittedResourceReservation = &v1alpha1.CommittedResourceReservationStatus{} } + // Snapshot existing Status.Allocations before we overwrite it so we can detect + // which VM UUIDs are newly confirmed after the patch. + existingStatusAllocations := make(map[string]string, len(res.Status.CommittedResourceReservation.Allocations)) + for k, v := range res.Status.CommittedResourceReservation.Allocations { + existingStatusAllocations[k] = v + } + // Build new Status.Allocations map based on HV CRD state. newStatusAllocations := make(map[string]string) // Track allocations to remove from Spec (stale/leaving VMs). @@ -469,6 +476,19 @@ func (r *CommitmentReservationController) reconcileAllocations(ctx context.Conte res.Status.CommittedResourceReservation.Allocations = newStatusAllocations } + // Proactively remove this VM UUID from all other candidate reservations that still + // carry it in their Spec.Allocations. Only do this for VMs that are newly confirmed + // in this reconcile cycle (present in newStatusAllocations but absent in the snapshot + // taken before any patch) to avoid redundant work on subsequent reconciles. + for vmUUID := range newStatusAllocations { + if _, wasAlreadyConfirmed := existingStatusAllocations[vmUUID]; wasAlreadyConfirmed { + continue + } + if err := r.cleanupCandidateReservations(ctx, res.Name, vmUUID); err != nil { + return nil, fmt.Errorf("failed to cleanup candidate reservations for vm %s: %w", vmUUID, err) + } + } + // Patch Status patch := client.MergeFrom(old) if err := r.Status().Patch(ctx, res, patch); err != nil { @@ -487,6 +507,41 @@ func (r *CommitmentReservationController) reconcileAllocations(ctx context.Conte return result, nil } +// cleanupCandidateReservations removes vmUUID from Spec.Allocations of all Reservation CRDs +// other than the one that just confirmed the VM. This is called once per newly confirmed VM +// so that candidate slots on non-selected hosts are freed immediately rather than waiting +// for those reservations' own grace period or periodic requeue. +func (r *CommitmentReservationController) cleanupCandidateReservations(ctx context.Context, confirmedReservationName, vmUUID string) error { + logger := LoggerFromContext(ctx).WithValues("component", "controller", "vm", vmUUID) + + var candidates v1alpha1.ReservationList + if err := r.List(ctx, &candidates, client.MatchingFields{idxReservationByAllocationVMUUID: vmUUID}); err != nil { + return fmt.Errorf("failed to list candidate reservations: %w", err) + } + + for i := range candidates.Items { + candidate := &candidates.Items[i] + if candidate.Name == confirmedReservationName { + continue + } + if candidate.Spec.CommittedResourceReservation == nil { + continue + } + if _, ok := candidate.Spec.CommittedResourceReservation.Allocations[vmUUID]; !ok { + continue + } + old := candidate.DeepCopy() + delete(candidate.Spec.CommittedResourceReservation.Allocations, vmUUID) + if err := r.Patch(ctx, candidate, client.MergeFrom(old)); err != nil { + if client.IgnoreNotFound(err) != nil { + return fmt.Errorf("failed to patch candidate reservation %s: %w", candidate.Name, err) + } + } + logger.Info("removed vm from candidate reservation", "reservation", candidate.Name, "host", candidate.Status.Host) + } + return nil +} + // getPipelineForFlavorGroup returns the pipeline name for a given flavor group. func (r *CommitmentReservationController) getPipelineForFlavorGroup(flavorGroupName string, logger logr.Logger) string { // Try exact match first (e.g., "2152" -> "kvm-cr-hana") @@ -581,6 +636,10 @@ func (r *CommitmentReservationController) SetupWithManager(mgr ctrl.Manager, mcl return err } + if err := indexReservationByAllocationVMUUID(context.Background(), mcl); err != nil { + return fmt.Errorf("failed to set up reservation allocation VM UUID index: %w", err) + } + // Use WatchesMulticluster to watch Reservations across all configured clusters // (home + remotes). This is required because Reservation CRDs may be stored // in remote clusters, not just the home cluster. Without this, the controller diff --git a/internal/scheduling/reservations/commitments/reservation_controller_test.go b/internal/scheduling/reservations/commitments/reservation_controller_test.go index df6316d46..8c5817893 100644 --- a/internal/scheduling/reservations/commitments/reservation_controller_test.go +++ b/internal/scheduling/reservations/commitments/reservation_controller_test.go @@ -421,9 +421,239 @@ func TestHypervisorToReservations(t *testing.T) { } // ============================================================================ -// Test: reconcileInstanceReservation_Success (existing test) +// Test: cleanupCandidateReservations // ============================================================================ +// TestCleanupCandidateReservations_MultiCandidate covers the BLI #409 acceptance +// criterion: VM UUID in 3 reservations, confirmed on 1, cleaned from 2. +func TestCleanupCandidateReservations_MultiCandidate(t *testing.T) { + scheme := newCRTestScheme(t) + now := time.Now() + oldTime := metav1.NewTime(now.Add(-30 * time.Minute)) + + const vmUUID = "vm-candidate-uuid" + + makeAlloc := func() map[string]v1alpha1.CommittedResourceAllocation { + return map[string]v1alpha1.CommittedResourceAllocation{ + vmUUID: { + CreationTimestamp: oldTime, + Resources: map[hv1.ResourceName]resource.Quantity{"memory": resource.MustParse("4Gi")}, + }, + } + } + + winning := &v1alpha1.Reservation{ + ObjectMeta: metav1.ObjectMeta{Name: "res-winning"}, + Spec: v1alpha1.ReservationSpec{ + Type: v1alpha1.ReservationTypeCommittedResource, + TargetHost: "host-1", + CommittedResourceReservation: &v1alpha1.CommittedResourceReservationSpec{ + Allocations: makeAlloc(), + }, + }, + Status: v1alpha1.ReservationStatus{Host: "host-1"}, + } + candidate2 := &v1alpha1.Reservation{ + ObjectMeta: metav1.ObjectMeta{Name: "res-candidate-2"}, + Spec: v1alpha1.ReservationSpec{ + Type: v1alpha1.ReservationTypeCommittedResource, + TargetHost: "host-2", + CommittedResourceReservation: &v1alpha1.CommittedResourceReservationSpec{ + Allocations: makeAlloc(), + }, + }, + Status: v1alpha1.ReservationStatus{Host: "host-2"}, + } + candidate3 := &v1alpha1.Reservation{ + ObjectMeta: metav1.ObjectMeta{Name: "res-candidate-3"}, + Spec: v1alpha1.ReservationSpec{ + Type: v1alpha1.ReservationTypeCommittedResource, + TargetHost: "host-3", + CommittedResourceReservation: &v1alpha1.CommittedResourceReservationSpec{ + Allocations: makeAlloc(), + }, + }, + Status: v1alpha1.ReservationStatus{Host: "host-3"}, + } + + k8sClient := newCRTestClient(scheme, winning, candidate2, candidate3) + ctrl := &CommitmentReservationController{Client: k8sClient, Scheme: scheme} + ctx := WithNewGlobalRequestID(context.Background()) + + if err := ctrl.cleanupCandidateReservations(ctx, "res-winning", vmUUID); err != nil { + t.Fatalf("cleanupCandidateReservations() error = %v", err) + } + + // winning reservation must still carry the VM UUID + var updatedWinning v1alpha1.Reservation + if err := k8sClient.Get(ctx, client.ObjectKeyFromObject(winning), &updatedWinning); err != nil { + t.Fatalf("failed to get winning reservation: %v", err) + } + if _, ok := updatedWinning.Spec.CommittedResourceReservation.Allocations[vmUUID]; !ok { + t.Errorf("winning reservation should still carry vm UUID %s", vmUUID) + } + + // both candidates must have the VM UUID removed + for _, obj := range []*v1alpha1.Reservation{candidate2, candidate3} { + var updated v1alpha1.Reservation + if err := k8sClient.Get(ctx, client.ObjectKeyFromObject(obj), &updated); err != nil { + t.Fatalf("failed to get %s: %v", obj.Name, err) + } + if _, ok := updated.Spec.CommittedResourceReservation.Allocations[vmUUID]; ok { + t.Errorf("candidate reservation %s should have vm UUID %s removed", obj.Name, vmUUID) + } + } +} + +// TestReconcileAllocations_ConfirmTriggersCandidateCleanup verifies the end-to-end flow: +// when reconcileAllocations confirms a VM on the winning reservation, the same VM UUID +// is removed from candidate reservations on other hosts. +func TestReconcileAllocations_ConfirmTriggersCandidateCleanup(t *testing.T) { + scheme := newCRTestScheme(t) + now := time.Now() + oldTime := metav1.NewTime(now.Add(-30 * time.Minute)) + + const vmUUID = "vm-confirmed-uuid" + config := ReservationControllerConfig{AllocationGracePeriod: metav1.Duration{Duration: 15 * time.Minute}} + + makeAlloc := func() map[string]v1alpha1.CommittedResourceAllocation { + return map[string]v1alpha1.CommittedResourceAllocation{ + vmUUID: { + CreationTimestamp: oldTime, + Resources: map[hv1.ResourceName]resource.Quantity{"memory": resource.MustParse("4Gi")}, + }, + } + } + + winning := &v1alpha1.Reservation{ + ObjectMeta: metav1.ObjectMeta{Name: "res-winning"}, + Spec: v1alpha1.ReservationSpec{ + Type: v1alpha1.ReservationTypeCommittedResource, + TargetHost: "host-1", + CommittedResourceReservation: &v1alpha1.CommittedResourceReservationSpec{ + Allocations: makeAlloc(), + }, + }, + Status: v1alpha1.ReservationStatus{ + Host: "host-1", + CommittedResourceReservation: &v1alpha1.CommittedResourceReservationStatus{ + Allocations: map[string]string{}, // not yet confirmed + }, + }, + } + candidate2 := &v1alpha1.Reservation{ + ObjectMeta: metav1.ObjectMeta{Name: "res-candidate-2"}, + Spec: v1alpha1.ReservationSpec{ + Type: v1alpha1.ReservationTypeCommittedResource, + TargetHost: "host-2", + CommittedResourceReservation: &v1alpha1.CommittedResourceReservationSpec{ + Allocations: makeAlloc(), + }, + }, + Status: v1alpha1.ReservationStatus{Host: "host-2"}, + } + hypervisor := newTestHypervisorCRD("host-1", []hv1.Instance{ + {ID: vmUUID, Name: "vm-name", Active: true}, + }) + + k8sClient := newCRTestClient(scheme, winning, candidate2, hypervisor) + controller := &CommitmentReservationController{Client: k8sClient, Scheme: scheme, Conf: config} + ctx := WithNewGlobalRequestID(context.Background()) + + if _, err := controller.reconcileAllocations(ctx, winning); err != nil { + t.Fatalf("reconcileAllocations() error = %v", err) + } + + // VM must be confirmed on winning reservation + var updatedWinning v1alpha1.Reservation + if err := k8sClient.Get(ctx, client.ObjectKeyFromObject(winning), &updatedWinning); err != nil { + t.Fatalf("failed to get winning reservation: %v", err) + } + if updatedWinning.Status.CommittedResourceReservation == nil || + updatedWinning.Status.CommittedResourceReservation.Allocations[vmUUID] != "host-1" { + t.Errorf("expected vm %s confirmed on host-1 in winning reservation status", vmUUID) + } + + // VM UUID must be removed from the candidate reservation + var updatedCandidate v1alpha1.Reservation + if err := k8sClient.Get(ctx, client.ObjectKeyFromObject(candidate2), &updatedCandidate); err != nil { + t.Fatalf("failed to get candidate reservation: %v", err) + } + if _, ok := updatedCandidate.Spec.CommittedResourceReservation.Allocations[vmUUID]; ok { + t.Errorf("expected vm UUID %s to be removed from candidate reservation", vmUUID) + } +} + +// TestCleanupCandidateReservations_NoDoubleCleanup verifies that a VM UUID already +// confirmed in a previous reconcile cycle (present in existingStatusAllocations) does +// not trigger a redundant cleanup call. +func TestReconcileAllocations_NoCleanupForAlreadyConfirmedVM(t *testing.T) { + scheme := newCRTestScheme(t) + now := time.Now() + oldTime := metav1.NewTime(now.Add(-30 * time.Minute)) + + const vmUUID = "vm-already-confirmed" + config := ReservationControllerConfig{AllocationGracePeriod: metav1.Duration{Duration: 15 * time.Minute}} + + // Winning reservation already has the VM in Status.Allocations from a prior cycle. + winning := &v1alpha1.Reservation{ + ObjectMeta: metav1.ObjectMeta{Name: "res-winning"}, + Spec: v1alpha1.ReservationSpec{ + Type: v1alpha1.ReservationTypeCommittedResource, + TargetHost: "host-1", + CommittedResourceReservation: &v1alpha1.CommittedResourceReservationSpec{ + Allocations: map[string]v1alpha1.CommittedResourceAllocation{ + vmUUID: { + CreationTimestamp: oldTime, + Resources: map[hv1.ResourceName]resource.Quantity{"memory": resource.MustParse("4Gi")}, + }, + }, + }, + }, + Status: v1alpha1.ReservationStatus{ + Host: "host-1", + CommittedResourceReservation: &v1alpha1.CommittedResourceReservationStatus{ + Allocations: map[string]string{vmUUID: "host-1"}, // already confirmed + }, + }, + } + // This candidate still has the VM UUID — it should NOT be touched because the VM + // was confirmed in a previous cycle (the cleanup already ran then). + staleCandidate := &v1alpha1.Reservation{ + ObjectMeta: metav1.ObjectMeta{Name: "res-stale-candidate"}, + Spec: v1alpha1.ReservationSpec{ + Type: v1alpha1.ReservationTypeCommittedResource, + TargetHost: "host-2", + CommittedResourceReservation: &v1alpha1.CommittedResourceReservationSpec{ + Allocations: map[string]v1alpha1.CommittedResourceAllocation{ + vmUUID: {CreationTimestamp: oldTime}, + }, + }, + }, + Status: v1alpha1.ReservationStatus{Host: "host-2"}, + } + hypervisor := newTestHypervisorCRD("host-1", []hv1.Instance{ + {ID: vmUUID, Name: "vm-name", Active: true}, + }) + + k8sClient := newCRTestClient(scheme, winning, staleCandidate, hypervisor) + controller := &CommitmentReservationController{Client: k8sClient, Scheme: scheme, Conf: config} + ctx := WithNewGlobalRequestID(context.Background()) + + if _, err := controller.reconcileAllocations(ctx, winning); err != nil { + t.Fatalf("reconcileAllocations() error = %v", err) + } + + // Stale candidate should be untouched — cleanup only fires on first confirmation. + var updatedCandidate v1alpha1.Reservation + if err := k8sClient.Get(ctx, client.ObjectKeyFromObject(staleCandidate), &updatedCandidate); err != nil { + t.Fatalf("failed to get stale candidate: %v", err) + } + if _, ok := updatedCandidate.Spec.CommittedResourceReservation.Allocations[vmUUID]; !ok { + t.Errorf("stale candidate should not have been touched on a re-reconcile of an already-confirmed VM") + } +} + func TestCommitmentReservationController_reconcileInstanceReservation_Success(t *testing.T) { scheme := newCRTestScheme(t)