diff --git a/api/v1alpha1/nodedisruption_types.go b/api/v1alpha1/nodedisruption_types.go index 065d536..40bf784 100644 --- a/api/v1alpha1/nodedisruption_types.go +++ b/api/v1alpha1/nodedisruption_types.go @@ -53,6 +53,9 @@ type NodeDisruptionSpec struct { // StartDate when the disruption should start StartDate metav1.Time `json:"startDate,omitempty"` + // DoNotGrantBefore prevents the disruption from being granted before the specified time + DoNotGrantBefore metav1.Time `json:"doNotGrantBefore,omitempty"` + // Duration of the disruption once granted Duration metav1.Duration `json:"duration,omitempty"` diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go index fe4c5d4..dafa0f2 100644 --- a/api/v1alpha1/zz_generated.deepcopy.go +++ b/api/v1alpha1/zz_generated.deepcopy.go @@ -337,6 +337,7 @@ func (in *NodeDisruptionSpec) DeepCopyInto(out *NodeDisruptionSpec) { *out = *in in.NodeSelector.DeepCopyInto(&out.NodeSelector) in.StartDate.DeepCopyInto(&out.StartDate) + in.DoNotGrantBefore.DeepCopyInto(&out.DoNotGrantBefore) out.Duration = in.Duration in.Retry.DeepCopyInto(&out.Retry) } diff --git a/chart/templates/nodedisruption-crd.yaml b/chart/templates/nodedisruption-crd.yaml index c0e9302..542ea53 100644 --- a/chart/templates/nodedisruption-crd.yaml +++ b/chart/templates/nodedisruption-crd.yaml @@ -111,6 +111,10 @@ spec: description: StartDate when the disruption should start format: date-time type: string + doNotGrantBefore: + description: DoNotGrantBefore prevents the disruption from being granted before the specified time + format: date-time + type: string type: description: Type of the node disruption type: string diff --git a/config/crd/bases/nodedisruption.criteo.com_nodedisruptions.yaml b/config/crd/bases/nodedisruption.criteo.com_nodedisruptions.yaml index 2589945..82c3975 100644 --- a/config/crd/bases/nodedisruption.criteo.com_nodedisruptions.yaml +++ b/config/crd/bases/nodedisruption.criteo.com_nodedisruptions.yaml @@ -110,6 +110,10 @@ spec: description: StartDate when the disruption should start format: date-time type: string + doNotGrantBefore: + description: DoNotGrantBefore prevents the disruption from being granted before the specified time + format: date-time + type: string type: description: Type of the node disruption type: string diff --git a/internal/controller/nodedisruption_controller.go b/internal/controller/nodedisruption_controller.go index 03c7bdd..d382e5a 100644 --- a/internal/controller/nodedisruption_controller.go +++ b/internal/controller/nodedisruption_controller.go @@ -272,10 +272,15 @@ func (ndr *SingleNodeDisruptionReconciler) getRetryDate() metav1.Time { return metav1.NewTime(retryDate) } +func (ndr *SingleNodeDisruptionReconciler) getDoNotGrantBeforeDate() metav1.Time { + return ndr.NodeDisruption.Spec.DoNotGrantBefore +} + // tryTransitionToGranted attempt to transition to the granted state but can result in either pending or rejected func (ndr *SingleNodeDisruptionReconciler) tryTransitionToGranted(ctx context.Context) (err error) { - nextRetryDate := ndr.getRetryDate() logger := log.FromContext(ctx) + doNotGrantBefore := ndr.getDoNotGrantBeforeDate() + nextRetryDate := ndr.getRetryDate() var state nodedisruptionv1alpha1.NodeDisruptionState @@ -294,7 +299,13 @@ func (ndr *SingleNodeDisruptionReconciler) tryTransitionToGranted(ctx context.Co } if !anyFailed { - state = nodedisruptionv1alpha1.Granted + if !doNotGrantBefore.IsZero() && time.Now().Before(doNotGrantBefore.Time) { + logger.Info("Grant deferred until doNotGrantBefore", "doNotGrantBefore", doNotGrantBefore.Time) + state = nodedisruptionv1alpha1.Pending + nextRetryDate = doNotGrantBefore + } else { + state = nodedisruptionv1alpha1.Granted + } } else { if !nextRetryDate.IsZero() { state = nodedisruptionv1alpha1.Pending diff --git a/internal/controller/nodedisruption_controller_test.go b/internal/controller/nodedisruption_controller_test.go index 50c2a3c..1273d36 100644 --- a/internal/controller/nodedisruption_controller_test.go +++ b/internal/controller/nodedisruption_controller_test.go @@ -360,6 +360,92 @@ var _ = Describe("NodeDisruption controller", func() { return errorsk8s.IsNotFound(err) }, timeout, interval).Should(BeTrue()) }) + + It("runs prepare and ready hooks before doNotGrantBefore while keeping the disruption pending", func() { + mockBasePath := "/api/v2" + + By("Starting an http server to receive the hook") + prepareCalledCnt := 0 + readyCalledCnt := 0 + + checkHookFn := func(w http.ResponseWriter, req *http.Request) { + switch req.URL.Path { + case mockBasePath + "/prepare": + prepareCalledCnt++ + case mockBasePath + "/ready": + readyCalledCnt++ + } + Expect(req.Header.Get("Content-Type")).Should(Equal("application/json")) + w.WriteHeader(http.StatusOK) + } + + mockServer := startDummyHTTPServer(checkHookFn) + defer mockServer.Close() + + By("creating a budget that accepts one disruption") + adb := &nodedisruptionv1alpha1.ApplicationDisruptionBudget{ + TypeMeta: metav1.TypeMeta{ + APIVersion: "nodedisruption.criteo.com/v1alpha1", + Kind: "ApplicationDisruptionBudget", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: "test-do-not-grant-before", + Namespace: "default", + }, + Spec: nodedisruptionv1alpha1.ApplicationDisruptionBudgetSpec{ + PodSelector: metav1.LabelSelector{MatchLabels: podLabels}, + MaxDisruptions: 1, + HookV2BasePath: nodedisruptionv1alpha1.HookSpec{ + URL: mockServer.URL + mockBasePath, + }, + }, + } + Expect(k8sClient.Create(ctx, adb)).Should(Succeed()) + + By("checking the ApplicationDisruptionBudget is synchronized") + ADBLookupKey := types.NamespacedName{Name: "test-do-not-grant-before", Namespace: "default"} + createdADB := &nodedisruptionv1alpha1.ApplicationDisruptionBudget{} + Eventually(func() []string { + err := k8sClient.Get(ctx, ADBLookupKey, createdADB) + Expect(err).Should(Succeed()) + return createdADB.Status.WatchedNodes + }, timeout, interval).Should(Equal([]string{"node1"})) + + By("creating a new NodeDisruption with doNotGrantBefore in the future") + doNotGrantBefore := metav1.NewTime(time.Now().Add(time.Hour).Truncate(time.Second)) + disruption := &nodedisruptionv1alpha1.NodeDisruption{ + TypeMeta: metav1.TypeMeta{ + APIVersion: "nodedisruption.criteo.com/v1alpha1", + Kind: "NodeDisruption", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: NDName, + Namespace: NDNamespace, + }, + Spec: nodedisruptionv1alpha1.NodeDisruptionSpec{ + NodeSelector: metav1.LabelSelector{MatchLabels: nodeLabels1}, + Type: "maintenance", + DoNotGrantBefore: doNotGrantBefore, + }, + } + Expect(k8sClient.Create(ctx, disruption.DeepCopy())).Should(Succeed()) + + NDLookupKey := types.NamespacedName{Name: NDName, Namespace: NDNamespace} + createdDisruption := &nodedisruptionv1alpha1.NodeDisruption{} + + By("checking the NodeDisruption stays pending after the hooks have completed") + Eventually(func(g Gomega) { + err := k8sClient.Get(ctx, NDLookupKey, createdDisruption) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(createdDisruption.Status.State).To(Equal(nodedisruptionv1alpha1.Pending)) + g.Expect(createdDisruption.Status.NextRetryDate.Time).To(Equal(doNotGrantBefore.Time)) + g.Expect(createdDisruption.Status.DisruptedDisruptionBudgets).To(HaveLen(1)) + g.Expect(createdDisruption.Status.DisruptedDisruptionBudgets[0].Preparing).To(BeTrue()) + g.Expect(createdDisruption.Status.DisruptedDisruptionBudgets[0].Ok).To(BeTrue()) + g.Expect(prepareCalledCnt).To(Equal(1)) + g.Expect(readyCalledCnt).To(Equal(1)) + }, timeout, interval).Should(Succeed()) + }) }) When("there are no budgets in the cluster", func() { @@ -571,6 +657,70 @@ var _ = Describe("NodeDisruption controller", func() { return createdDisruption.Status.State }, timeout, interval).Should(Equal(nodedisruptionv1alpha1.Granted)) }) + + It("keeps the node disruption pending until doNotGrantBefore", func() { + // Kubernetes drops sub-second precision when persisting metav1.Time values. + doNotGrantBefore := metav1.NewTime(time.Now().Add(time.Hour).Truncate(time.Second)) + + By("creating a new NodeDisruption with doNotGrantBefore in the future") + disruption := &nodedisruptionv1alpha1.NodeDisruption{ + TypeMeta: metav1.TypeMeta{ + APIVersion: "nodedisruption.criteo.com/v1alpha1", + Kind: "NodeDisruption", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: NDName, + Namespace: NDNamespace, + }, + Spec: nodedisruptionv1alpha1.NodeDisruptionSpec{ + NodeSelector: metav1.LabelSelector{MatchLabels: nodeLabels1}, + Type: "maintenance", + DoNotGrantBefore: doNotGrantBefore, + Retry: nodedisruptionv1alpha1.RetrySpec{ + Enabled: true, + }, + }, + } + Expect(k8sClient.Create(ctx, disruption.DeepCopy())).Should(Succeed()) + + By("checking the NodeDisruption stays pending and retries at doNotGrantBefore") + createdDisruption := &nodedisruptionv1alpha1.NodeDisruption{} + Eventually(func(g Gomega) { + err := k8sClient.Get(ctx, NDLookupKey, createdDisruption) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(createdDisruption.Status.State).To(Equal(nodedisruptionv1alpha1.Pending)) + g.Expect(createdDisruption.Status.NextRetryDate.Time).To(Equal(doNotGrantBefore.Time)) + }, timeout, interval).Should(Succeed()) + }) + + It("grants the node disruption when doNotGrantBefore is in the past", func() { + By("creating a new NodeDisruption with doNotGrantBefore in the past") + disruption := &nodedisruptionv1alpha1.NodeDisruption{ + TypeMeta: metav1.TypeMeta{ + APIVersion: "nodedisruption.criteo.com/v1alpha1", + Kind: "NodeDisruption", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: NDName, + Namespace: NDNamespace, + }, + Spec: nodedisruptionv1alpha1.NodeDisruptionSpec{ + NodeSelector: metav1.LabelSelector{MatchLabels: nodeLabels1}, + Type: "maintenance", + DoNotGrantBefore: metav1.NewTime(time.Now().Add(-time.Minute)), + }, + } + Expect(k8sClient.Create(ctx, disruption.DeepCopy())).Should(Succeed()) + + By("checking the NodeDisruption is still granted") + Eventually(func() nodedisruptionv1alpha1.NodeDisruptionState { + err := k8sClient.Get(ctx, NDLookupKey, disruption) + if err != nil { + panic("should be able to get") + } + return disruption.Status.State + }, timeout, interval).Should(Equal(nodedisruptionv1alpha1.Granted)) + }) }) When("a node disruption's deadline is in the past", func() {