From 1c42e65e51634417c1f85316946cff857a73e63d Mon Sep 17 00:00:00 2001 From: Pujol Date: Wed, 17 Jun 2026 19:49:04 +0200 Subject: [PATCH 1/2] feat: add cluster-based e2e testing infrastructure Add cluster mode for running gNMI controller tests against a Kind cluster. Tests are isolated via //go:build cluster tag and run with: make test-e2e-cluster PROVIDER=cisco-nxos-gnmi New files: - cluster_suite_test.go: Ginkgo suite with SynchronizedBeforeSuite for parallel execution - cluster_test.go: Manager setup tests and reconciliation tests - testutil/cluster.go: ClusterEnvironment helper for kubectl operations - testdata/cisco-nxos-gnmi/interfaces.txt: Test fixture for Interface reconciliation Makefile additions: - test-e2e-cluster: Run cluster tests with -tags=cluster - test-e2e-envtest: (placeholder) Run envtest tests with -tags=envtest - PROVIDER variable for selecting test provider Signed-off-by: Pujol --- Makefile | 40 +- config/develop/manager_patch.yaml | 1 + test/e2e/cluster_suite_test.go | 99 +++ test/e2e/cluster_test.go | 577 ++++++++++++++++++ test/e2e/e2e_suite_test.go | 2 + test/e2e/e2e_test.go | 2 + .../testdata/cisco-nxos-gnmi/interfaces.txt | 358 +++++++++++ test/e2e/testutil/cluster.go | 339 ++++++++++ test/e2e/util_test.go | 2 + 9 files changed, 1414 insertions(+), 6 deletions(-) create mode 100644 test/e2e/cluster_suite_test.go create mode 100644 test/e2e/cluster_test.go create mode 100644 test/e2e/testdata/cisco-nxos-gnmi/interfaces.txt create mode 100644 test/e2e/testutil/cluster.go diff --git a/Makefile b/Makefile index 1c13ac2b4..f24dc12e1 100644 --- a/Makefile +++ b/Makefile @@ -50,6 +50,9 @@ $(LOCALBIN): install-gofumpt: FORCE @if ! hash gofumpt 2>/dev/null; then printf "\e[1;36m>> Installing gofumpt...\e[0m\n"; go install mvdan.cc/gofumpt@latest; fi +install-ginkgo: FORCE + @if ! 
hash ginkgo 2>/dev/null; then printf "\e[1;36m>> Installing ginkgo...\e[0m\n"; go install github.com/onsi/ginkgo/v2/ginkgo@latest; fi + install-kubebuilder: FORCE @set -eou pipefail; if ! hash kubebuilder 2>/dev/null; then printf "\e[1;36m>> Installing kubebuilder...\e[0m\n"; if command -v curl >/dev/null 2>&1; then GET="curl -sLo"; elif command -v wget >/dev/null 2>&1; then GET="wget -O"; else echo "Didn't find curl or wget to download kubebuilder"; exit 2; fi; BIN=$$(go env GOBIN); if [[ -z $$BIN ]]; then BIN=$$(go env GOPATH)/bin; fi; $$GET "$$BIN/kubebuilder" "https://go.kubebuilder.io/dl/latest/$$(go env GOOS)/$$(go env GOARCH)"; chmod +x "$$BIN/kubebuilder"; fi @@ -68,11 +71,15 @@ lint: FORCE bin/golangci-lint-custom ## Run golangci-lint linter @bin/golangci-lint-custom config verify @bin/golangci-lint-custom run +# PROVIDER defines which provider to test (cisco-nxos-gnmi, cisco-iosxr-gnmi, openconfig). +# Used by test-e2e-cluster and test-e2e-envtest to filter tests. +PROVIDER ?= cisco-nxos-gnmi + fmt: FORCE install-gofumpt @printf "\e[1;36m>> gofumpt -l -w .\e[0m\n" @gofumpt -l -w $(shell git ls-files '*.go' | grep -v '^internal/provider/openconfig') -# Run the e2e tests against a k8s cluster. +# Run the scaffolded e2e tests (unchanged from Kubebuilder). test-e2e: FORCE @command -v kind >/dev/null 2>&1 || { \ echo "Kind is not installed. Please install Kind manually."; \ @@ -85,6 +92,26 @@ test-e2e: FORCE @printf "\e[1;36m>> go test ./test/e2e/ -v -ginkgo.v\e[0m\n" @KIND_CLUSTER=$(KIND_CLUSTER) go test ./test/e2e/ -v -ginkgo.v +# Run gNMI controller tests in cluster mode (requires a Kind cluster whose name equals KIND_CLUSTER exactly). +# Uses ginkgo for parallel execution; GINKGO_PROCS sets the number of parallel processes. +GINKGO_PROCS ?= 4 +test-e2e-cluster: FORCE install-ginkgo + @command -v kind >/dev/null 2>&1 || { \ + echo "Kind is not installed. Please install Kind manually."; \ + exit 1; \ + } + @kind get clusters | grep -qxF "$(KIND_CLUSTER)" || { \ + echo "No Kind cluster is running. 
Please start a Kind cluster before running the e2e tests."; \ + exit 1; \ + } + @printf "\e[1;36m>> ginkgo -procs=$(GINKGO_PROCS) -tags=cluster -timeout=15m -v ./test/e2e/ (PROVIDER=$(PROVIDER))\e[0m\n" + @KIND_CLUSTER=$(KIND_CLUSTER) E2E_PROVIDER=$(PROVIDER) ginkgo -procs=$(GINKGO_PROCS) -tags=cluster -timeout=15m -v ./test/e2e/ + +# Run gNMI controller tests in envtest mode (no cluster required). +test-e2e-envtest: FORCE install-setup-envtest + @printf "\e[1;36m>> go test ./test/e2e/ -tags=envtest -v -ginkgo.v (PROVIDER=$(PROVIDER))\e[0m\n" + @KUBEBUILDER_ASSETS=$$(setup-envtest use 1.32 -p path) E2E_PROVIDER=$(PROVIDER) go test ./test/e2e/ -tags=envtest -v -ginkgo.v + docker-build: FORCE @printf "\e[1;36m>> $(CONTAINER_TOOL) build --tag=$(IMG) .\e[0m\n" @$(CONTAINER_TOOL) build --build-arg=BININFO_BUILD_DATE=$(BININFO_BUILD_DATE) --build-arg=BININFO_COMMIT_HASH=$(BININFO_COMMIT_HASH) --build-arg=BININFO_VERSION=$(BININFO_VERSION) --tag=$(IMG) . @@ -98,15 +125,16 @@ build-installer: FORCE generate install-kustomize @printf "\e[1;36m>> kustomize build config/default > dist/install.yaml\e[0m\n" @mkdir -p dist; kustomize build config/default > dist/install.yaml -# Deploy controller to the k8s cluster +# Deploy controller to the k8s cluster. +# Use PROVIDER to set the provider (default: cisco-nxos-gnmi). 
deploy: FORCE generate install-kustomize - @printf "\e[1;36m>> kustomize build config/default | kubectl apply -f -\e[0m\n" - @kustomize build config/default | kubectl apply -f - + @printf "\e[1;36m>> deploying controller-manager (PROVIDER=$(PROVIDER))\e[0m\n" + @kustomize build config/develop | sed 's/--provider=openconfig/--provider=$(PROVIDER)/' | kubectl apply -f - # Undeploy controller from the k8s cluster undeploy: FORCE install-kustomize - @printf "\e[1;36m>> kustomize build config/default | kubectl delete -f -\e[0m\n" - @kustomize build config/default | kubectl delete --ignore-not-found=true -f - + @printf "\e[1;36m>> undeploying controller-manager\e[0m\n" + @kustomize build config/develop | kubectl delete --ignore-not-found=true -f - # Install CRDs into the k8s cluster deploy-crds: FORCE generate install-kustomize diff --git a/config/develop/manager_patch.yaml b/config/develop/manager_patch.yaml index e07e73a48..162d5e9de 100644 --- a/config/develop/manager_patch.yaml +++ b/config/develop/manager_patch.yaml @@ -1,6 +1,7 @@ - op: replace path: /spec/template/spec/containers/0/args value: + - --metrics-bind-address=:8443 - --leader-elect=false - --health-probe-bind-address=:8081 - --provider=openconfig diff --git a/test/e2e/cluster_suite_test.go b/test/e2e/cluster_suite_test.go new file mode 100644 index 000000000..1782e905e --- /dev/null +++ b/test/e2e/cluster_suite_test.go @@ -0,0 +1,99 @@ +// SPDX-FileCopyrightText: 2025 SAP SE or an SAP affiliate company and IronCore contributors +// SPDX-License-Identifier: Apache-2.0 + +//go:build cluster + +package e2e + +import ( + "fmt" + "testing" + "time" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + logf "sigs.k8s.io/controller-runtime/pkg/log" + "sigs.k8s.io/controller-runtime/pkg/log/zap" + + "github.com/ironcore-dev/network-operator/test/e2e/testutil" +) + +// TestCluster runs the e2e test suite in cluster mode. 
+// Named differently from TestE2E to avoid conflict with scaffolded e2e_suite_test.go. +func TestCluster(t *testing.T) { + RegisterFailHandler(Fail) + _, _ = fmt.Fprintf(GinkgoWriter, "Starting network-operator tests in CLUSTER mode\n") + RunSpecs(t, "e2e suite (cluster)") +} + +// SynchronizedBeforeSuite enables parallel test execution: +// - Process 1: Builds images, installs Prometheus/CertManager, deploys manager (runs first, alone) +// - All processes: Create ClusterEnvironment connection (runs after process 1 completes) +var _ = SynchronizedBeforeSuite( + // First function: runs ONLY on process 1, before other processes start + func(ctx SpecContext) []byte { + logf.SetLogger(zap.New(zap.WriteTo(GinkgoWriter), zap.UseDevMode(true))) + // Note: Timeout is set in the second function for all processes + + By("Ensure that Prometheus is enabled") + cwd, err := testutil.GetProjectDir() + Expect(err).NotTo(HaveOccurred(), "Failed to get project directory") + + err = testutil.UncommentCode(cwd+"/config/default/kustomization.yaml", "#- ../prometheus", "#") + Expect(err).NotTo(HaveOccurred(), "Failed to enable Prometheus") + + // Build and load images to Kind (only process 1) + buildAndLoadImages(ctx) + + // Setup Prometheus and CertManager (only process 1) + setupClusterDependencies(ctx) + + // Deploy controller-manager (includes CRDs via make deploy) + By("deploying controller-manager") + tmpEnv := testutil.NewClusterEnvironment() + Expect(tmpEnv.Setup(ctx)).To(Succeed()) + Expect(tmpEnv.DeployManager(ctx)).To(Succeed()) + + return nil // No data to pass to other processes + }, + // Second function: runs on ALL processes after the first function completes + func(ctx SpecContext, _ []byte) { + SetDefaultEventuallyTimeout(testutil.DefaultTimeout) + SetDefaultEventuallyPollingInterval(time.Second) + + // All processes create their own ClusterEnvironment connection + By("initializing cluster environment") + testEnv = testutil.NewClusterEnvironment() + 
Expect(testEnv.Setup(ctx)).To(Succeed()) + }, +) + +// SynchronizedAfterSuite enables parallel test cleanup: +// - All processes: Local cleanup (runs on all processes) +// - Process 1: Uninstall shared dependencies (runs last, alone) +var _ = SynchronizedAfterSuite( + // First function: runs on ALL processes + func(ctx SpecContext) { + // Perform local cleanup (will run only once even if called from signal handler) + performCleanup() + + // Wait for all test namespaces to be fully deleted before returning. + // This ensures DeferCleanup hooks have finished deleting resources and their + // finalizers have been processed by the controller. Without this, the second + // function (UndeployManager) may delete the CRDs while resources still exist, + // causing finalizers to be stuck forever. + if testEnv != nil { + _ = testEnv.WaitForTestNamespacesGone(ctx) //nolint:errcheck // best-effort cleanup + } + }, + // Second function: runs ONLY on process 1, after all other processes complete + func(ctx SpecContext) { + // Undeploy the controller-manager + tmpEnv := testutil.NewClusterEnvironment() + _ = tmpEnv.Setup(ctx) //nolint:errcheck // best-effort cleanup + _ = tmpEnv.UndeployManager(ctx) //nolint:errcheck // best-effort cleanup + + // Uninstall Prometheus and CertManager + cleanupClusterDependencies(ctx) + }, +) diff --git a/test/e2e/cluster_test.go b/test/e2e/cluster_test.go new file mode 100644 index 000000000..4483974c9 --- /dev/null +++ b/test/e2e/cluster_test.go @@ -0,0 +1,577 @@ +// SPDX-FileCopyrightText: 2025 SAP SE or an SAP affiliate company and IronCore contributors +// SPDX-License-Identifier: Apache-2.0 + +//go:build cluster + +package e2e + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "os" + "os/exec" + "path/filepath" + "strings" + "sync" + "time" + + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" + "golang.org/x/tools/txtar" + + "github.com/ironcore-dev/network-operator/test/e2e/testutil" +) + +// namespace where the project is deployed in +// tests create resources in separate namespaces +const namespace = "network-operator-system" + +// serviceAccountName created for the project +const serviceAccountName = "network-operator-controller-manager" + +// metricsServiceName is the name of the metrics service of the project +const metricsServiceName = "network-operator-controller-manager-metrics-service" + +// metricsRoleBindingName is the name of the RBAC that will be created to allow get the metrics data +const metricsRoleBindingName = "network-operator-metrics-binding" + +// image is the name of the image which will be build and loaded +// with the code source changes to be tested. +const image = "ghcr.io/ironcore-dev/network-operator:latest" + +// serverImage is the name of the image which will be built and loaded +// with the gNMI test server. +const serverImage = "ghcr.io/ironcore-dev/gnmi-test-server:latest" + +var ( + // Optional Environment Variables: + // - PROMETHEUS_INSTALL_SKIP=true: Skips Prometheus Operator installation during test setup. + // - CERT_MANAGER_INSTALL_SKIP=true: Skips CertManager installation during test setup. + // These variables are useful if Prometheus or CertManager is already installed, avoiding re-installation and conflicts. + skipPrometheusInstall = os.Getenv("PROMETHEUS_INSTALL_SKIP") == "true" + skipCertManagerInstall = os.Getenv("CERT_MANAGER_INSTALL_SKIP") == "true" + // isPrometheusOperatorAlreadyInstalled will be set true when prometheus CRDs be found on the cluster + isPrometheusOperatorAlreadyInstalled = false + // isCertManagerAlreadyInstalled will be set true when CertManager CRDs be found on the cluster + isCertManagerAlreadyInstalled = false +) + +var ( + cleanupOnce sync.Once + // testEnv is the cluster test environment. 
+ testEnv *testutil.ClusterEnvironment +) + +func init() { + _, _ = fmt.Fprintf(GinkgoWriter, "Starting network-operator tests in CLUSTER mode\n") +} + +// Manager Setup tests run serially on a single Ginkgo process. +// These tests deploy and verify the controller-manager before reconciliation tests run in parallel. +var _ = Describe("Manager Setup", Serial, Ordered, func() { + var controllerPodName string + + // Before running the tests, set up the environment by creating the namespace, + // enforce the restricted security policy to the namespace, installing CRDs, + // and deploying the controller. + BeforeAll(func(ctx SpecContext) { + By("creating manager namespace") + cmd := exec.CommandContext(ctx, "kubectl", "create", "ns", namespace, "--dry-run=client", "-o", "yaml") + nsYaml, err := testutil.Run(cmd, GinkgoWriter) + Expect(err).NotTo(HaveOccurred(), "Failed to generate namespace YAML") + cmd = exec.CommandContext(ctx, "kubectl", "apply", "-f", "-") + cmd.Stdin = bytes.NewBufferString(nsYaml) + _, err = testutil.Run(cmd, GinkgoWriter) + Expect(err).NotTo(HaveOccurred(), "Failed to create namespace") + + By("labeling the namespace to enforce the restricted security policy") + cmd = exec.CommandContext(ctx, "kubectl", "label", "--overwrite", "ns", namespace, "pod-security.kubernetes.io/enforce=restricted") + _, err = testutil.Run(cmd, GinkgoWriter) + Expect(err).NotTo(HaveOccurred(), "Failed to label namespace with restricted policy") + + By("installing CRDs") + cmd = exec.CommandContext(ctx, "make", "deploy-crds") + _, err = testutil.Run(cmd, GinkgoWriter) + Expect(err).NotTo(HaveOccurred(), "Failed to install CRDs") + + By("deploying the controller-manager") + cmd = exec.CommandContext(ctx, "make", "deploy") + _, err = testutil.Run(cmd, GinkgoWriter) + Expect(err).NotTo(HaveOccurred(), "Failed to deploy the controller-manager") + }) + + // After all setup tests complete, clean up the manager. 
+ // Note: CRDs are left installed for the parallel reconciliation tests. + AfterAll(func(ctx SpecContext) { + By("cleaning up the ClusterRoleBinding of the service account to allow access to metrics") + cmd := exec.CommandContext(ctx, "kubectl", "delete", "clusterrolebinding", metricsRoleBindingName, "--ignore-not-found") + _, err := testutil.Run(cmd, GinkgoWriter) + Expect(err).NotTo(HaveOccurred(), "Failed to delete ClusterRoleBinding") + + By("cleaning up the curl pod for metrics") + cmd = exec.CommandContext(ctx, "kubectl", "delete", "pod", "curl-metrics", "-n", namespace, "--ignore-not-found") + _, err = testutil.Run(cmd, GinkgoWriter) + Expect(err).NotTo(HaveOccurred(), "Failed to delete curl-metrics pod") + }) + + // After each test, check for failures and collect logs, events, + // and pod descriptions for debugging. + AfterEach(func(ctx SpecContext) { + if specReport := CurrentSpecReport(); specReport.Failed() { + By("Fetching controller manager pod logs") + cmd := exec.CommandContext(ctx, "kubectl", "logs", controllerPodName, "-n", namespace) + controllerLogs, err := testutil.Run(cmd, GinkgoWriter) + if err == nil { + _, _ = fmt.Fprintf(GinkgoWriter, "Controller logs:\n %s", controllerLogs) + } else { + _, _ = fmt.Fprintf(GinkgoWriter, "Failed to get Controller logs: %s", err) + } + + By("Fetching Kubernetes events") + cmd = exec.CommandContext(ctx, "kubectl", "get", "events", "-n", namespace, "--sort-by=.lastTimestamp") + eventsOutput, err := testutil.Run(cmd, GinkgoWriter) + if err == nil { + _, _ = fmt.Fprintf(GinkgoWriter, "Kubernetes events:\n%s", eventsOutput) + } else { + _, _ = fmt.Fprintf(GinkgoWriter, "Failed to get Kubernetes events: %s", err) + } + + By("Fetching curl-metrics logs") + cmd = exec.CommandContext(ctx, "kubectl", "logs", "curl-metrics", "-n", namespace) + metricsOutput, err := testutil.Run(cmd, GinkgoWriter) + if err == nil { + _, _ = fmt.Fprintf(GinkgoWriter, "Metrics logs:\n %s", metricsOutput) + } else { + _, _ = 
fmt.Fprintf(GinkgoWriter, "Failed to get curl-metrics logs: %s", err) + } + + By("Fetching controller manager pod description") + cmd = exec.CommandContext(ctx, "kubectl", "describe", "pod", controllerPodName, "-n", namespace) + podDescription, err := testutil.Run(cmd, GinkgoWriter) + if err == nil { + fmt.Println("Pod description:\n", podDescription) + } else { + fmt.Println("Failed to describe controller pod") + } + } + }) + + BeforeEach(func() { + SetDefaultEventuallyTimeout(testutil.LongTimeout) + SetDefaultEventuallyPollingInterval(time.Second) + }) + + It("should run successfully", func(ctx SpecContext) { + By("validating that the controller-manager pod is running as expected") + verifyControllerUp := func(g Gomega) { + // Get the name of the controller-manager pod + cmd := exec.CommandContext( + ctx, "kubectl", "get", + "pods", "-l", "control-plane=controller-manager", + "-o", "go-template={{ range .items }}"+ + "{{ if not .metadata.deletionTimestamp }}"+ + "{{ .metadata.name }}"+ + "{{ \"\\n\" }}{{ end }}{{ end }}", + "-n", namespace, + ) + + podOutput, err := testutil.Run(cmd, GinkgoWriter) + g.Expect(err).NotTo(HaveOccurred(), "Failed to retrieve controller-manager pod information") + podNames := testutil.GetNonEmptyLines(podOutput) + g.Expect(podNames).To(HaveLen(1), "expected 1 controller pod running") + controllerPodName = podNames[0] + g.Expect(controllerPodName).To(ContainSubstring("controller-manager")) + + // Validate the pod's status + cmd = exec.CommandContext(ctx, "kubectl", "get", "pods", controllerPodName, "-o", "jsonpath={.status.phase}", "-n", namespace) + output, err := testutil.Run(cmd, GinkgoWriter) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(output).To(Equal("Running"), "Incorrect controller-manager pod status") + } + Eventually(verifyControllerUp).Should(Succeed()) + }) + + It("should ensure the metrics endpoint is serving metrics", func(ctx SpecContext) { + By("creating a ClusterRoleBinding for the service account to allow access 
to metrics") + // #nosec G204 + cmd := exec.CommandContext(ctx, "kubectl", "create", "clusterrolebinding", metricsRoleBindingName, "--clusterrole=network-operator-metrics-reader", fmt.Sprintf("--serviceaccount=%s:%s", namespace, serviceAccountName)) + _, err := testutil.Run(cmd, GinkgoWriter) + Expect(err).NotTo(HaveOccurred(), "Failed to create ClusterRoleBinding") + + By("validating that the metrics service is available") + cmd = exec.CommandContext(ctx, "kubectl", "get", "service", metricsServiceName, "-n", namespace) + _, err = testutil.Run(cmd, GinkgoWriter) + Expect(err).NotTo(HaveOccurred(), "Metrics service should exist") + + By("validating that the ServiceMonitor for Prometheus is applied in the namespace") + cmd = exec.CommandContext(ctx, "kubectl", "get", "ServiceMonitor", "-n", namespace) + _, err = testutil.Run(cmd, GinkgoWriter) + Expect(err).NotTo(HaveOccurred(), "ServiceMonitor should exist") + + By("getting the service account token") + token, err := serviceAccountToken(ctx) + Expect(err).NotTo(HaveOccurred()) + Expect(token).NotTo(BeEmpty()) + + By("waiting for the metrics endpoint to be ready") + verifyMetricsEndpointReady := func(g Gomega) { + kcmd := exec.CommandContext(ctx, "kubectl", "get", "endpoints", metricsServiceName, "-n", namespace) + output, kErr := testutil.Run(kcmd, GinkgoWriter) + g.Expect(kErr).NotTo(HaveOccurred()) + g.Expect(output).To(ContainSubstring("8443"), "Metrics endpoint is not ready") + } + Eventually(verifyMetricsEndpointReady).Should(Succeed()) + + By("verifying that the controller manager has started") + verifyManagerStarted := func(g Gomega) { + kcmd := exec.CommandContext(ctx, "kubectl", "logs", controllerPodName, "-n", namespace) + output, kErr := testutil.Run(kcmd, GinkgoWriter) + g.Expect(kErr).NotTo(HaveOccurred()) + g.Expect(output).To(ContainSubstring("starting manager"), "Manager not yet started") + } + Eventually(verifyManagerStarted).Should(Succeed()) + + By("creating the curl-metrics pod to access the 
metrics endpoint") + // #nosec G204 + cmd = exec.CommandContext(ctx, "kubectl", "run", "curl-metrics", "--restart=Never", + "--namespace", namespace, + "--image=curlimages/curl:latest", + "--overrides", + fmt.Sprintf(`{ + "spec": { + "containers": [{ + "name": "curl", + "image": "curlimages/curl:latest", + "command": ["/bin/sh", "-c"], + "args": ["curl -v -k -H 'Authorization: Bearer %s' https://%s.%s.svc.cluster.local:8443/metrics"], + "securityContext": { + "allowPrivilegeEscalation": false, + "capabilities": { + "drop": ["ALL"] + }, + "runAsNonRoot": true, + "runAsUser": 1000, + "seccompProfile": { + "type": "RuntimeDefault" + } + } + }], + "serviceAccount": "%s" + } + }`, token, metricsServiceName, namespace, serviceAccountName)) + _, err = testutil.Run(cmd, GinkgoWriter) + Expect(err).NotTo(HaveOccurred(), "Failed to create curl-metrics pod") + + By("waiting for the curl-metrics pod to complete.") + verifyCurlUp := func(g Gomega) { + cmd := exec.CommandContext(ctx, "kubectl", "get", "pods", "curl-metrics", "-o", "jsonpath={.status.phase}", "-n", namespace) + output, err := testutil.Run(cmd, GinkgoWriter) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(output).To(Equal("Succeeded"), "curl pod in wrong status") + } + Eventually(verifyCurlUp, testutil.VeryLongTimeout).Should(Succeed()) + + By("getting the metrics by checking curl-metrics logs") + metricsOutput := getMetricsOutput(ctx) + Expect(metricsOutput).To(ContainSubstring("controller_runtime_webhook_panics_total")) + }) + + It("should provisioned cert-manager", func(ctx SpecContext) { + By("validating that cert-manager has the certificate Secret") + verifyCertManager := func(g Gomega) { + cmd := exec.CommandContext(ctx, "kubectl", "get", "secrets", "webhook-server-cert", "-n", namespace) + _, err := testutil.Run(cmd, GinkgoWriter) + g.Expect(err).NotTo(HaveOccurred()) + } + Eventually(verifyCertManager).Should(Succeed()) + }) + + It("should have CA injection for validating webhooks", func(ctx SpecContext) 
{ + By("checking CA injection for validating webhooks") + verifyCAInjection := func(g Gomega) { + cmd := exec.CommandContext(ctx, "kubectl", "get", + "validatingwebhookconfigurations.admissionregistration.k8s.io", + "network-operator-validating-webhook-configuration", + "-o", "go-template={{ range .webhooks }}{{ .clientConfig.caBundle }}{{ end }}") + vwhOutput, err := testutil.Run(cmd, GinkgoWriter) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(len(vwhOutput)).To(BeNumerically(">", 10)) + } + Eventually(verifyCAInjection).Should(Succeed()) + }) + + // +kubebuilder:scaffold:e2e-webhooks-checks +}) + +// Reconciliation tests run in parallel across multiple Ginkgo processes. +// Each test creates its own namespace and gnmi-test-server instance for isolation. +var _ = Describe("Reconciliation", func() { + projectDir, err := testutil.GetProjectDir() + if err != nil { + Fail(fmt.Sprintf("Failed to get project directory: %v", err)) + } + + // Get provider filter from environment (set by Makefile) + providerFilter := os.Getenv("E2E_PROVIDER") + + testdataRoot := filepath.Join(projectDir, "test", "e2e", "testdata") + providerDirs, err := os.ReadDir(testdataRoot) + if err != nil { + Fail(fmt.Sprintf("Failed to read testdata directory: %v", err)) + } + + var testFiles []string + var providerName string + for _, providerDir := range providerDirs { + if !providerDir.IsDir() { + continue + } + providerName = providerDir.Name() + + if providerFilter != "" && providerName != providerFilter { + continue + } + + providerTestdataDir := filepath.Join(testdataRoot, providerName) + + testFiles, err = filepath.Glob(filepath.Join(providerTestdataDir, "*.txt")) + if err != nil { + Fail(fmt.Sprintf("Failed to glob testdata: %v", err)) + } + break + } + + for _, testFile := range testFiles { + testName := filepath.Base(testFile) + testName = testName[:len(testName)-4] // remove .txt + + It(fmt.Sprintf("should reconcile %s/%s", providerName, testName), func(ctx SpecContext) { + By("parsing 
testdata file") + a, err := txtar.ParseFile(testFile) + Expect(err).NotTo(HaveOccurred(), "Failed to parse test file: %s", testFile) + + var state, preload []byte + var resources []txtar.File + for _, f := range a.Files { + switch f.Name { + case "state/expect": + state = f.Data + case "state/preload": + preload = f.Data + default: + resources = append(resources, f) + } + } + Expect(state).NotTo(BeEmpty(), "Expected '-- state/expect --' section in testdata") + Expect(resources).NotTo(BeEmpty(), "Expected at least one resource in testdata") + + By("creating test namespace") + testNamespace := fmt.Sprintf("test-%s-%s-%s", providerName, strings.ReplaceAll(testName, "_", "-"), time.Now().Format("20060102150405")) + // Truncate to 63 chars max (K8s namespace limit); a DNS-1123 label must not end with '-', so drop any dash left at the cut + if len(testNamespace) > 63 { + testNamespace = strings.TrimRight(testNamespace[:63], "-") + } + Expect(testEnv.CreateNamespace(ctx, testNamespace)).NotTo(HaveOccurred(), "Failed to create test namespace") + + DeferCleanup(func(_ SpecContext) { + // Use a fresh context with generous timeout for cleanup + // The SpecContext may be nearly exhausted after test timeout + cleanupCtx, cancel := context.WithTimeout(context.Background(), 2*time.Minute) + defer cancel() + + // Clean up test resources before deleting the gnmi-test-server pod to avoid issues with finalizers that require API access. 
+ By("deleting test resources") + _ = testEnv.DeleteCustomResources(cleanupCtx, testNamespace) //nolint:errcheck // best-effort cleanup + By("deleting test namespace") + _ = testEnv.DeleteNamespace(cleanupCtx, testNamespace) //nolint:errcheck // best-effort cleanup + }) + + deviceName := fmt.Sprintf("test-device-%d", time.Now().UnixNano()) + + By("deploying a gnmi-test-server instance for this test") + gnmiAddr, err := testEnv.DeployGNMIServer(ctx, testNamespace) + Expect(err).NotTo(HaveOccurred(), "Failed to deploy gnmi-test-server") + Expect(gnmiAddr).ToNot(BeNil()) + + By("preloading gNMI state if specified") + if len(preload) > 0 { + err = testEnv.PreloadGNMIState(ctx, testNamespace, preload) + Expect(err).NotTo(HaveOccurred(), "Failed to preload gNMI state") + } + + By("creating a test device") + device := fmt.Sprintf(` +apiVersion: networking.metal.ironcore.dev/v1alpha1 +kind: Device +metadata: + name: %s + namespace: %s + labels: + %s: "" +spec: + endpoint: + address: "%s"`, deviceName, testNamespace, testutil.E2ETestLabel, gnmiAddr.String()) + err = testutil.Apply(ctx, device, GinkgoWriter) + Expect(err).NotTo(HaveOccurred(), "Failed to apply Device") + + By("applying resources from testdata") + _, _ = fmt.Fprintf(GinkgoWriter, "DEBUG: Found %d resources to apply\n", len(resources)) + for _, res := range resources { + _, _ = fmt.Fprintf(GinkgoWriter, "DEBUG: Applying resource: %s\n", res.Name) + err = testutil.ApplyWithPatch(ctx, string(res.Data), testNamespace, deviceName, GinkgoWriter) + Expect(err).NotTo(HaveOccurred(), "Failed to apply resource: %s", res.Name) + } + + By("waiting for resources to be configured") + for _, res := range resources { + // Extract actual kind/name from YAML since section name may differ from metadata.name + resourceID, err := testutil.ExtractResourceIdentifier(string(res.Data)) + Expect(err).NotTo(HaveOccurred(), "Failed to extract resource identifier from: %s", res.Name) + err = testutil.WaitForCondition(ctx, resourceID, 
testNamespace, GinkgoWriter) + Expect(err).NotTo(HaveOccurred(), "Resource not configured: %s", resourceID) + } + + By("verifying gNMI state matches expected JSON") + gnmiState, err := testEnv.GetGNMIState(ctx, testNamespace) + Expect(err).NotTo(HaveOccurred(), "Failed to get gNMI state") + + err = testutil.CompareJSON(string(gnmiState), string(state)) + Expect(err).NotTo(HaveOccurred(), "gNMI state does not match expected JSON") + }) + } +}) + +// serviceAccountToken returns a token for the specified service account in the given namespace. +// It uses the Kubernetes TokenRequest API to generate a token by directly sending a request +// and parsing the resulting token from the API response. +func serviceAccountToken(ctx context.Context) (string, error) { + // #nosec G101 + const tokenRequestRawString = `{ + "apiVersion": "authentication.k8s.io/v1", + "kind": "TokenRequest" + }` + + // Temporary file to store the token request + secretName := serviceAccountName + "-token-request" + tokenRequestFile := filepath.Join(os.TempDir(), secretName) + if err := os.WriteFile(tokenRequestFile, []byte(tokenRequestRawString), os.FileMode(0o644)); err != nil { + return "", err + } + + var out string + verifyTokenCreation := func(g Gomega) { + // Execute kubectl command to create the token + // #nosec G204 + cmd := exec.CommandContext(ctx, "kubectl", "create", "--raw", fmt.Sprintf("/api/v1/namespaces/%s/serviceaccounts/%s/token", namespace, serviceAccountName), "-f", tokenRequestFile) + output, err := cmd.CombinedOutput() + g.Expect(err).NotTo(HaveOccurred()) + + // Parse the JSON output to extract the token + var token tokenRequest + err = json.Unmarshal(output, &token) + g.Expect(err).NotTo(HaveOccurred()) + + out = token.Status.Token + } + Eventually(verifyTokenCreation).Should(Succeed()) + + return out, nil +} + +// getMetricsOutput retrieves and returns the logs from the curl pod used to access the metrics endpoint. 
+func getMetricsOutput(ctx context.Context) string { + By("getting the curl-metrics logs") + cmd := exec.CommandContext(ctx, "kubectl", "logs", "curl-metrics", "-n", namespace) + metricsOutput, err := testutil.Run(cmd, GinkgoWriter) + Expect(err).NotTo(HaveOccurred(), "Failed to retrieve logs from curl pod") + Expect(metricsOutput).To(ContainSubstring("< HTTP/1.1 200 OK")) + return metricsOutput +} + +// tokenRequest is a simplified representation of the Kubernetes TokenRequest API response, +// containing only the token field that we need to extract. +type tokenRequest struct { + Status struct { + Token string `json:"token"` + } `json:"status"` +} + +// performCleanup ensures testEnv.Teardown is called exactly once +func performCleanup() { + cleanupOnce.Do(func() { + if testEnv != nil { + fmt.Fprintf(os.Stderr, "Tearing down test environment...\n") + ctx, cancel := context.WithTimeout(context.Background(), testutil.DefaultTimeout) + defer cancel() + if err := testEnv.Teardown(ctx); err != nil { + fmt.Fprintf(os.Stderr, "Warning: failed to teardown test environment: %v\n", err) + } + } + }) +} + +// setupClusterDependencies installs Prometheus and CertManager if needed. +// Called by cluster_suite_test.go. +func setupClusterDependencies(ctx SpecContext) { + if !skipPrometheusInstall { + By("checking if prometheus is installed already") + isPrometheusOperatorAlreadyInstalled = testutil.IsPrometheusCRDsInstalled(ctx, GinkgoWriter) + if !isPrometheusOperatorAlreadyInstalled { + _, _ = fmt.Fprintf(GinkgoWriter, "Installing Prometheus Operator...\n") + Expect(testutil.InstallPrometheusOperator(ctx, GinkgoWriter)).To(Succeed(), "Failed to install Prometheus Operator") + } else { + _, _ = fmt.Fprintf(GinkgoWriter, "WARNING: Prometheus Operator is already installed. 
Skipping installation...\n") + } + } + if !skipCertManagerInstall { + By("checking if cert manager is installed already") + isCertManagerAlreadyInstalled = testutil.IsCertManagerCRDsInstalled(ctx, GinkgoWriter) + if !isCertManagerAlreadyInstalled { + _, _ = fmt.Fprintf(GinkgoWriter, "Installing CertManager...\n") + Expect(testutil.InstallCertManager(ctx, GinkgoWriter)).To(Succeed(), "Failed to install CertManager") + // Fresh install - need to wait for webhook to be ready (can take up to 90s) + By("waiting for cert-manager webhook to be ready (fresh install)") + Expect(testutil.WaitForCertManagerWebhook(ctx, GinkgoWriter)).To(Succeed(), "Cert-manager webhook not ready") + } else { + _, _ = fmt.Fprintf(GinkgoWriter, "WARNING: CertManager is already installed. Skipping installation...\n") + // Already installed - the webhook is assumed to be ready; no readiness check is performed on this path + } + } +} + +// cleanupClusterDependencies uninstalls Prometheus and CertManager unless their install was skipped via env var or they were found pre-installed during setup. +// Called by cluster_suite_test.go. +func cleanupClusterDependencies(ctx SpecContext) { + if !skipPrometheusInstall && !isPrometheusOperatorAlreadyInstalled { + _, _ = fmt.Fprintf(GinkgoWriter, "Uninstalling Prometheus Operator...\n") + testutil.UninstallPrometheusOperator(ctx, GinkgoWriter) + } + if !skipCertManagerInstall && !isCertManagerAlreadyInstalled { + _, _ = fmt.Fprintf(GinkgoWriter, "Uninstalling CertManager...\n") + testutil.UninstallCertManager(ctx, GinkgoWriter) + } +} + +// buildAndLoadImages builds and loads Docker images to Kind. +// Called by cluster_suite_test.go. 
+func buildAndLoadImages(ctx SpecContext) {
+	// Each entry pairs a make target (and the make variable carrying the image
+	// reference) with the image it produces. Images are processed in order:
+	// build, then load into Kind, before moving on to the next one.
+	artifacts := []struct {
+		label      string
+		makeTarget string
+		makeVar    string
+		ref        string
+	}{
+		{"manager(Operator)", "docker-build", "IMG", image},
+		{"gnmi-test-server", "docker-build-test-gnmi-server", "TEST_SERVER_IMG", serverImage},
+	}
+
+	for _, a := range artifacts {
+		By("building the " + a.label + " image")
+		build := exec.CommandContext(ctx, "make", a.makeTarget, a.makeVar+"="+a.ref)
+		_, buildErr := testutil.Run(build, GinkgoWriter)
+		// Offset 1 attributes a failure to the caller of buildAndLoadImages.
+		ExpectWithOffset(1, buildErr).NotTo(HaveOccurred(), "Failed to build the "+a.label+" image")
+
+		By("loading the " + a.label + " image on Kind")
+		loadErr := testutil.LoadImageToKindClusterWithName(ctx, a.ref, GinkgoWriter)
+		ExpectWithOffset(1, loadErr).NotTo(HaveOccurred(), "Failed to load the "+a.label+" image into Kind")
+	}
+}
diff --git a/test/e2e/e2e_suite_test.go b/test/e2e/e2e_suite_test.go
index 3702bb802..f59c18c69 100644
--- a/test/e2e/e2e_suite_test.go
+++ b/test/e2e/e2e_suite_test.go
@@ -1,3 +1,5 @@
+//go:build !cluster && !envtest
+
 // SPDX-FileCopyrightText: 2025 SAP SE or an SAP affiliate company and IronCore contributors
 // SPDX-License-Identifier: Apache-2.0
 
diff --git a/test/e2e/e2e_test.go b/test/e2e/e2e_test.go
index e583ec9cb..2dabadeff 100644
--- a/test/e2e/e2e_test.go
+++ b/test/e2e/e2e_test.go
@@ -1,3 +1,5 @@
+//go:build !cluster && !envtest
+
 // SPDX-FileCopyrightText: 2025 SAP SE or an SAP affiliate company and IronCore contributors
 // SPDX-License-Identifier: Apache-2.0
 
diff --git a/test/e2e/testdata/cisco-nxos-gnmi/interfaces.txt b/test/e2e/testdata/cisco-nxos-gnmi/interfaces.txt
new file mode 100644
index 000000000..d2c520759
--- /dev/null
+++ b/test/e2e/testdata/cisco-nxos-gnmi/interfaces.txt
@@ -0,0 +1,358 @@
+#
Integration test for Interface resources +# +# Tests interface types and NX-OS specific InterfaceConfig: +# loopback-vtep -> Loopback with IPv4 address +# uplink-spine1 -> Physical L3 with unnumbered IPv4 + BFD +# edge-port -> Physical L2 with InterfaceConfig (STP edge, BufferBoost disabled) +# host-pc -> Aggregate L2 with vPC, LACP, InterfaceConfig (STP network, LACP options) + +-- state/preload -- +{ + "System": { + "procsys-items": { + "bootTime": "1700000000" + } + } +} + +-- interfaceconfig/edge-port-config -- +apiVersion: nx.cisco.networking.metal.ironcore.dev/v1alpha1 +kind: InterfaceConfig +metadata: + name: interface-nxconfig-edge + namespace: default +spec: + bufferBoost: + enabled: false + spanningTree: + portType: Edge + bpduGuard: true + +-- interfaceconfig/host-pc-config -- +apiVersion: nx.cisco.networking.metal.ironcore.dev/v1alpha1 +kind: InterfaceConfig +metadata: + name: interface-nxconfig-po + namespace: default +spec: + lacp: + vpcConvergence: true + suspendIndividual: false + spanningTree: + portType: Network + bpduFilter: true + +-- interface/loopback-vtep -- +apiVersion: networking.metal.ironcore.dev/v1alpha1 +kind: Interface +metadata: + labels: + networking.metal.ironcore.dev/device-name: device + name: loopback-vtep + namespace: default +spec: + deviceRef: + name: device + name: lo0 + description: NVE/VTEP Leaf1 + adminState: Up + type: Loopback + ipv4: + addresses: + - 10.0.0.10/32 + +-- interface/uplink-spine1 -- +apiVersion: networking.metal.ironcore.dev/v1alpha1 +kind: Interface +metadata: + labels: + networking.metal.ironcore.dev/device-name: device + name: uplink-spine1 + namespace: default +spec: + deviceRef: + name: device + name: eth1/1 + description: Leaf1 to Spine1 + adminState: Up + type: Physical + mtu: 9216 + ipv4: + unnumbered: + interfaceRef: + name: loopback-vtep + bfd: + enabled: true + desiredMinimumTxInterval: 300ms + requiredMinimumReceive: 300ms + detectionMultiplier: 3 + +-- interface/edge-port -- +apiVersion: 
networking.metal.ironcore.dev/v1alpha1 +kind: Interface +metadata: + name: edge-port + namespace: default + labels: + networking.metal.ironcore.dev/device: device +spec: + deviceRef: + name: device + name: eth1/2 + type: Physical + adminState: Up + mtu: 1500 + description: "Edge port with STP and BufferBoost config" + providerConfigRef: + apiVersion: nx.cisco.networking.metal.ironcore.dev/v1alpha1 + kind: InterfaceConfig + name: interface-nxconfig-edge + switchport: + mode: Trunk + nativeVlan: 1 + allowedVlans: + - 10 + +-- interface/host-pc -- +apiVersion: networking.metal.ironcore.dev/v1alpha1 +kind: Interface +metadata: + name: host-pc + namespace: default + labels: + networking.metal.ironcore.dev/device: device +spec: + deviceRef: + name: device + name: po10 + type: Aggregate + adminState: Up + mtu: 1500 + description: "vPC to Host with STP and LACP config" + providerConfigRef: + apiVersion: nx.cisco.networking.metal.ironcore.dev/v1alpha1 + kind: InterfaceConfig + name: interface-nxconfig-po + switchport: + mode: Trunk + nativeVlan: 1 + allowedVlans: + - 10 + aggregation: + controlProtocol: + mode: Active + memberInterfaceRefs: + - name: edge-port + multichassis: + enabled: true + id: 2 + +-- state/expect -- +{ + "System": { + "bfd-items": { + "inst-items": { + "if-items": { + "If-list": [ + { + "adminSt": "enabled", + "id": "eth1/1", + "ifka-items": { + "detectMult": 3, + "minRxIntvl": 300, + "minTxIntvl": 300 + } + } + ] + } + } + }, + "fm-items": { + "bfd-items": { + "adminSt": "enabled" + }, + "lacp-items": { + "adminSt": "enabled" + } + }, + "icmpv4-items": { + "inst-items": { + "dom-items": { + "Dom-list": [ + { + "name": "default", + "if-items": { + "If-list": [ + { + "ctrl": "port-unreachable,redirect", + "id": "lo0" + }, + { + "ctrl": "port-unreachable", + "id": "eth1/1" + } + ] + } + } + ] + } + } + }, + "intf-items": { + "aggr-items": { + "AggrIf-list": [ + { + "accessVlan": "vlan-1", + "adminSt": "up", + "aggrExtd-items": { + "bufferBoost": "enable" 
+ }, + "descr": "vPC to Host with STP and LACP config", + "id": "po10", + "lacpVpcConvergence": "enable", + "layer": "Layer2", + "medium": "broadcast", + "mode": "trunk", + "mtu": 1500, + "nativeVlan": "vlan-1", + "pcMode": "active", + "rsmbrIfs-items": { + "RsMbrIfs-list": [ + { + "tDn": "/System/intf-items/phys-items/PhysIf-list[id='eth1/2']" + } + ] + }, + "suspIndividual": "disable", + "trunkVlans": "10", + "userCfgdFlags": "admin_layer,admin_mtu,admin_state" + } + ] + }, + "lb-items": { + "LbRtdIf-list": [ + { + "adminSt": "up", + "descr": "NVE/VTEP Leaf1", + "id": "lo0", + "rtvrfMbr-items": { + "tDn": "/System/inst-items/Inst-list[name='default']" + } + } + ] + }, + "phys-items": { + "PhysIf-list": [ + { + "FECMode": "auto", + "accessVlan": "unknown", + "adminSt": "up", + "descr": "Leaf1 to Spine1", + "id": "eth1/1", + "layer": "Layer3", + "medium": "p2p", + "mode": "access", + "mtu": 9216, + "nativeVlan": "unknown", + "physExtd-items": { + "bufferBoost": "enable" + }, + "rtvrfMbr-items": { + "tDn": "/System/inst-items/Inst-list[name='default']" + }, + "trunkVlans": "1-4094", + "userCfgdFlags": "admin_layer,admin_mtu,admin_state" + }, + { + "accessVlan": "vlan-1", + "adminSt": "up", + "descr": "Edge port with STP and BufferBoost config", + "FECMode": "auto", + "id": "eth1/2", + "layer": "Layer2", + "medium": "broadcast", + "mode": "trunk", + "mtu": 1500, + "nativeVlan": "vlan-1", + "trunkVlans": "10", + "userCfgdFlags": "admin_layer,admin_mtu,admin_state", + "physExtd-items": { + "bufferBoost": "disable" + } + } + ] + } + }, + "ipv4-items": { + "inst-items": { + "dom-items": { + "Dom-list": [ + { + "name": "default", + "if-items": { + "If-list": [ + { + "addr-items": { + "Addr-list": [ + { + "addr": "10.0.0.10/32", + "pref": 0, + "tag": 0, + "type": "primary" + } + ] + }, + "id": "lo0" + }, + { + "id": "eth1/1", + "unnumbered": "lo0" + } + ] + } + } + ] + } + } + }, + "stp-items": { + "inst-items": { + "if-items": { + "If-list": [ + { + "id": "eth1/2", + 
"mode": "edge", + "bpdufilter": "default", + "bpduguard": "enable" + }, + { + "id": "po10", + "mode": "network", + "bpdufilter": "enable", + "bpduguard": "default" + } + ] + } + } + }, + "vpc-items": { + "inst-items": { + "dom-items": { + "if-items": { + "If-list": [ + { + "id": 2, + "rsvpcConf-items": { + "tDn": "/System/intf-items/aggr-items/AggrIf-list[id='po10']" + } + } + ] + } + } + } + }, + "procsys-items": { + "bootTime": "1700000000" + } + } +} diff --git a/test/e2e/testutil/cluster.go b/test/e2e/testutil/cluster.go new file mode 100644 index 000000000..f34990ec9 --- /dev/null +++ b/test/e2e/testutil/cluster.go @@ -0,0 +1,339 @@ +// SPDX-FileCopyrightText: 2026 SAP SE or an SAP affiliate company and IronCore contributors +// SPDX-License-Identifier: Apache-2.0 + +package testutil + +import ( + "bytes" + "context" + "fmt" + "net/netip" + "os" + "os/exec" + "strconv" + "strings" + + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/client-go/kubernetes/scheme" + "k8s.io/client-go/rest" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + + nxv1alpha1 "github.com/ironcore-dev/network-operator/api/cisco/nx/v1alpha1" + "github.com/ironcore-dev/network-operator/api/core/v1alpha1" +) + +var ( + // gnmiPort is the port on which the gnmi-test-server listens for gNMI requests. + gnmiPort uint16 = 9339 + // serverImage is the container image for the gnmi-test-server. + // This must match the image built by the Makefile. + serverImage = "ghcr.io/ironcore-dev/gnmi-test-server:latest" +) + +// ClusterEnvironment enables end-to-end tests to run against a real Kubernetes cluster (e.g., Kind). +// TODO: use native library instead of kubectl (follow up) +type ClusterEnvironment struct { + restConfig *rest.Config + k8sClient client.Client +} + +// NewClusterEnvironment creates a new cluster-based test environment. 
+func NewClusterEnvironment() *ClusterEnvironment {
+	return &ClusterEnvironment{}
+}
+
+// Setup connects to the existing cluster (CRDs should already be installed).
+//
+// It registers the core/v1, network-operator core and NX-OS API types with the
+// global client-go scheme, loads the REST config via controller-runtime's
+// standard kubeconfig lookup and creates a client bound to that scheme.
+// Every failure is returned as an error; the receiver is only updated once both
+// the config and the client have been created successfully.
+func (c *ClusterEnvironment) Setup(ctx context.Context) error {
+	// Register schemes
+	if err := corev1.AddToScheme(scheme.Scheme); err != nil {
+		return fmt.Errorf("failed to register core/v1 scheme: %w", err)
+	}
+	if err := v1alpha1.AddToScheme(scheme.Scheme); err != nil {
+		return fmt.Errorf("failed to register network-operator core scheme: %w", err)
+	}
+	if err := nxv1alpha1.AddToScheme(scheme.Scheme); err != nil {
+		return fmt.Errorf("failed to register cisco nx scheme: %w", err)
+	}
+
+	// Get REST config from kubeconfig. GetConfig (unlike GetConfigOrDie) reports a
+	// missing or invalid kubeconfig as an error instead of terminating the test process.
+	restConfig, err := ctrl.GetConfig()
+	if err != nil {
+		return fmt.Errorf("failed to load kubeconfig: %w", err)
+	}
+
+	k8sClient, err := client.New(restConfig, client.Options{Scheme: scheme.Scheme})
+	if err != nil {
+		return fmt.Errorf("failed to create Kubernetes client: %w", err)
+	}
+
+	c.restConfig = restConfig
+	c.k8sClient = k8sClient
+	return nil
+}
+
+// InstallCRDs installs CRDs into the cluster. Should only be called once (from process 1).
+// Uses server-side apply to handle existing CRDs gracefully.
+// The kustomization is resolved relative to the project directory (see runKubectl).
+func (c *ClusterEnvironment) InstallCRDs(ctx context.Context) error {
+	if err := c.runKubectl(ctx, "apply", "-k", "config/crd", "--server-side", "--force-conflicts"); err != nil {
+		return fmt.Errorf("failed to install CRDs: %w", err)
+	}
+	return nil
+}
+
+// DeployManager deploys the controller-manager and waits for it to be ready.
+// Should only be called once (from process 1).
+// Respects E2E_PROVIDER env var for provider selection.
+func (c *ClusterEnvironment) DeployManager(ctx context.Context) error {
+	dir, _ := GetProjectDir() //nolint:errcheck // uses current dir as fallback
+
+	// Forward the selected provider to make as PROVIDER; everything else is inherited.
+	env := os.Environ()
+	if provider := os.Getenv("E2E_PROVIDER"); provider != "" {
+		env = append(env, "PROVIDER="+provider)
+	}
+
+	// runMake executes a single make target from the project directory and folds
+	// the combined output into the returned error.
+	runMake := func(target, what string) error {
+		mk := exec.CommandContext(ctx, "make", target)
+		mk.Dir = dir
+		mk.Env = env
+		out, err := mk.CombinedOutput()
+		if err != nil {
+			return fmt.Errorf("failed to deploy %s: %s: %w", what, string(out), err)
+		}
+		return nil
+	}
+
+	// First deploy CRDs explicitly (make deploy also does this, but let's be sure)
+	if err := runMake("deploy-crds", "CRDs"); err != nil {
+		return err
+	}
+
+	// Then deploy the manager
+	if err := runMake("deploy", "manager"); err != nil {
+		return err
+	}
+
+	// Block until the Deployment reports Available (at most two minutes).
+	waitErr := c.runKubectl(ctx, "wait", "deployment/network-operator-controller-manager",
+		"-n", "network-operator-system",
+		"--for", "condition=Available",
+		"--timeout", "2m")
+	if waitErr != nil {
+		return fmt.Errorf("manager not ready: %w", waitErr)
+	}
+	return nil
+}
+
+// UndeployManager undeploys the controller-manager.
+// It runs `make undeploy` from the project directory and includes the command
+// output in the returned error.
+func (c *ClusterEnvironment) UndeployManager(ctx context.Context) error {
+	dir, _ := GetProjectDir() //nolint:errcheck // uses current dir as fallback
+	undeploy := exec.CommandContext(ctx, "make", "undeploy")
+	undeploy.Dir = dir
+	out, err := undeploy.CombinedOutput()
+	if err != nil {
+		return fmt.Errorf("failed to undeploy manager: %s: %w", string(out), err)
+	}
+	return nil
+}
+
+// Teardown cleans up CRDs.
+// Deletion is best-effort: a kubectl failure is discarded and nil is always returned.
+func (c *ClusterEnvironment) Teardown(ctx context.Context) error {
+	_ = c.runKubectl(ctx, "delete", "-k", "config/crd", "--ignore-not-found") //nolint:errcheck // best-effort cleanup
+	return nil
+}
+
+// Client returns the Kubernetes client.
+// The value is nil until Setup has completed successfully.
+func (c *ClusterEnvironment) Client() client.Client {
+	return c.k8sClient
+}
+
+// RESTConfig returns the REST config.
+func (c *ClusterEnvironment) RESTConfig() *rest.Config {
+	return c.restConfig
+}
+
+// DeployGNMIServer deploys the gnmi-test-server pod and returns its gNMI address.
+//
+// The pod is started with `kubectl run` from the locally loaded image (pull policy
+// Never), the call blocks until the pod is Ready (at most one minute), and the
+// returned address combines the pod IP with gnmiPort.
+func (c *ClusterEnvironment) DeployGNMIServer(ctx context.Context, namespace string) (netip.AddrPort, error) {
+	// kubectl run takes a single --port value; when the flag is repeated only the last
+	// occurrence is kept. Only the gNMI port is therefore declared. The HTTP control
+	// API on port 8000 is reached through `kubectl exec` on localhost and needs no
+	// declared container port.
+	if err := c.runKubectl(
+		ctx,
+		"run", "gnmi-test-server",
+		"--image", serverImage,
+		"--image-pull-policy", "Never",
+		"--namespace", namespace,
+		"--restart", "Never",
+		"--port", strconv.FormatUint(uint64(gnmiPort), 10),
+	); err != nil {
+		return netip.AddrPort{}, fmt.Errorf("failed to deploy gnmi-test-server: %w", err)
+	}
+
+	if err := c.runKubectl(
+		ctx,
+		"wait", "pods/gnmi-test-server",
+		"--for", "condition=Ready",
+		"--namespace", namespace,
+		"--timeout", "1m",
+	); err != nil {
+		return netip.AddrPort{}, fmt.Errorf("gnmi-test-server pod not ready: %w", err)
+	}
+
+	out, err := c.runKubectlOutput(
+		ctx,
+		"get", "pod", "gnmi-test-server",
+		"--output", "jsonpath={.status.podIP}",
+		"--namespace", namespace,
+	)
+	if err != nil {
+		return netip.AddrPort{}, fmt.Errorf("failed to get gnmi-test-server IP: %w", err)
+	}
+
+	// An empty or malformed podIP is rejected here rather than handed to the caller.
+	addr, err := netip.ParseAddr(strings.TrimSpace(out))
+	if err != nil {
+		return netip.AddrPort{}, fmt.Errorf("invalid IP address from gnmi-test-server pod: %w", err)
+	}
+
+	return netip.AddrPortFrom(addr, gnmiPort), nil
+}
+
+// GetGNMIState fetches state via kubectl exec.
+// It runs wget inside the gnmi-test-server pod against the local HTTP API and
+// returns the raw response body.
+func (c *ClusterEnvironment) GetGNMIState(ctx context.Context, namespace string) ([]byte, error) {
+	out, err := c.runKubectlOutput(
+		ctx,
+		"exec", "gnmi-test-server",
+		"--namespace", namespace,
+		"--",
+		"wget", "-qO-", "http://localhost:8000/v1/state",
+	)
+	if err != nil {
+		return nil, fmt.Errorf("failed to get gNMI state: %w", err)
+	}
+	return []byte(out), nil
+}
+
+// ClearGNMIState clears state.
+func (c *ClusterEnvironment) ClearGNMIState(ctx context.Context, namespace string) error {
+	// An empty POST body against /v1/clear resets the server; the response body is discarded.
+	if _, err := c.runKubectlOutput(
+		ctx,
+		"exec", "gnmi-test-server",
+		"--namespace", namespace,
+		"--",
+		"wget", "-qO-", "--post-data=", "http://localhost:8000/v1/clear",
+	); err != nil {
+		return fmt.Errorf("failed to clear gNMI state: %w", err)
+	}
+	return nil
+}
+
+// PreloadGNMIState preloads nested JSON into the gnmi-test-server state.
+// This allows tests to set up paths like System/procsys-items/bootTime
+// before the Device controller reconciles.
+//
+// The JSON is passed as a single command-line argument to wget inside the pod
+// (no shell is involved, so no quoting is required).
+func (c *ClusterEnvironment) PreloadGNMIState(ctx context.Context, namespace string, jsonData []byte) error {
+	if _, err := c.runKubectlOutput(
+		ctx,
+		"exec", "gnmi-test-server",
+		"--namespace", namespace,
+		"--",
+		"wget", "-qO-", "--post-data="+string(jsonData), "http://localhost:8000/v1/state",
+	); err != nil {
+		return fmt.Errorf("failed to preload gNMI state: %w", err)
+	}
+	return nil
+}
+
+// runKubectl runs a kubectl command and returns an error if it fails.
+// The command runs from the project directory so relative paths such as
+// config/crd resolve. On failure the combined stdout/stderr (trimmed) is
+// included in the error.
+func (c *ClusterEnvironment) runKubectl(ctx context.Context, args ...string) error {
+	cmd := exec.CommandContext(ctx, "kubectl", args...)
+	dir, _ := GetProjectDir() //nolint:errcheck // uses current dir as fallback
+	cmd.Dir = dir
+	output, err := cmd.CombinedOutput()
+	if err != nil {
+		// Trim the trailing newline kubectl prints so wrapped messages stay on one line.
+		return fmt.Errorf("%s: %w", strings.TrimSpace(string(output)), err)
+	}
+	return nil
+}
+
+// runKubectlOutput runs a kubectl command and returns its output.
+// Only stdout is returned; stderr is captured separately and (trimmed) becomes
+// part of the error when the command fails.
+func (c *ClusterEnvironment) runKubectlOutput(ctx context.Context, args ...string) (string, error) {
+	cmd := exec.CommandContext(ctx, "kubectl", args...)
+	dir, _ := GetProjectDir() //nolint:errcheck // uses current dir as fallback
+	cmd.Dir = dir
+	var stdout, stderr bytes.Buffer
+	cmd.Stdout = &stdout
+	cmd.Stderr = &stderr
+	if err := cmd.Run(); err != nil {
+		return "", fmt.Errorf("%s: %w", strings.TrimSpace(stderr.String()), err)
+	}
+	return stdout.String(), nil
+}
+
+// DeleteNamespace deletes the given namespace.
+func (c *ClusterEnvironment) DeleteNamespace(ctx context.Context, namespace string) error {
+	// --ignore-not-found makes deleting an already-absent namespace a success.
+	err := c.runKubectl(ctx, "delete", "namespace", namespace, "--ignore-not-found")
+	if err == nil {
+		return nil
+	}
+	return fmt.Errorf("failed to delete namespace %s: %w", namespace, err)
+}
+
+// CreateNamespace creates the namespace and then tags it with E2ETestLabel so that
+// parallel test processes can find it again during cleanup.
+// If labelling fails the namespace has already been created and is left in place.
+func (c *ClusterEnvironment) CreateNamespace(ctx context.Context, namespace string) error {
+	createErr := c.runKubectl(ctx, "create", "namespace", namespace)
+	if createErr != nil {
+		return fmt.Errorf("failed to create namespace %s: %w", namespace, createErr)
+	}
+
+	// The label is set with an empty value; only its presence matters for the selector.
+	labelErr := c.runKubectl(ctx, "label", "namespace", namespace, E2ETestLabel+"=")
+	if labelErr != nil {
+		return fmt.Errorf("failed to label namespace %s: %w", namespace, labelErr)
+	}
+	return nil
+}
+
+// WaitForTestNamespacesGone waits for all labeled test namespaces to be fully deleted.
+// This ensures DeferCleanup hooks have completed before the manager is undeployed.
+// Without this, the manager (and CRDs) may be deleted while resources with finalizers still exist,
+// leaving them stuck forever because the controller can no longer process the finalizers.
+func (c *ClusterEnvironment) WaitForTestNamespacesGone(ctx context.Context) error {
+	// Get all namespaces with our e2e test label
+	out, err := c.runKubectlOutput(
+		ctx,
+		"get", "namespaces",
+		"-l", E2ETestLabel,
+		"-o", "jsonpath={.items[*].metadata.name}",
+	)
+	if err != nil {
+		return fmt.Errorf("failed to list test namespaces: %w", err)
+	}
+
+	// The jsonpath output is a whitespace-separated list of names; empty means nothing to wait for.
+	namespaces := strings.Fields(out)
+	if len(namespaces) == 0 {
+		return nil
+	}
+
+	// Wait for each test namespace to be deleted (with timeout)
+	// NOTE: failures and timeouts of the wait are discarded, so a nil return does not
+	// guarantee that every namespace is actually gone. Namespaces are waited on one
+	// after another, each with its own 120s limit.
+	for _, ns := range namespaces {
+		_ = c.runKubectl(ctx, "wait", "namespace", ns, //nolint:errcheck // best-effort cleanup
+			"--for=delete",
+			"--timeout=120s")
+	}
+
+	return nil
+}
+
+// DeleteCustomResources deletes all custom resources in the given namespace.
+// Deletion order: CoreResources first (have finalizers), then ConfigResources, then Device.
+// This allows finalizers to complete while their dependencies still exist.
+// Every kubectl call is best-effort: errors are discarded and the function always returns nil.
+func (c *ClusterEnvironment) DeleteCustomResources(ctx context.Context, namespace string) error {
+	// deleteResources issues the delete for every kind in the group first and only
+	// then waits on each kind (up to 60s per kind).
+	deleteResources := func(gvks []schema.GroupVersionKind) {
+		for _, gvk := range gvks {
+			_ = c.runKubectl(ctx, "delete", //nolint:errcheck // best-effort cleanup
+				ResourcePluralName(gvk), "--all",
+				"--namespace", namespace,
+				"--ignore-not-found")
+		}
+		// Wait for resources to be fully gone (finalizers completed)
+		for _, gvk := range gvks {
+			_ = c.runKubectl(ctx, "wait", //nolint:errcheck // best-effort cleanup
+				ResourcePluralName(gvk),
+				"--for=delete", "--all",
+				"--namespace", namespace,
+				"--timeout=60s")
+		}
+	}
+
+	// Delete core resources first (have finalizers that need Device + configs)
+	deleteResources(CoreResources)
+	// Then delete config resources
+	deleteResources(ConfigResources)
+
+	// Delete Device LAST - after all other resources and their finalizers are done
+	_ = c.runKubectl(ctx, "delete", "devices", "--all", //nolint:errcheck // best-effort cleanup
+		"--namespace", namespace,
+		"--ignore-not-found")
+	_ = c.runKubectl(ctx, "wait", "devices", //nolint:errcheck // best-effort cleanup
+		"--for=delete", "--all",
+		"--namespace", namespace,
+		"--timeout=60s")
+
+	return nil
+}
diff --git a/test/e2e/util_test.go b/test/e2e/util_test.go
index 1281d18db..1492aa773 100644
--- a/test/e2e/util_test.go
+++ b/test/e2e/util_test.go
@@ -1,3 +1,5 @@
+//go:build !cluster && !envtest
+
 // SPDX-FileCopyrightText: 2025 SAP SE or an SAP affiliate company and IronCore contributors
 // SPDX-License-Identifier: Apache-2.0
 
From e0403cc45d78ecd9d804a4a3eacfa01f66948944 Mon Sep 17 00:00:00 2001
From: Pujol
Date: Thu, 18 Jun 2026 10:03:45 +0200
Subject: [PATCH 2/2] build: add test-e2e-cluster target and update deploy to use config/develop

- Add install-ginkgo target for ginkgo CLI
- Add PROVIDER variable for provider selection
- Add GINKGO_PROCS variable for parallel test execution
- Add test-e2e-cluster target for cluster-based e2e tests
- Update deploy/undeploy to use config/develop instead of config/default

Signed-off-by: Pujol
---
 Makefile            | 17 ++++++-----------
 Makefile.maker.yaml | 35 +++++++++++++++++++++++++++++------
 2 files changed, 35 insertions(+), 17 deletions(-)

diff --git a/Makefile b/Makefile
index f24dc12e1..73ed4b966 100644
--- a/Makefile
+++ b/Makefile
@@ -71,14 +71,14 @@ lint: FORCE bin/golangci-lint-custom ## Run golangci-lint linter
 	@bin/golangci-lint-custom config verify
 	@bin/golangci-lint-custom run
 
-# PROVIDER defines which provider to test (cisco-nxos-gnmi, cisco-iosxr-gnmi, openconfig).
-# Used by test-e2e-cluster and test-e2e-envtest to filter tests.
-PROVIDER ?= cisco-nxos-gnmi
-
 fmt: FORCE install-gofumpt
 	@printf "\e[1;36m>> gofumpt -l -w .\e[0m\n"
 	@gofumpt -l -w $(shell git ls-files '*.go' | grep -v '^internal/provider/openconfig')
 
+# PROVIDER defines which provider to test (cisco-nxos-gnmi, cisco-iosxr-gnmi, openconfig).
+# Used by test-e2e-cluster (exported to the tests as E2E_PROVIDER) and by deploy.
+PROVIDER ?= cisco-nxos-gnmi
+
 # Run the scaffolded e2e tests (unchanged from Kubebuilder).
 test-e2e: FORCE
 	@command -v kind >/dev/null 2>&1 || { \
@@ -104,13 +104,8 @@ test-e2e-cluster: FORCE install-ginkgo
 		echo "No Kind cluster is running. Please start a Kind cluster before running the e2e tests."; \
 		exit 1; \
 	}
-	@printf "\e[1;36m>> ginkgo -procs=$(GINKGO_PROCS) -tags=cluster -timeout=15m -v ./test/e2e/ (PROVIDER=$(PROVIDER))\e[0m\n"
-	@KIND_CLUSTER=$(KIND_CLUSTER) E2E_PROVIDER=$(PROVIDER) ginkgo -procs=$(GINKGO_PROCS) -tags=cluster -timeout=15m -v ./test/e2e/
-
-# Run gNMI controller tests in envtest mode (no cluster required).
-test-e2e-envtest: FORCE install-setup-envtest
-	@printf "\e[1;36m>> go test ./test/e2e/ -tags=envtest -v -ginkgo.v (PROVIDER=$(PROVIDER))\e[0m\n"
-	@KUBEBUILDER_ASSETS=$$(setup-envtest use 1.32 -p path) E2E_PROVIDER=$(PROVIDER) go test ./test/e2e/ -tags=envtest -v -ginkgo.v
+	@printf "\e[1;36m>> ginkgo -procs=$(GINKGO_PROCS) -tags=cluster -timeout=20m -v ./test/e2e/ (PROVIDER=$(PROVIDER))\e[0m\n"
+	@KIND_CLUSTER=$(KIND_CLUSTER) E2E_PROVIDER=$(PROVIDER) ginkgo -procs=$(GINKGO_PROCS) -tags=cluster -timeout=20m -v ./test/e2e/
 
 docker-build: FORCE
 	@printf "\e[1;36m>> $(CONTAINER_TOOL) build --tag=$(IMG) .\e[0m\n"
diff --git a/Makefile.maker.yaml b/Makefile.maker.yaml
index 53143db03..fe8639dd5 100644
--- a/Makefile.maker.yaml
+++ b/Makefile.maker.yaml
@@ -92,6 +92,9 @@ verbatim: |
   install-gofumpt: FORCE
     @if ! hash gofumpt 2>/dev/null; then printf "\e[1;36m>> Installing gofumpt...\e[0m\n"; go install mvdan.cc/gofumpt@latest; fi
 
+  install-ginkgo: FORCE
+    @if ! hash ginkgo 2>/dev/null; then printf "\e[1;36m>> Installing ginkgo...\e[0m\n"; go install github.com/onsi/ginkgo/v2/ginkgo@latest; fi
+
   install-kubebuilder: FORCE
     @set -eou pipefail; if ! hash kubebuilder 2>/dev/null; then printf "\e[1;36m>> Installing kubebuilder...\e[0m\n"; if command -v curl >/dev/null 2>&1; then GET="curl -sLo"; elif command -v wget >/dev/null 2>&1; then GET="wget -O"; else echo "Didn't find curl or wget to download kubebuilder"; exit 2; fi; BIN=$$(go env GOBIN); if [[ -z $$BIN ]]; then BIN=$$(go env GOPATH)/bin; fi; $$GET "$$BIN/kubebuilder" "https://go.kubebuilder.io/dl/latest/$$(go env GOOS)/$$(go env GOARCH)"; chmod +x "$$BIN/kubebuilder"; fi
 
@@ -114,7 +117,11 @@ verbatim: |
     @printf "\e[1;36m>> gofumpt -l -w .\e[0m\n"
     @gofumpt -l -w $(shell git ls-files '*.go' | grep -v '^internal/provider/openconfig')
 
-  # Run the e2e tests against a k8s cluster.
+  # PROVIDER defines which provider to test (cisco-nxos-gnmi, cisco-iosxr-gnmi, openconfig).
+  # Used by test-e2e-cluster (exported to the tests as E2E_PROVIDER) and by deploy.
+  PROVIDER ?= cisco-nxos-gnmi
+
+  # Run the scaffolded e2e tests (unchanged from Kubebuilder).
   test-e2e: FORCE
     @command -v kind >/dev/null 2>&1 || { \
       echo "Kind is not installed. Please install Kind manually."; \
@@ -127,6 +134,21 @@ verbatim: |
     @printf "\e[1;36m>> go test ./test/e2e/ -v -ginkgo.v\e[0m\n"
     @KIND_CLUSTER=$(KIND_CLUSTER) go test ./test/e2e/ -v -ginkgo.v
 
+  # Run gNMI controller tests in cluster mode (requires Kind cluster).
+  # Uses ginkgo for parallel execution.
+  GINKGO_PROCS ?= 4
+  test-e2e-cluster: FORCE install-ginkgo
+    @command -v kind >/dev/null 2>&1 || { \
+      echo "Kind is not installed. Please install Kind manually."; \
+      exit 1; \
+    }
+    @kind get clusters | grep -q $(KIND_CLUSTER) || { \
+      echo "No Kind cluster is running. Please start a Kind cluster before running the e2e tests."; \
+      exit 1; \
+    }
+    @printf "\e[1;36m>> ginkgo -procs=$(GINKGO_PROCS) -tags=cluster -timeout=20m -v ./test/e2e/ (PROVIDER=$(PROVIDER))\e[0m\n"
+    @KIND_CLUSTER=$(KIND_CLUSTER) E2E_PROVIDER=$(PROVIDER) ginkgo -procs=$(GINKGO_PROCS) -tags=cluster -timeout=20m -v ./test/e2e/
+
   docker-build: FORCE
     @printf "\e[1;36m>> $(CONTAINER_TOOL) build --tag=$(IMG) .\e[0m\n"
     @$(CONTAINER_TOOL) build --build-arg=BININFO_BUILD_DATE=$(BININFO_BUILD_DATE) --build-arg=BININFO_COMMIT_HASH=$(BININFO_COMMIT_HASH) --build-arg=BININFO_VERSION=$(BININFO_VERSION) --tag=$(IMG) .
@@ -140,15 +162,16 @@ verbatim: |
     @printf "\e[1;36m>> kustomize build config/default > dist/install.yaml\e[0m\n"
     @mkdir -p dist; kustomize build config/default > dist/install.yaml
 
-  # Deploy controller to the k8s cluster
+  # Deploy controller to the k8s cluster.
+  # Use PROVIDER to set the provider (default: cisco-nxos-gnmi).
   deploy: FORCE generate install-kustomize
-    @printf "\e[1;36m>> kustomize build config/default | kubectl apply -f -\e[0m\n"
-    @kustomize build config/default | kubectl apply -f -
+    @printf "\e[1;36m>> deploying controller-manager (PROVIDER=$(PROVIDER))\e[0m\n"
+    @kustomize build config/develop | sed 's/--provider=openconfig/--provider=$(PROVIDER)/' | kubectl apply -f -
 
   # Undeploy controller from the k8s cluster
   undeploy: FORCE install-kustomize
-    @printf "\e[1;36m>> kustomize build config/default | kubectl delete -f -\e[0m\n"
-    @kustomize build config/default | kubectl delete --ignore-not-found=true -f -
+    @printf "\e[1;36m>> undeploying controller-manager\e[0m\n"
+    @kustomize build config/develop | kubectl delete --ignore-not-found=true -f -
 
   # Install CRDs into the k8s cluster
   deploy-crds: FORCE generate install-kustomize