From 20e969161048b53a4618aa715c75e29e3e797695 Mon Sep 17 00:00:00 2001 From: Rajath Agasthya Date: Wed, 6 May 2026 13:45:12 -0500 Subject: [PATCH] Remove shell dependency from validator pods MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit NVIDIA's distroless-cc `-dev` tag (the gpu-operator image base) will no longer be approved as a STIG parent image. The non-`-dev` variant ships no shell, so the validator daemonsets and workload validation pods — which wrapped binaries in `sh -c` and used shell-based preStop hooks — would break on the new base. Re-adding a shell to the image would only swap one CVE source for another. Replace shell wrappers with direct binary invocation. The operator-validator and sandbox-validator init containers invoke `nvidia-validator` directly. Their pause containers use a new top-level `--sleep` flag that prints the validator-success message and blocks until SIGTERM, SIGINT, or SIGHUP arrives, then exits 0. Workload pod main containers run `nvidia-validator --version` as a no-op exit-0; the per-workload success message now prints from `(c *CUDA).runWorkload` and `(p *Plugin).runWorkload` after `waitForPod` succeeds — surfacing in the operator-validator init container logs where success is actually established. For preStop cleanup, add a small static helper `rmglob` that takes glob patterns and removes matching paths. It is modeled on k8s-cc-manager's vendored static `/bin/rm` and ships in the image at `/usr/bin/rmglob`. Both validator daemonsets keep their `lifecycle.preStop` blocks; they now call this binary instead of `sh -c rm`. Drop `hack/must-gather.sh` from the image entrypoint at `/usr/bin/gather`. It depended on `bash`, `kubectl`, and `oc` — none of which ship in the distroless base. Customers already run the script from outside the cluster against an existing kubeconfig; removing the in-image copy doesn't change that workflow. Flip the Dockerfile base to `nvcr.io/nvidia/distroless/cc:v4.0.4`. 
Signed-off-by: Rajath Agasthya --- .../0500_daemonset.yaml | 18 ++--- .../0500_daemonset.yaml | 18 ++--- cmd/nvidia-validator/main.go | 75 +++++++++++++++---- cmd/nvidia-validator/main_test.go | 71 ++++++++++++++++++ cmd/rmglob/main.go | 60 +++++++++++++++ cmd/rmglob/main_test.go | 75 +++++++++++++++++++ docker/Dockerfile | 5 +- .../manifests/cuda-workload-validation.yaml | 7 +- .../manifests/plugin-workload-validation.yaml | 7 +- 9 files changed, 288 insertions(+), 48 deletions(-) create mode 100644 cmd/rmglob/main.go create mode 100644 cmd/rmglob/main_test.go diff --git a/assets/state-operator-validation/0500_daemonset.yaml b/assets/state-operator-validation/0500_daemonset.yaml index cddc4f8bb..20247d4f7 100644 --- a/assets/state-operator-validation/0500_daemonset.yaml +++ b/assets/state-operator-validation/0500_daemonset.yaml @@ -28,8 +28,7 @@ spec: initContainers: - name: driver-validation image: "FILLED BY THE OPERATOR" - command: ['sh', '-c'] - args: ["nvidia-validator"] + command: ["nvidia-validator"] env: - name: WITH_WAIT value: "true" @@ -58,8 +57,7 @@ spec: mountPath: /host-dev-char - name: toolkit-validation image: "FILLED BY THE OPERATOR" - command: ['sh', '-c'] - args: ["nvidia-validator"] + command: ["nvidia-validator"] env: - name: NVIDIA_VISIBLE_DEVICES value: "all" @@ -75,8 +73,7 @@ spec: mountPropagation: Bidirectional - name: cuda-validation image: "FILLED BY THE OPERATOR" - command: ['sh', '-c'] - args: ["nvidia-validator"] + command: ["nvidia-validator"] env: - name: WITH_WAIT value: "false" @@ -98,8 +95,7 @@ spec: mountPropagation: Bidirectional - name: plugin-validation image: "FILLED BY THE OPERATOR" - command: ['sh', '-c'] - args: ["nvidia-validator"] + command: ["nvidia-validator"] env: - name: COMPONENT value: plugin @@ -126,14 +122,14 @@ spec: containers: - image: "FILLED BY THE OPERATOR" name: nvidia-operator-validator - command: ['sh', '-c'] - args: ["echo all validations are successful; while true; do sleep 86400; done"] + command: 
["nvidia-validator"] + args: ["--sleep"] securityContext: privileged: true lifecycle: preStop: exec: - command: ["sh", "-c", "rm -f /run/nvidia/validations/*-ready"] + command: ["/usr/bin/rmglob", "/run/nvidia/validations/*-ready"] volumeMounts: - name: run-nvidia-validations mountPath: "/run/nvidia/validations" diff --git a/assets/state-sandbox-validation/0500_daemonset.yaml b/assets/state-sandbox-validation/0500_daemonset.yaml index fcc2aa12a..b5731a543 100644 --- a/assets/state-sandbox-validation/0500_daemonset.yaml +++ b/assets/state-sandbox-validation/0500_daemonset.yaml @@ -28,8 +28,7 @@ spec: initContainers: - name: cc-manager-validation image: "FILLED BY THE OPERATOR" - command: ['sh', '-c'] - args: ["nvidia-validator"] + command: ["nvidia-validator"] env: - name: WITH_WAIT value: "true" @@ -49,8 +48,7 @@ spec: mountPropagation: Bidirectional - name: vfio-pci-validation image: "FILLED BY THE OPERATOR" - command: ['sh', '-c'] - args: ["nvidia-validator"] + command: ["nvidia-validator"] env: - name: WITH_WAIT value: "true" @@ -74,8 +72,7 @@ spec: mountPropagation: Bidirectional - name: vgpu-manager-validation image: "FILLED BY THE OPERATOR" - command: ['sh', '-c'] - args: ["nvidia-validator"] + command: ["nvidia-validator"] env: - name: WITH_WAIT value: "true" @@ -102,8 +99,7 @@ spec: mountPropagation: Bidirectional - name: vgpu-devices-validation image: "FILLED BY THE OPERATOR" - command: ['sh', '-c'] - args: ["nvidia-validator"] + command: ["nvidia-validator"] env: - name: WITH_WAIT value: "true" @@ -122,14 +118,14 @@ spec: containers: - image: "FILLED BY THE OPERATOR" name: nvidia-sandbox-validator - command: ['sh', '-c'] - args: ["echo all validations are successful; while true; do sleep 86400; done"] + command: ["nvidia-validator"] + args: ["--sleep"] securityContext: privileged: true lifecycle: preStop: exec: - command: ["sh", "-c", "rm -f /run/nvidia/validations/*"] + command: ["/usr/bin/rmglob", "/run/nvidia/validations/*"] volumeMounts: - name: 
run-nvidia-validations mountPath: "/run/nvidia/validations" diff --git a/cmd/nvidia-validator/main.go b/cmd/nvidia-validator/main.go index 8ffa41e28..c373dc11c 100644 --- a/cmd/nvidia-validator/main.go +++ b/cmd/nvidia-validator/main.go @@ -135,6 +135,7 @@ var ( hostRootFlag string driverInstallDirFlag string driverInstallDirCtrPathFlag string + sleepFlag bool ) // defaultGPUWorkloadConfig is "vm-passthrough" unless @@ -375,14 +376,17 @@ func main() { Destination: &driverInstallDirCtrPathFlag, Sources: cli.EnvVars("DRIVER_INSTALL_DIR_CTR_PATH"), }, + &cli.BoolFlag{ + Name: "sleep", + Usage: "after any other action, print the validator-success message and block until SIGTERM/SIGINT/SIGHUP, then exit 0", + Destination: &sleepFlag, + Sources: cli.EnvVars("SLEEP"), + }, } // Log version info log.Infof("version: %s", c.Version) - // Handle signals - go handleSignal() - // invoke command err := c.Run(context.Background(), os.Args) if err != nil { @@ -404,6 +408,10 @@ func handleSignal() { func validateFlags(ctx context.Context, cli *cli.Command) (context.Context, error) { if componentFlag == "" { + // Standalone --sleep mode does not require a component. + if sleepFlag { + return ctx, nil + } return ctx, fmt.Errorf("invalid -c flag: must not be empty string") } if !isValidComponent() { @@ -509,24 +517,59 @@ func getWorkloadConfig(ctx context.Context) (string, error) { } func start(ctx context.Context, cli *cli.Command) error { - // if cleanup is requested, delete all existing status files(default) - if cleanupAllFlag { - // cleanup output directory and create again each time - err := os.RemoveAll(outputDirFlag) - if err != nil { - if !os.IsNotExist(err) { - return err + // In sleep mode, runSleep installs its own signal handler. Otherwise + // preserve legacy behavior: any signal terminates the process. 
+ if !sleepFlag { + go handleSignal() + } + + if componentFlag != "" { + // if cleanup is requested, delete all existing status files(default) + if cleanupAllFlag { + // cleanup output directory and create again each time + err := os.RemoveAll(outputDirFlag) + if err != nil { + if !os.IsNotExist(err) { + return err + } } } + + // create status directory + err := os.Mkdir(outputDirFlag, 0755) + if err != nil && !os.IsExist(err) { + return err + } + + if err := validateComponent(ctx, componentFlag); err != nil { + return err + } } - // create status directory - err := os.Mkdir(outputDirFlag, 0755) - if err != nil && !os.IsExist(err) { - return err + if sleepFlag { + return runSleep(ctx) } + return nil +} - return validateComponent(ctx, componentFlag) +// runSleep prints the validator-success message and blocks until a +// termination signal arrives, then exits cleanly. Per-pod cleanup of +// status markers is handled separately by the rmglob binary invoked +// from `lifecycle.preStop`. +func runSleep(ctx context.Context) error { + fmt.Println("all validations are successful") + + sigCh := make(chan os.Signal, 1) + signal.Notify(sigCh, syscall.SIGTERM, syscall.SIGINT, syscall.SIGHUP) + defer signal.Stop(sigCh) + + select { + case <-ctx.Done(): + log.Infof("context canceled") + case s := <-sigCh: + log.Infof("received signal %s", s) + } + return nil } func validateComponent(ctx context.Context, componentFlag string) error { @@ -1368,6 +1411,7 @@ func (p *Plugin) runWorkload() error { if err != nil { return err } + fmt.Println("device-plugin workload validation is successful") return nil } @@ -1621,6 +1665,7 @@ func (c *CUDA) runWorkload() error { if err != nil { return err } + fmt.Println("cuda workload validation is successful") return nil } diff --git a/cmd/nvidia-validator/main_test.go b/cmd/nvidia-validator/main_test.go index d0199dd18..0a84eb9fe 100644 --- a/cmd/nvidia-validator/main_test.go +++ b/cmd/nvidia-validator/main_test.go @@ -19,7 +19,9 @@ package main 
import ( "context" "os" + "syscall" "testing" + "time" ) func Test_isValidComponent(t *testing.T) { @@ -216,3 +218,72 @@ UNKNOWN_FEATURE: true`, }) } } + +func Test_validateFlags_standaloneSleep(t *testing.T) { + tests := []struct { + name string + component string + sleep bool + wantErr bool + }{ + {name: "no component, no sleep: error", wantErr: true}, + {name: "no component, sleep: ok", sleep: true}, + {name: "valid component, no sleep: ok", component: "driver"}, + {name: "valid component, sleep: ok", component: "driver", sleep: true}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + origComponent, origSleep := componentFlag, sleepFlag + componentFlag, sleepFlag = tt.component, tt.sleep + defer func() { + componentFlag, sleepFlag = origComponent, origSleep + }() + + _, err := validateFlags(context.Background(), nil) + if tt.wantErr && err == nil { + t.Errorf("validateFlags() expected error, got nil") + } + if !tt.wantErr && err != nil { + t.Errorf("validateFlags() unexpected error: %v", err) + } + }) + } +} + +func Test_runSleep_returnsOnSignal(t *testing.T) { + errCh := make(chan error, 1) + go func() { errCh <- runSleep(context.Background()) }() + + // Give runSleep a moment to install its signal handler before sending. 
+ time.Sleep(50 * time.Millisecond) + if err := syscall.Kill(syscall.Getpid(), syscall.SIGTERM); err != nil { + t.Fatalf("kill: %v", err) + } + + select { + case err := <-errCh: + if err != nil { + t.Errorf("runSleep returned error: %v", err) + } + case <-time.After(2 * time.Second): + t.Fatalf("runSleep did not return within 2s of SIGTERM") + } +} + +func Test_runSleep_contextCancel(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + errCh := make(chan error, 1) + go func() { errCh <- runSleep(ctx) }() + + time.Sleep(50 * time.Millisecond) + cancel() + + select { + case err := <-errCh: + if err != nil { + t.Errorf("runSleep returned error: %v", err) + } + case <-time.After(2 * time.Second): + t.Fatalf("runSleep did not return within 2s of context cancel") + } +} diff --git a/cmd/rmglob/main.go b/cmd/rmglob/main.go new file mode 100644 index 000000000..bc3a3dc1e --- /dev/null +++ b/cmd/rmglob/main.go @@ -0,0 +1,60 @@ +/* +Copyright (c) NVIDIA CORPORATION. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// rmglob is a tiny static helper binary that expands one or more glob +// patterns and removes the matching paths. It exists so that distroless +// gpu-operator container images can run path cleanup from a Kubernetes +// `lifecycle.preStop` hook without needing a shell on the image. +// +// It is the path-cleanup analog of k8s-cc-manager's vendored static `/bin/rm`. 
+package main + +import ( + "fmt" + "os" + "path/filepath" +) + +func main() { + if len(os.Args) < 2 { + fmt.Fprintln(os.Stderr, "usage: rmglob ...") + os.Exit(2) + } + + var failed bool + for _, pattern := range os.Args[1:] { + matches, err := filepath.Glob(pattern) + if err != nil { + //#nosec G705 -- stderr diagnostic, not a network-reachable sink + fmt.Fprintf(os.Stderr, "rmglob: invalid pattern %q: %v\n", pattern, err) + failed = true + continue + } + for _, m := range matches { + // Path removal is the binary's sole purpose; the patterns come from + // gpu-operator-rendered manifests, not external user input. + //#nosec G703 -- intentional path removal + if err := os.RemoveAll(m); err != nil { + //#nosec G705 -- stderr diagnostic, not a network-reachable sink + fmt.Fprintf(os.Stderr, "rmglob: remove %q: %v\n", m, err) + failed = true + } + } + } + if failed { + os.Exit(1) + } +} diff --git a/cmd/rmglob/main_test.go b/cmd/rmglob/main_test.go new file mode 100644 index 000000000..432971c9a --- /dev/null +++ b/cmd/rmglob/main_test.go @@ -0,0 +1,75 @@ +/* +Copyright (c) NVIDIA CORPORATION. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package main + +import ( + "fmt" + "os" + "os/exec" + "path/filepath" + "testing" +) + +var rmglobBin string + +func TestMain(m *testing.M) { + dir, err := os.MkdirTemp("", "rmglob-test-") + if err != nil { + fmt.Fprintf(os.Stderr, "tempdir: %v\n", err) + os.Exit(2) + } + + rmglobBin = filepath.Join(dir, "rmglob") + if out, err := exec.Command("go", "build", "-o", rmglobBin, ".").CombinedOutput(); err != nil { + fmt.Fprintf(os.Stderr, "build: %v\n%s", err, out) + os.RemoveAll(dir) + os.Exit(2) + } + code := m.Run() + os.RemoveAll(dir) + os.Exit(code) +} + +func TestRmglob(t *testing.T) { + tmpDir := t.TempDir() + for _, name := range []string{"a-ready", "b-ready", "keep.txt"} { + if err := os.WriteFile(filepath.Join(tmpDir, name), []byte("x"), 0600); err != nil { + t.Fatalf("write: %v", err) + } + } + + //#nosec G204 -- test-only invocation of a binary built by TestMain + if out, err := exec.Command(rmglobBin, filepath.Join(tmpDir, "*-ready")).CombinedOutput(); err != nil { + t.Fatalf("run: %v\n%s", err, out) + } + + if _, err := os.Stat(filepath.Join(tmpDir, "a-ready")); !os.IsNotExist(err) { + t.Errorf("a-ready should be removed, stat err=%v", err) + } + if _, err := os.Stat(filepath.Join(tmpDir, "b-ready")); !os.IsNotExist(err) { + t.Errorf("b-ready should be removed, stat err=%v", err) + } + if _, err := os.Stat(filepath.Join(tmpDir, "keep.txt")); err != nil { + t.Errorf("keep.txt should remain, stat err=%v", err) + } +} + +func TestRmglobNoArgs(t *testing.T) { + if err := exec.Command(rmglobBin).Run(); err == nil { + t.Errorf("rmglob with no args expected non-zero exit, got 0") + } +} diff --git a/docker/Dockerfile b/docker/Dockerfile index 121eed9ec..570706023 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -72,7 +72,7 @@ RUN curl -L https://codeload.github.com/NVIDIA/cuda-samples/tar.gz/refs/tags/v${ # The C/C++ distroless image is used as a base since the CUDA vectorAdd # sample application depends on C/C++ libraries. 
-FROM nvcr.io/nvidia/distroless/cc:v4.0.4-dev +FROM nvcr.io/nvidia/distroless/cc:v4.0.4 ENV NVIDIA_VISIBLE_DEVICES=void @@ -92,6 +92,7 @@ WORKDIR / COPY --from=builder /workspace/gpu-operator /usr/bin/ COPY --from=builder /workspace/manage-crds /usr/bin/ COPY --from=builder /workspace/nvidia-validator /usr/bin/ +COPY --from=builder /workspace/rmglob /usr/bin/ COPY --from=sample-builder /build/vectorAdd /usr/bin/vectorAdd ARG CUDA_SAMPLES_VERSION COPY --from=sample-builder /usr/local/cuda-${CUDA_SAMPLES_VERSION}/compat /usr/local/cuda/compat @@ -100,8 +101,6 @@ COPY assets /opt/gpu-operator/ COPY manifests /opt/gpu-operator/manifests COPY validator/manifests /opt/validator/manifests -COPY hack/must-gather.sh /usr/bin/gather - # Add CRD resource into the image for helm upgrades COPY deployments/gpu-operator/crds/nvidia.com_clusterpolicies.yaml /opt/gpu-operator/nvidia.com_clusterpolicies.yaml COPY deployments/gpu-operator/crds/nvidia.com_nvidiadrivers.yaml /opt/gpu-operator/nvidia.com_nvidiadrivers.yaml diff --git a/validator/manifests/cuda-workload-validation.yaml b/validator/manifests/cuda-workload-validation.yaml index fa47df5f7..614454048 100644 --- a/validator/manifests/cuda-workload-validation.yaml +++ b/validator/manifests/cuda-workload-validation.yaml @@ -18,8 +18,7 @@ spec: - name: cuda-validation image: "FILLED_BY_THE_VALIDATOR" imagePullPolicy: IfNotPresent - command: ['sh', '-c'] - args: ["vectorAdd"] + command: ["vectorAdd"] env: - name: NVIDIA_VISIBLE_DEVICES value: "all" @@ -30,8 +29,8 @@ spec: image: "FILLED_BY_THE_VALIDATOR" imagePullPolicy: IfNotPresent # override command and args as validation is already done by initContainer - command: ['sh', '-c'] - args: ["echo cuda workload validation is successful"] + command: ["nvidia-validator"] + args: ["--version"] securityContext: privileged: true readOnlyRootFilesystem: true diff --git a/validator/manifests/plugin-workload-validation.yaml b/validator/manifests/plugin-workload-validation.yaml index 
80bb657e2..d77551ad4 100644 --- a/validator/manifests/plugin-workload-validation.yaml +++ b/validator/manifests/plugin-workload-validation.yaml @@ -16,8 +16,7 @@ spec: - name: plugin-validation image: "FILLED_BY_VALIDATOR" imagePullPolicy: IfNotPresent - command: ['sh', '-c'] - args: ["vectorAdd"] + command: ["vectorAdd"] securityContext: allowPrivilegeEscalation: false resources: @@ -28,8 +27,8 @@ spec: image: "FILLED_BY_VALIDATOR" imagePullPolicy: IfNotPresent # override command and args as validation is already done by initContainer - command: ['sh', '-c'] - args: ["echo device-plugin workload validation is successful"] + command: ["nvidia-validator"] + args: ["--version"] securityContext: allowPrivilegeEscalation: false readOnlyRootFilesystem: true