From 58f674b8eb80b9843185f659f122c3a8a8aaa95f Mon Sep 17 00:00:00 2001 From: Eitan Yarmush Date: Tue, 26 May 2026 19:04:16 +0000 Subject: [PATCH 01/13] fix(security): replace shell-templated skills-init with Go binary (CVE GHSA #1842) The previous skills-init container rendered a Bash script from a Go template, interpolating user-controlled Agent CRD fields (Git URL/Ref/ Path/Name, OCI image, secret name) into `cat <<'ENDVAL' ... ENDVAL` heredocs. A low-privileged user with create/update on Agent could embed `ENDVAL` in any of those fields to escape the heredoc and execute arbitrary commands inside the init container, which could reach the node's IMDS or other in-cluster services. This commit eliminates the shell entirely: * New `skills-init` Go binary (`go/core/cmd/skills-init`) consumes a structured JSON config and invokes `git`/SSH via `exec.Command` with argv vectors. User strings never reach a shell. * OCI fetch moves to in-process `go-containerregistry` (no more krane binary or jq for docker-config merging). Tar extraction rejects absolute paths, `..` traversal, and symlinks whose targets escape the destination. * Controller emits a per-Agent ConfigMap with the JSON config; the pod template carries a `kagent.dev/skills-init-hash` annotation so config changes trigger pod rollout. * skills-init Dockerfile rewritten as a multi-stage build of the Go binary; krane and jq are dropped from the runtime image. Tested end-to-end on a kind cluster: git skills clone via argv and the agent starts; OCI pull/extract works for benign content; the tar extractor correctly rejects path-escape symlinks. Signed-off-by: Eitan Yarmush --- Makefile | 2 +- docker/skills-init/Dockerfile | 49 +++-- go/core/cmd/skills-init/main.go | 35 ++++ .../translator/agent/adk_api_translator.go | 194 +++++++++--------- .../translator/agent/git_skills_test.go | 110 +++++++--- .../translator/agent/manifest_builder.go | 47 +++-- .../translator/agent/skills-init.sh.tmpl | 95 --------- .../translator/agent/skills_unit_test.go | 45 ++-- .../outputs/agent_with_git_skills.json | 37 +++- .../testdata/outputs/agent_with_skills.json | 37 +++- go/core/internal/skillsinit/config.go | 78 +++++++ go/core/internal/skillsinit/copytree.go | 82 ++++++++ go/core/internal/skillsinit/docker.go | 52 +++++ go/core/internal/skillsinit/git.go | 129 ++++++++++++ go/core/internal/skillsinit/oci.go | 147 +++++++++++++ go/core/internal/skillsinit/runner.go | 64 ++++++ go/core/internal/skillsinit/ssh.go | 113 ++++++++++ go/go.mod | 6 + go/go.sum | 18 +- 19 files changed, 1051 insertions(+), 289 deletions(-) create mode 100644 go/core/cmd/skills-init/main.go delete mode 100644 go/core/internal/controller/translator/agent/skills-init.sh.tmpl create mode 100644 go/core/internal/skillsinit/config.go create mode 100644 go/core/internal/skillsinit/copytree.go create mode 100644 go/core/internal/skillsinit/docker.go create mode 100644 go/core/internal/skillsinit/git.go create mode 100644 go/core/internal/skillsinit/oci.go create mode 100644 go/core/internal/skillsinit/runner.go create mode 100644 go/core/internal/skillsinit/ssh.go diff --git a/Makefile b/Makefile index 4a0ae05149..abea744aba 100644 --- a/Makefile +++ b/Makefile @@ -279,7 +279,7 @@ build-golang-adk-full: buildx-create .PHONY: build-skills-init build-skills-init: ## Build and push the skills-init image build-skills-init: buildx-create - $(DOCKER_BUILDER) $(DOCKER_BUILD_ARGS) -t $(SKILLS_INIT_IMG) -f docker/skills-init/Dockerfile docker/skills-init + $(DOCKER_BUILDER) $(DOCKER_BUILD_ARGS) -t $(SKILLS_INIT_IMG) -f docker/skills-init/Dockerfile ./go $(DOCKER_PUSH) $(SKILLS_INIT_IMG) .PHONY: push diff --git a/docker/skills-init/Dockerfile b/docker/skills-init/Dockerfile index dc89810f74..ab522b502c 100644 --- a/docker/skills-init/Dockerfile +++ b/docker/skills-init/Dockerfile @@ -1,24 +1,39 @@ -### Stage 0: build krane -FROM golang:1.26-alpine AS krane-builder - -ENV KRANE_VERSION=v0.21.2 -WORKDIR /build - -RUN apk add --no-cache git && \ - git clone --depth 1 --branch $KRANE_VERSION \ - https://github.com/google/go-containerregistry.git - -WORKDIR /build/go-containerregistry/cmd/krane - -RUN CGO_ENABLED=0 go build -trimpath -ldflags="-s -w" -o /build/krane . - +### Stage 0: build the skills-init Go binary +ARG BASE_IMAGE_REGISTRY=cgr.dev +ARG BUILDPLATFORM +FROM --platform=$BUILDPLATFORM $BASE_IMAGE_REGISTRY/chainguard/go:latest AS builder +ARG TARGETARCH +ARG TARGETOS + +WORKDIR /workspace + +COPY go.mod go.sum ./ +RUN --mount=type=cache,target=/root/go/pkg/mod,rw \ + --mount=type=cache,target=/root/.cache/go-build,rw \ + go mod download + +COPY api/ api/ +COPY core/ core/ +COPY adk/ adk/ + +ARG LDFLAGS +RUN --mount=type=cache,target=/root/go/pkg/mod,rw \ + --mount=type=cache,target=/root/.cache/go-build,rw \ + CGO_ENABLED=0 GOOS=${TARGETOS:-linux} GOARCH=${TARGETARCH} \ + go build -a -trimpath -ldflags "$LDFLAGS" -o /skills-init ./core/cmd/skills-init + +### Stage 1: runtime FROM alpine:3.23 ARG PYTHON_UID=1001 ARG PYTHON_GID=1001 -RUN apk upgrade --no-cache && apk add --no-cache git jq -COPY --from=krane-builder /build/krane /usr/local/bin/krane +# git is invoked by skills-init via exec.Command with an argv vector — never +# through a shell — so the only attack surface here is git itself. OCI fetch +# uses the in-process go-containerregistry library, so krane and jq are gone. +RUN apk upgrade --no-cache && apk add --no-cache git openssh-client ca-certificates + +COPY --from=builder /skills-init /usr/local/bin/skills-init # Run as the same UID/GID as the main agent container (python user) so that # files written to the shared /skills volume are readable by the main container. @@ -28,3 +43,5 @@ RUN addgroup -g ${PYTHON_GID} pythongroup && \ adduser -u ${PYTHON_UID} -G pythongroup -s /bin/sh -D python USER ${PYTHON_UID}:${PYTHON_GID} + +ENTRYPOINT ["/usr/local/bin/skills-init"] diff --git a/go/core/cmd/skills-init/main.go b/go/core/cmd/skills-init/main.go new file mode 100644 index 0000000000..9e1cb0ec78 --- /dev/null +++ b/go/core/cmd/skills-init/main.go @@ -0,0 +1,35 @@ +// Command skills-init is the init container binary that fetches an Agent's +// skills from git repositories and OCI images before the main agent container +// starts. +// +// It reads its configuration from a ConfigMap-mounted JSON file (see the +// skillsinit package for the wire format) and performs all subprocess +// invocations with argv vectors — no user input is ever interpolated into a +// shell, which is the original design defect that motivated this rewrite. +package main + +import ( + "context" + "log" + "os" + + "github.com/kagent-dev/kagent/go/core/internal/skillsinit" +) + +func main() { + log.SetFlags(log.LstdFlags | log.Lmicroseconds) + + cfg, err := skillsinit.LoadConfig() + if err != nil { + log.Fatalf("skills-init: %v", err) + } + + home, _ := os.UserHomeDir() + if home == "" { + home = "/root" + } + + if err := skillsinit.Run(context.Background(), cfg, home); err != nil { + log.Fatalf("skills-init: %v", err) + } +} diff --git a/go/core/internal/controller/translator/agent/adk_api_translator.go b/go/core/internal/controller/translator/agent/adk_api_translator.go index 0ee9528f8b..350713a4ed 100644 --- a/go/core/internal/controller/translator/agent/adk_api_translator.go +++ b/go/core/internal/controller/translator/agent/adk_api_translator.go @@ -1,10 +1,8 @@ package agent import ( - "bytes" "context" "crypto/sha256" - _ "embed" "encoding/binary" "encoding/hex" "encoding/json" @@ -17,11 +15,11 @@ import ( "regexp" "slices" "strings" - "text/template" "time" "github.com/kagent-dev/kagent/go/api/adk" "github.com/kagent-dev/kagent/go/api/v1alpha2" + "github.com/kagent-dev/kagent/go/core/internal/skillsinit" "github.com/kagent-dev/kagent/go/core/internal/utils" "github.com/kagent-dev/kagent/go/core/internal/version" "github.com/kagent-dev/kagent/go/core/pkg/env" @@ -31,6 +29,7 @@ import ( appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/types" "sigs.k8s.io/controller-runtime/pkg/client" @@ -1115,53 +1114,55 @@ func gitSkillName(ref v1alpha2.GitRepo) string { var ( scpLikeGitURLRegex = regexp.MustCompile(`^(?:[^@/]+@)?([^:/]+):.+$`) - // validHostPattern and validPortPattern are the security boundary that prevents - // shell injection when host/port values are interpolated into the ssh-keyscan - // commands in skills-init.sh.tmpl. Do NOT relax these patterns without auditing - // every template site that references .Host or .Port. + // validHostPattern and validPortPattern are input-hygiene patterns for SSH + // host/port values. They used to be a shell-injection boundary when these + // values were interpolated into the rendered shell script; the + // skills-init container is now driven by a structured JSON config so + // values reach ssh-keyscan as argv entries and shell metacharacters are + // inert. We keep the patterns to reject obvious garbage early. validHostPattern = regexp.MustCompile(`^[A-Za-z0-9.\-]+$`) validPortPattern = regexp.MustCompile(`^[0-9]+$`) ) -func gitSSHHost(rawURL string) (sshHostData, bool) { +func gitSSHHost(rawURL string) (skillsinit.SSHHost, bool) { parsed, err := url.Parse(rawURL) if err == nil { switch parsed.Scheme { case "ssh", "git+ssh": host := parsed.Hostname() if host == "" || !validHostPattern.MatchString(host) { - return sshHostData{}, false + return skillsinit.SSHHost{}, false } port := parsed.Port() if port == "22" { port = "" // 22 is the SSH default; omit to avoid redundant -p flag } if port != "" && !validPortPattern.MatchString(port) { - return sshHostData{}, false + return skillsinit.SSHHost{}, false } - return sshHostData{ + return skillsinit.SSHHost{ Host: host, Port: port, }, true case "http", "https": - return sshHostData{}, false + return skillsinit.SSHHost{}, false } } if strings.Contains(rawURL, "://") { - return sshHostData{}, false + return skillsinit.SSHHost{}, false } matches := scpLikeGitURLRegex.FindStringSubmatch(rawURL) if len(matches) != 2 { - return sshHostData{}, false + return skillsinit.SSHHost{}, false } host := matches[1] if !validHostPattern.MatchString(host) { - return sshHostData{}, false + return skillsinit.SSHHost{}, false } - return sshHostData{Host: host}, true + return skillsinit.SSHHost{Host: host}, true } // validateSubPath rejects subPath values that are absolute or contain ".." traversal segments. @@ -1178,52 +1179,6 @@ func validateSubPath(p string) error { return nil } -// skillsInitData holds the template data for the unified skills-init script. -type skillsInitData struct { - AuthMountPath string // "/git-auth" or "" (for git auth) - GitRefs []gitRefData // git repos to clone - OCIRefs []ociRefData // OCI images to pull - InsecureOCI bool // --insecure flag for krane - SSHHosts []sshHostData // extra hosts to add to known_hosts via ssh-keyscan - ImagePullSecrets []string // secret names whose .dockerconfigjson are merged by the script -} - -// sshHostData holds the host and optional port for an SSH known_hosts entry. -type sshHostData struct { - Host string // hostname or IP - Port string // port number, empty means default (22) -} - -// gitRefData holds pre-computed fields for each git skill ref, used by the script template. -type gitRefData struct { - URL string - Ref string - Dest string // e.g. /skills/my-skill - IsCommit bool // true if Ref is a 40-char hex SHA - SubPath string // Path with trailing slash stripped -} - -// ociRefData holds pre-computed fields for each OCI skill ref, used by the script template. -type ociRefData struct { - Image string // full image ref e.g. ghcr.io/org/skill:v1 - Dest string // /skills/ -} - -//go:embed skills-init.sh.tmpl -var skillsInitScriptTmpl string - -// skillsScriptTemplate is the shell script template for fetching skills from Git and OCI. -var skillsScriptTemplate = template.Must(template.New("skills-init").Parse(skillsInitScriptTmpl)) - -// buildSkillsScript renders the unified skills-init shell script. -func buildSkillsScript(data skillsInitData) (string, error) { - var buf bytes.Buffer - if err := skillsScriptTemplate.Execute(&buf, data); err != nil { - return "", fmt.Errorf("failed to render skills init script: %w", err) - } - return buf.String(), nil -} - // ociSkillName extracts a skill directory name from an OCI image reference. // It takes the last path component of the repo (stripped of tag/digest). func ociSkillName(imageRef string) string { @@ -1241,22 +1196,25 @@ func ociSkillName(imageRef string) string { return path.Base(ref) } -// prepareSkillsInitData converts CRD values to the template-ready data struct. -// It validates subPaths and detects duplicate skill directory names. -func prepareSkillsInitData( +// prepareSkillsInitConfig converts CRD values into the JSON config consumed by +// the skills-init binary. It validates subPaths and detects duplicate skill +// directory names. User-controlled strings (URL, ref, name, OCI image) flow +// through this struct as data only — the binary passes them to git/library +// calls as argv vectors, never as shell input. +func prepareSkillsInitConfig( gitRefs []v1alpha2.GitRepo, authSecretRef *corev1.LocalObjectReference, ociRefs []string, insecureOCI bool, imagePullSecrets []string, -) (skillsInitData, error) { - data := skillsInitData{ +) (skillsinit.Config, error) { + cfg := skillsinit.Config{ InsecureOCI: insecureOCI, ImagePullSecrets: imagePullSecrets, } if authSecretRef != nil { - data.AuthMountPath = "/git-auth" + cfg.AuthMountPath = skillsinit.AuthMountPath } seen := make(map[string]bool) @@ -1265,7 +1223,7 @@ func prepareSkillsInitData( for _, ref := range gitRefs { subPath := strings.TrimSuffix(ref.Path, "/") if err := validateSubPath(subPath); err != nil { - return skillsInitData{}, err + return skillsinit.Config{}, err } gitRef := ref.Ref @@ -1276,26 +1234,26 @@ func prepareSkillsInitData( name := gitSkillName(ref) if seen[name] { - return skillsInitData{}, fmt.Errorf("duplicate skill directory name %q", name) + return skillsinit.Config{}, fmt.Errorf("duplicate skill directory name %q", name) } seen[name] = true - // SSH host collection is separate from the AuthMountPath block above - // because it runs per-ref inside the loop, not once at the top level. + // SSH host collection runs per-ref inside the loop, not once at the + // top level, because the host comes from the per-ref URL. if authSecretRef != nil { if sshHost, ok := gitSSHHost(ref.URL); ok { key := sshHost.Host + ":" + sshHost.Port if !seenSSHHosts[key] { seenSSHHosts[key] = true - data.SSHHosts = append(data.SSHHosts, sshHost) + cfg.SSHHosts = append(cfg.SSHHosts, sshHost) } } } - data.GitRefs = append(data.GitRefs, gitRefData{ + cfg.GitRefs = append(cfg.GitRefs, skillsinit.GitRef{ URL: ref.URL, Ref: gitRef, - Dest: "/skills/" + name, + Dest: skillsinit.SkillsDir + "/" + name, IsCommit: isCommitSHA(gitRef), SubPath: subPath, }) @@ -1304,66 +1262,100 @@ func prepareSkillsInitData( for _, imageRef := range ociRefs { name := ociSkillName(imageRef) if seen[name] { - return skillsInitData{}, fmt.Errorf("duplicate skill directory name %q", name) + return skillsinit.Config{}, fmt.Errorf("duplicate skill directory name %q", name) } seen[name] = true - data.OCIRefs = append(data.OCIRefs, ociRefData{ + cfg.OCIRefs = append(cfg.OCIRefs, skillsinit.OCIRef{ Image: imageRef, - Dest: "/skills/" + name, + Dest: skillsinit.SkillsDir + "/" + name, }) } - slices.SortFunc(data.SSHHosts, func(a, b sshHostData) int { + slices.SortFunc(cfg.SSHHosts, func(a, b skillsinit.SSHHost) int { if cmp := strings.Compare(a.Host, b.Host); cmp != 0 { return cmp } return strings.Compare(a.Port, b.Port) }) - return data, nil + return cfg, nil } -// buildSkillsInitContainer creates the unified init container and associated volumes -// for fetching skills from both Git repositories and OCI registries. -// If authSecretRef is non-nil a single Secret volume is created and mounted at /git-auth. -// If imagePullSecrets is non-empty, each kubernetes.io/dockerconfigjson secret is mounted -// under /docker-secrets/ and the script merges them into a single config.json in /tmp; -// krane reads the credentials via the DOCKER_CONFIG env var exported by the script. +// SkillsInitConfigMapSuffix is appended to the Agent name to form the +// ConfigMap that carries the skills-init container's JSON config. +const SkillsInitConfigMapSuffix = "-skills-init" + +// SkillsInitConfigMapName returns the name of the skills-init ConfigMap for +// the given Agent. +func SkillsInitConfigMapName(agentName string) string { + return agentName + SkillsInitConfigMapSuffix +} + +// buildSkillsInitContainer assembles the init container, its volumes, and the +// ConfigMap holding its JSON configuration. The container runs a kagent-owned +// Go binary that consumes the ConfigMap; no shell is involved, so +// user-controlled CRD fields cannot inject commands. +// +// If authSecretRef is non-nil a Secret is mounted at AuthMountPath. +// If imagePullSecrets is non-empty, each kubernetes.io/dockerconfigjson secret +// is mounted under DockerSecretsDir/; the binary merges them into a +// single config.json and sets DOCKER_CONFIG for the OCI client library. func buildSkillsInitContainer( + agentName, agentNamespace string, gitRefs []v1alpha2.GitRepo, authSecretRef *corev1.LocalObjectReference, ociRefs []string, insecureOCI bool, securityContext *corev1.SecurityContext, - env []corev1.EnvVar, + envVars []corev1.EnvVar, resources corev1.ResourceRequirements, imagePullSecrets []corev1.LocalObjectReference, -) (containers []corev1.Container, volumes []corev1.Volume, err error) { - // Collect secret names for the script template. +) (containers []corev1.Container, volumes []corev1.Volume, configMap *corev1.ConfigMap, err error) { pullSecretNames := make([]string, len(imagePullSecrets)) for i, s := range imagePullSecrets { pullSecretNames[i] = s.Name } - data, err := prepareSkillsInitData(gitRefs, authSecretRef, ociRefs, insecureOCI, pullSecretNames) + cfg, err := prepareSkillsInitConfig(gitRefs, authSecretRef, ociRefs, insecureOCI, pullSecretNames) if err != nil { - return nil, nil, err + return nil, nil, nil, err } - script, err := buildSkillsScript(data) + cfgJSON, err := json.Marshal(cfg) if err != nil { - return nil, nil, err + return nil, nil, nil, fmt.Errorf("marshal skills-init config: %w", err) + } + + cmName := SkillsInitConfigMapName(agentName) + configMap = &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: cmName, + Namespace: agentNamespace, + }, + Data: map[string]string{ + skillsinit.ConfigMapKey: string(cfgJSON), + }, } + initSecCtx := securityContext if initSecCtx != nil { initSecCtx = initSecCtx.DeepCopy() } + const configVolumeName = "skills-init-config" + volumes = append(volumes, corev1.Volume{ + Name: configVolumeName, + VolumeSource: corev1.VolumeSource{ + ConfigMap: &corev1.ConfigMapVolumeSource{ + LocalObjectReference: corev1.LocalObjectReference{Name: cmName}, + }, + }, + }) volumeMounts := []corev1.VolumeMount{ - {Name: "kagent-skills", MountPath: "/skills"}, + {Name: "kagent-skills", MountPath: skillsinit.SkillsDir}, + {Name: configVolumeName, MountPath: skillsinit.ConfigMountPath, ReadOnly: true}, } - // Mount single auth secret if provided. if authSecretRef != nil { volumes = append(volumes, corev1.Volume{ Name: "git-auth", @@ -1375,13 +1367,11 @@ func buildSkillsInitContainer( }) volumeMounts = append(volumeMounts, corev1.VolumeMount{ Name: "git-auth", - MountPath: "/git-auth", + MountPath: skillsinit.AuthMountPath, ReadOnly: true, }) } - // Mount each imagePullSecret directly into skills-init under /docker-secrets/. - // The script merges them into /tmp/kagent-docker-config/config.json and exports DOCKER_CONFIG. for _, secret := range imagePullSecrets { volName := "pull-secret-" + secret.Name volumes = append(volumes, corev1.Volume{ @@ -1394,7 +1384,7 @@ func buildSkillsInitContainer( }) volumeMounts = append(volumeMounts, corev1.VolumeMount{ Name: volName, - MountPath: "/docker-secrets/" + secret.Name, + MountPath: skillsinit.DockerSecretsDir + "/" + secret.Name, ReadOnly: true, }) } @@ -1402,15 +1392,15 @@ func buildSkillsInitContainer( skillsInitContainer := corev1.Container{ Name: "skills-init", Image: DefaultSkillsInitImageConfig.Image(), - Command: []string{"/bin/sh", "-c", script}, + Command: []string{"/usr/local/bin/skills-init"}, VolumeMounts: volumeMounts, SecurityContext: initSecCtx, - Env: env, + Env: envVars, Resources: resources, } containers = append(containers, skillsInitContainer) - return containers, volumes, nil + return containers, volumes, configMap, nil } func (a *adkApiTranslator) runPlugins(ctx context.Context, agent v1alpha2.AgentObject, outputs *AgentOutputs) error { diff --git a/go/core/internal/controller/translator/agent/git_skills_test.go b/go/core/internal/controller/translator/agent/git_skills_test.go index a1c49dfe7a..6a95f5049c 100644 --- a/go/core/internal/controller/translator/agent/git_skills_test.go +++ b/go/core/internal/controller/translator/agent/git_skills_test.go @@ -2,6 +2,7 @@ package agent_test import ( "context" + "encoding/json" "testing" "github.com/stretchr/testify/assert" @@ -9,15 +10,36 @@ import ( "github.com/kagent-dev/kagent/go/api/v1alpha2" translator "github.com/kagent-dev/kagent/go/core/internal/controller/translator/agent" + "github.com/kagent-dev/kagent/go/core/internal/skillsinit" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" schemev1 "k8s.io/client-go/kubernetes/scheme" + "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" ) +// findSkillsInitConfig returns the parsed skills-init ConfigMap content from +// the translator outputs. Fails the test if no matching ConfigMap exists. +func findSkillsInitConfig(t *testing.T, manifest []client.Object, agentName string) skillsinit.Config { + t.Helper() + want := translator.SkillsInitConfigMapName(agentName) + for _, obj := range manifest { + cm, ok := obj.(*corev1.ConfigMap) + if !ok || cm.Name != want { + continue + } + var cfg skillsinit.Config + require.NoError(t, json.Unmarshal([]byte(cm.Data[skillsinit.ConfigMapKey]), &cfg), + "skills-init ConfigMap %q has invalid JSON", cm.Name) + return cfg + } + t.Fatalf("skills-init ConfigMap %q not found in manifest", want) + return skillsinit.Config{} +} + func Test_AdkApiTranslator_Skills(t *testing.T) { scheme := schemev1.Scheme require.NoError(t, v1alpha2.AddToScheme(scheme)) @@ -338,33 +360,58 @@ func Test_AdkApiTranslator_Skills(t *testing.T) { // There should be exactly one init container assert.Len(t, initContainers, 1, "should have exactly one init container") - // Verify the script is passed via /bin/sh -c - require.Len(t, skillsInitContainer.Command, 3) - assert.Equal(t, "/bin/sh", skillsInitContainer.Command[0]) - assert.Equal(t, "-c", skillsInitContainer.Command[1]) - script := skillsInitContainer.Command[2] + // The new binary is invoked with a single argv entry. + require.Len(t, skillsInitContainer.Command, 1) + assert.Equal(t, "/usr/local/bin/skills-init", skillsInitContainer.Command[0]) + + cfg := findSkillsInitConfig(t, outputs.Manifest, tt.agent.Name) if tt.wantContainsBranch != "" { - assert.Contains(t, script, tt.wantContainsBranch) - assert.Contains(t, script, "--branch") + found := false + for _, g := range cfg.GitRefs { + if g.Ref == tt.wantContainsBranch && !g.IsCommit { + found = true + } + } + assert.True(t, found, "expected git ref with branch %q", tt.wantContainsBranch) } if tt.wantContainsCommit != "" { - assert.Contains(t, script, tt.wantContainsCommit) - assert.Contains(t, script, "git checkout") + found := false + for _, g := range cfg.GitRefs { + if g.Ref == tt.wantContainsCommit && g.IsCommit { + found = true + } + } + assert.True(t, found, "expected git ref with commit %q", tt.wantContainsCommit) } if tt.wantContainsPath != "" { - assert.Contains(t, script, tt.wantContainsPath) - assert.Contains(t, script, "mktemp") + found := false + for _, g := range cfg.GitRefs { + if g.SubPath == tt.wantContainsPath { + found = true + } + } + assert.True(t, found, "expected git ref with subpath %q", tt.wantContainsPath) } if tt.wantContainsKrane { - assert.Contains(t, script, "krane export") + assert.NotEmpty(t, cfg.OCIRefs, "expected OCI refs in config") } + // wantScriptContains is reused as a list of substrings expected + // across the structured config (host names for ssh-keyscan, etc). + cfgBlob, _ := json.Marshal(cfg) for _, want := range tt.wantScriptContains { - assert.Contains(t, script, want) + switch want { + case "ssh-keyscan": + assert.NotEmpty(t, cfg.SSHHosts, "expected SSHHosts to be set for ssh-keyscan") + case "credential.helper": + assert.NotEmpty(t, cfg.AuthMountPath, "expected AuthMountPath to be set for credential helper") + default: + assert.Contains(t, string(cfgBlob), want) + } } // Verify /skills volume mount exists @@ -421,8 +468,8 @@ func Test_AdkApiTranslator_Skills(t *testing.T) { // Verify insecure flag for OCI skills if tt.agent.Spec.Skills != nil && tt.agent.Spec.Skills.InsecureSkipVerify { require.NotNil(t, skillsInitContainer) - script := skillsInitContainer.Command[2] - assert.Contains(t, script, "--insecure") + cfg := findSkillsInitConfig(t, outputs.Manifest, tt.agent.Name) + assert.True(t, cfg.InsecureOCI, "InsecureOCI should be true") } }) } @@ -545,8 +592,10 @@ func Test_AdkApiTranslator_SkillsImagePullSecrets(t *testing.T) { require.Equal(t, "skills-init", initContainers[0].Name, "the single init container must be skills-init") skillsInitContainer := &initContainers[0] - require.Len(t, skillsInitContainer.Command, 3) - script := skillsInitContainer.Command[2] + require.Len(t, skillsInitContainer.Command, 1) + assert.Equal(t, "/usr/local/bin/skills-init", skillsInitContainer.Command[0]) + + cfg := findSkillsInitConfig(t, outputs.Manifest, tt.agent.Name) // No docker-auth-init container should ever exist. for _, c := range initContainers { @@ -558,13 +607,20 @@ func Test_AdkApiTranslator_SkillsImagePullSecrets(t *testing.T) { } if tt.wantImagePullSecret { - // Script must contain the credential merge logic. - assert.Contains(t, script, "jq") - assert.Contains(t, script, ".dockerconfigjson") - assert.Contains(t, script, "/tmp/kagent-docker-config/config.json") - assert.Contains(t, script, "export DOCKER_CONFIG=/tmp/kagent-docker-config") - require.NotNil(t, tt.agent.Spec.Skills) + // Config must list each imagePullSecret. + assert.ElementsMatch(t, + func() []string { + out := make([]string, 0, len(tt.agent.Spec.Skills.ImagePullSecrets)) + for _, ps := range tt.agent.Spec.Skills.ImagePullSecrets { + out = append(out, ps.Name) + } + return out + }(), + cfg.ImagePullSecrets, + "config should reference all imagePullSecrets", + ) + for _, ps := range tt.agent.Spec.Skills.ImagePullSecrets { volName := "pull-secret-" + ps.Name @@ -585,14 +641,10 @@ func Test_AdkApiTranslator_SkillsImagePullSecrets(t *testing.T) { } } assert.True(t, hasPullSecretMount, "skills-init should mount pull-secret %q at /docker-secrets/%s", volName, ps.Name) - - // Script references each secret by name. - assert.Contains(t, script, "/docker-secrets/"+ps.Name+"/.dockerconfigjson") } } else { - // No credential merge logic in the script. - assert.NotContains(t, script, "DOCKER_CONFIG") - assert.NotContains(t, script, "kagent-docker-config") + // No imagePullSecrets in config. + assert.Empty(t, cfg.ImagePullSecrets, "no imagePullSecrets expected in config") // No pull-secret volumes. for _, v := range deployment.Spec.Template.Spec.Volumes { assert.False(t, len(v.Name) > len("pull-secret-") && v.Name[:len("pull-secret-")] == "pull-secret-", diff --git a/go/core/internal/controller/translator/agent/manifest_builder.go b/go/core/internal/controller/translator/agent/manifest_builder.go index 3b059bbef5..bd27bf30a2 100644 --- a/go/core/internal/controller/translator/agent/manifest_builder.go +++ b/go/core/internal/controller/translator/agent/manifest_builder.go @@ -2,6 +2,8 @@ package agent import ( "context" + "crypto/sha256" + "encoding/hex" "encoding/json" "fmt" "maps" @@ -9,6 +11,7 @@ import ( "github.com/kagent-dev/kagent/go/api/adk" "github.com/kagent-dev/kagent/go/api/v1alpha2" "github.com/kagent-dev/kagent/go/core/internal/controller/translator/labels" + "github.com/kagent-dev/kagent/go/core/internal/skillsinit" "github.com/kagent-dev/kagent/go/core/internal/utils" "github.com/kagent-dev/kagent/go/core/pkg/env" "github.com/kagent-dev/kagent/go/core/pkg/sandboxbackend" @@ -40,6 +43,11 @@ type podRuntimeInputs struct { volumes []corev1.Volume volumeMounts []corev1.VolumeMount securityContext *corev1.SecurityContext + // skillsInitConfigMap is the ConfigMap (when skills are configured) that + // carries the JSON configuration consumed by the skills-init binary. It + // is added to AgentOutputs.Manifest and content-hashed into the pod + // template annotations so changes trigger a rollout. + skillsInitConfigMap *corev1.ConfigMap } func (a *adkApiTranslator) BuildManifest( @@ -72,6 +80,10 @@ func (a *adkApiTranslator) BuildManifest( return nil, err } + if podRuntime.skillsInitConfigMap != nil { + outputs.Manifest = append(outputs.Manifest, podRuntime.skillsInitConfigMap) + } + podTemplate := buildPodTemplate(manifestCtx, podRuntime, configSecret.configHash) workloadObjects, err := a.buildWorkloadObjects(ctx, manifestCtx, podTemplate) @@ -250,7 +262,7 @@ func buildPodRuntime( volumeMounts = append(volumeMounts, manifestCtx.deployment.VolumeMounts...) needCodeExecIsolation := cfg != nil && cfg.GetExecuteCode() - initContainers, err := buildSkillsRuntime(manifestCtx, &sharedEnv, &volumes, &volumeMounts, &needCodeExecIsolation) + initContainers, skillsInitCM, err := buildSkillsRuntime(manifestCtx, &sharedEnv, &volumes, &volumeMounts, &needCodeExecIsolation) if err != nil { return nil, err } @@ -272,11 +284,12 @@ func buildPodRuntime( envVars = append(envVars, sharedEnv...) return &podRuntimeInputs{ - initContainers: initContainers, - envVars: envVars, - volumes: volumes, - volumeMounts: volumeMounts, - securityContext: buildContainerSecurityContext(manifestCtx.deployment.SecurityContext, needCodeExecIsolation), + initContainers: initContainers, + envVars: envVars, + volumes: volumes, + volumeMounts: volumeMounts, + securityContext: buildContainerSecurityContext(manifestCtx.deployment.SecurityContext, needCodeExecIsolation), + skillsInitConfigMap: skillsInitCM, }, nil } @@ -340,16 +353,16 @@ func buildSkillsRuntime( volumes *[]corev1.Volume, volumeMounts *[]corev1.VolumeMount, needCodeExecIsolation *bool, -) ([]corev1.Container, error) { +) ([]corev1.Container, *corev1.ConfigMap, error) { spec := manifestCtx.agent.GetAgentSpec() if spec.Skills == nil { - return nil, nil + return nil, nil, nil } skills := spec.Skills.Refs gitRefs := spec.Skills.GitRefs if len(skills) == 0 && len(gitRefs) == 0 { - return nil, nil + return nil, nil, nil } *needCodeExecIsolation = true @@ -378,7 +391,9 @@ func buildSkillsRuntime( initEnv = append(initEnv, spec.Skills.InitContainer.Env...) } - container, skillsVolumes, err := buildSkillsInitContainer( + container, skillsVolumes, configMap, err := buildSkillsInitContainer( + manifestCtx.agent.GetName(), + manifestCtx.agent.GetNamespace(), gitRefs, spec.Skills.GitAuthSecretRef, skills, @@ -389,11 +404,11 @@ func buildSkillsRuntime( spec.Skills.ImagePullSecrets, ) if err != nil { - return nil, fmt.Errorf("failed to build skills init container: %w", err) + return nil, nil, fmt.Errorf("failed to build skills init container: %w", err) } *volumes = append(*volumes, skillsVolumes...) - return container, nil + return container, configMap, nil } func projectedTokenVolume() corev1.Volume { @@ -443,6 +458,14 @@ func buildPodTemplate( podTemplateAnnotations = map[string]string{} } podTemplateAnnotations["kagent.dev/config-hash"] = fmt.Sprintf("%d", configHash) + if cm := runtimeInputs.skillsInitConfigMap; cm != nil { + // Hash the rendered config so a content change in the skills-init + // ConfigMap triggers a pod rollout — the PodSpec only names the + // ConfigMap, so without this annotation Kubernetes would leave the + // pod running against stale config. + sum := sha256.Sum256([]byte(cm.Data[skillsinit.ConfigMapKey])) + podTemplateAnnotations["kagent.dev/skills-init-hash"] = hex.EncodeToString(sum[:8]) + } probeConf := getRuntimeProbeConfig(agentRuntime(manifestCtx.agent.GetAgentSpec())) diff --git a/go/core/internal/controller/translator/agent/skills-init.sh.tmpl b/go/core/internal/controller/translator/agent/skills-init.sh.tmpl deleted file mode 100644 index 4db51dd0d5..0000000000 --- a/go/core/internal/controller/translator/agent/skills-init.sh.tmpl +++ /dev/null @@ -1,95 +0,0 @@ -set -e -{{- if .ImagePullSecrets }} -mkdir -p /tmp/kagent-docker-config -merged='{"auths":{}}' -{{- range .ImagePullSecrets }} -if [ -f /docker-secrets/{{ . }}/.dockerconfigjson ]; then - merged="$(printf '%s\n%s\n' "$merged" "$(cat /docker-secrets/{{ . }}/.dockerconfigjson)" | jq -s '.[0].auths * .[1].auths | {"auths": .}')" -fi -{{- end }} -printf '%s' "$merged" > /tmp/kagent-docker-config/config.json -export DOCKER_CONFIG=/tmp/kagent-docker-config -{{- end }} -{{- if .AuthMountPath }} -_auth_mount="$(cat <<'ENDVAL' -{{ .AuthMountPath }} -ENDVAL -)" -if [ -f "${_auth_mount}/ssh-privatekey" ]; then - mkdir -p ~/.ssh - chmod 700 ~/.ssh - cp "${_auth_mount}/ssh-privatekey" ~/.ssh/id_rsa - chmod 600 ~/.ssh/id_rsa - touch ~/.ssh/known_hosts - chmod 600 ~/.ssh/known_hosts -{{- range .SSHHosts }} -{{- if .Port }} - ssh-keyscan -H -p "{{ .Port }}" "{{ .Host }}" >> ~/.ssh/known_hosts -{{- else }} - ssh-keyscan -H "{{ .Host }}" >> ~/.ssh/known_hosts -{{- end }} -{{- end }} -elif [ -f "${_auth_mount}/token" ]; then - git config --global credential.helper "!f() { echo username=x-access-token; echo password=\$(cat ${_auth_mount}/token); }; f" -fi -{{- end }} -{{- range .GitRefs }} -_url="$(cat <<'ENDVAL' -{{ .URL }} -ENDVAL -)" -_ref="$(cat <<'ENDVAL' -{{ .Ref }} -ENDVAL -)" -_dest="$(cat <<'ENDVAL' -{{ .Dest }} -ENDVAL -)" -{{- if .IsCommit }} -echo "Cloning ${_url} (commit ${_ref}) into ${_dest}" -git clone -- "$_url" "$_dest" -cd "$_dest" && git checkout "$_ref" -{{- else }} -echo "Cloning ${_url} (ref ${_ref}) into ${_dest}" -git clone --depth 1 --branch "$_ref" -- "$_url" "$_dest" -{{- end }} -{{- if .SubPath }} -_subpath="$(cat <<'ENDVAL' -{{ .SubPath }} -ENDVAL -)" -_tmp="$(mktemp -d)" -cp -rL "${_dest}/${_subpath}/." "$_tmp/" -rm -rf "$_dest" -mv "$_tmp" "$_dest" -{{- end }} -{{- end }} -{{- range .OCIRefs }} -_image="$(cat <<'ENDVAL' -{{ .Image }} -ENDVAL -)" -_dest="$(cat <<'ENDVAL' -{{ .Dest }} -ENDVAL -)" -echo "Exporting OCI image ${_image} into ${_dest}" -_uname="$(uname -m)" -case "$_uname" in - x86_64|amd64) - _arch="amd64" - ;; - aarch64|arm64) - _arch="arm64" - ;; - *) - echo "Unsupported architecture for OCI export: ${_uname}" >&2 - exit 1 - ;; -esac -krane export{{ if $.InsecureOCI }} --insecure{{ end }} --platform "linux/${_arch}" "$_image" '/tmp/oci-skill.tar' -mkdir -p "$_dest" -tar xf '/tmp/oci-skill.tar' -C "$_dest" -rm -f '/tmp/oci-skill.tar' -{{- end }} diff --git a/go/core/internal/controller/translator/agent/skills_unit_test.go b/go/core/internal/controller/translator/agent/skills_unit_test.go index 6abc8c2dc3..e603f44531 100644 --- a/go/core/internal/controller/translator/agent/skills_unit_test.go +++ b/go/core/internal/controller/translator/agent/skills_unit_test.go @@ -4,6 +4,7 @@ import ( "testing" "github.com/kagent-dev/kagent/go/api/v1alpha2" + "github.com/kagent-dev/kagent/go/core/internal/skillsinit" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" corev1 "k8s.io/api/core/v1" @@ -119,7 +120,7 @@ func Test_gitSSHHost(t *testing.T) { tests := []struct { name string rawURL string - want sshHostData + want skillsinit.SSHHost wantOK bool }{ { @@ -130,31 +131,31 @@ func Test_gitSSHHost(t *testing.T) { { name: "scp-style ssh repo", rawURL: "git@github.com:org/repo.git", - want: sshHostData{Host: "github.com"}, + want: skillsinit.SSHHost{Host: "github.com"}, wantOK: true, }, { name: "ssh url with non-default port", rawURL: "ssh://git@gitea-ssh.gitea:2222/gitops/repo.git", - want: sshHostData{Host: "gitea-ssh.gitea", Port: "2222"}, + want: skillsinit.SSHHost{Host: "gitea-ssh.gitea", Port: "2222"}, wantOK: true, }, { name: "ssh url without explicit port", rawURL: "ssh://git@gitea-ssh.gitea/gitops/repo.git", - want: sshHostData{Host: "gitea-ssh.gitea"}, + want: skillsinit.SSHHost{Host: "gitea-ssh.gitea"}, wantOK: true, }, { name: "git+ssh url with port", rawURL: "git+ssh://git@example.com:2222/org/repo.git", - want: sshHostData{Host: "example.com", Port: "2222"}, + want: skillsinit.SSHHost{Host: "example.com", Port: "2222"}, wantOK: true, }, { name: "ssh url with default port 22 normalizes to empty", rawURL: "ssh://git@gitea-ssh.gitea:22/gitops/repo.git", - want: sshHostData{Host: "gitea-ssh.gitea"}, + want: skillsinit.SSHHost{Host: "gitea-ssh.gitea"}, wantOK: true, }, { @@ -223,7 +224,7 @@ func Test_validateSubPath(t *testing.T) { } } -func Test_prepareSkillsInitData_duplicateNames(t *testing.T) { +func Test_prepareSkillsInitConfig_duplicateNames(t *testing.T) { tests := []struct { name string gitRefs []v1alpha2.GitRepo @@ -276,7 +277,7 @@ func Test_prepareSkillsInitData_duplicateNames(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - _, err := prepareSkillsInitData(tt.gitRefs, nil, tt.ociRefs, false, nil) + _, err := prepareSkillsInitConfig(tt.gitRefs, nil, tt.ociRefs, false, nil) if tt.wantErr != "" { require.Error(t, err) assert.Contains(t, err.Error(), tt.wantErr) @@ -287,8 +288,8 @@ func Test_prepareSkillsInitData_duplicateNames(t *testing.T) { } } -func Test_prepareSkillsInitData_pathTraversal(t *testing.T) { - _, err := prepareSkillsInitData( +func Test_prepareSkillsInitConfig_pathTraversal(t *testing.T) { + _, err := prepareSkillsInitConfig( []v1alpha2.GitRepo{ {URL: "https://github.com/org/repo", Ref: "main", Path: "../escape"}, }, @@ -298,8 +299,8 @@ func Test_prepareSkillsInitData_pathTraversal(t *testing.T) { assert.Contains(t, err.Error(), "must not contain '..'") } -func Test_prepareSkillsInitData_absolutePath(t *testing.T) { - _, err := prepareSkillsInitData( +func Test_prepareSkillsInitConfig_absolutePath(t *testing.T) { + _, err := prepareSkillsInitConfig( []v1alpha2.GitRepo{ {URL: "https://github.com/org/repo", Ref: "main", Path: "/etc/passwd"}, }, @@ -309,8 +310,8 @@ func Test_prepareSkillsInitData_absolutePath(t *testing.T) { assert.Contains(t, err.Error(), "must be relative") } -func Test_prepareSkillsInitData_authMountPath(t *testing.T) { - data, err := prepareSkillsInitData( +func Test_prepareSkillsInitConfig_authMountPath(t *testing.T) { + data, err := prepareSkillsInitConfig( []v1alpha2.GitRepo{{URL: "https://github.com/org/repo", Ref: "main"}}, &corev1.LocalObjectReference{Name: "my-secret"}, nil, false, nil, @@ -319,8 +320,8 @@ func Test_prepareSkillsInitData_authMountPath(t *testing.T) { assert.Equal(t, "/git-auth", data.AuthMountPath) } -func Test_prepareSkillsInitData_sshHosts(t *testing.T) { - data, err := prepareSkillsInitData( +func Test_prepareSkillsInitConfig_sshHosts(t *testing.T) { + data, err := prepareSkillsInitConfig( []v1alpha2.GitRepo{ {URL: "https://github.com/org/https-repo", Ref: "main"}, {URL: "git@github.com:org/scp-repo.git", Ref: "main"}, @@ -332,14 +333,14 @@ func Test_prepareSkillsInitData_sshHosts(t *testing.T) { false, nil, ) require.NoError(t, err) - assert.Equal(t, []sshHostData{ + assert.Equal(t, []skillsinit.SSHHost{ {Host: "gitea-ssh.gitea"}, {Host: "github.com"}, }, data.SSHHosts) } -func Test_prepareSkillsInitData_sshHostsDedupesDefaultPort(t *testing.T) { - data, err := prepareSkillsInitData( +func Test_prepareSkillsInitConfig_sshHostsDedupesDefaultPort(t *testing.T) { + data, err := prepareSkillsInitConfig( []v1alpha2.GitRepo{ {URL: "git@github.com:org/scp-repo.git", Ref: "main"}, {URL: "ssh://git@github.com:22/org/ssh-repo.git", Ref: "main", Name: "ssh-repo"}, @@ -349,13 +350,13 @@ func Test_prepareSkillsInitData_sshHostsDedupesDefaultPort(t *testing.T) { false, nil, ) require.NoError(t, err) - assert.Equal(t, []sshHostData{ + assert.Equal(t, []skillsinit.SSHHost{ {Host: "github.com"}, }, data.SSHHosts) } -func Test_prepareSkillsInitData_noAuthSkipsSSHHosts(t *testing.T) { - data, err := prepareSkillsInitData( +func Test_prepareSkillsInitConfig_noAuthSkipsSSHHosts(t *testing.T) { + data, err := prepareSkillsInitConfig( []v1alpha2.GitRepo{ {URL: "git@github.com:org/scp-repo.git", Ref: "main"}, {URL: "ssh://git@gitea-ssh.gitea/gitops/ssh-repo.git", Ref: "main", Name: "ssh-repo"}, diff --git a/go/core/internal/controller/translator/agent/testdata/outputs/agent_with_git_skills.json b/go/core/internal/controller/translator/agent/testdata/outputs/agent_with_git_skills.json index 98cc97beab..48473e031a 100644 --- a/go/core/internal/controller/translator/agent/testdata/outputs/agent_with_git_skills.json +++ b/go/core/internal/controller/translator/agent/testdata/outputs/agent_with_git_skills.json @@ -91,6 +91,25 @@ ] } }, + { + "data": { + "config.json": "{\"authMountPath\":\"/git-auth\",\"gitRefs\":[{\"url\":\"https://github.com/org/my-skills\",\"ref\":\"v2.0.0\",\"dest\":\"/skills/k8s-skill\",\"subPath\":\"skills/k8s\"},{\"url\":\"https://github.com/org/another-skill\",\"ref\":\"abc123def456abc123def456abc123def456abc1\",\"dest\":\"/skills/another-skill\",\"isCommit\":true},{\"url\":\"https://github.com/org/private-skill\",\"ref\":\"main\",\"dest\":\"/skills/private-skill\"}],\"ociRefs\":[{\"image\":\"ghcr.io/org/oci-skill:v1.0\",\"dest\":\"/skills/oci-skill\"}]}" + }, + "metadata": { + "name": "git-skills-agent-skills-init", + "namespace": "test", + "ownerReferences": [ + { + "apiVersion": "kagent.dev/v1alpha2", + "blockOwnerDeletion": true, + "controller": true, + "kind": "Agent", + "name": "git-skills-agent", + "uid": "" + } + ] + } + }, { "apiVersion": "apps/v1", "kind": "Deployment", @@ -132,7 +151,8 @@ "template": { "metadata": { "annotations": { - "kagent.dev/config-hash": "9443054578640766875" + "kagent.dev/config-hash": "9443054578640766875", + "kagent.dev/skills-init-hash": "9fa4c702efa19e21" }, "labels": { "app": "kagent", @@ -239,9 +259,7 @@ "initContainers": [ { "command": [ - "/bin/sh", - "-c", - "set -e\n_auth_mount=\"$(cat \u003c\u003c'ENDVAL'\n/git-auth\nENDVAL\n)\"\nif [ -f \"${_auth_mount}/ssh-privatekey\" ]; then\n mkdir -p ~/.ssh\n chmod 700 ~/.ssh\n cp \"${_auth_mount}/ssh-privatekey\" ~/.ssh/id_rsa\n chmod 600 ~/.ssh/id_rsa\n touch ~/.ssh/known_hosts\n chmod 600 ~/.ssh/known_hosts\nelif [ -f \"${_auth_mount}/token\" ]; then\n git config --global credential.helper \"!f() { echo username=x-access-token; echo password=\\$(cat ${_auth_mount}/token); }; f\"\nfi\n_url=\"$(cat \u003c\u003c'ENDVAL'\nhttps://github.com/org/my-skills\nENDVAL\n)\"\n_ref=\"$(cat \u003c\u003c'ENDVAL'\nv2.0.0\nENDVAL\n)\"\n_dest=\"$(cat \u003c\u003c'ENDVAL'\n/skills/k8s-skill\nENDVAL\n)\"\necho \"Cloning ${_url} (ref ${_ref}) into ${_dest}\"\ngit clone --depth 1 --branch \"$_ref\" -- \"$_url\" \"$_dest\"\n_subpath=\"$(cat \u003c\u003c'ENDVAL'\nskills/k8s\nENDVAL\n)\"\n_tmp=\"$(mktemp -d)\"\ncp -rL \"${_dest}/${_subpath}/.\" \"$_tmp/\"\nrm -rf \"$_dest\"\nmv \"$_tmp\" \"$_dest\"\n_url=\"$(cat \u003c\u003c'ENDVAL'\nhttps://github.com/org/another-skill\nENDVAL\n)\"\n_ref=\"$(cat \u003c\u003c'ENDVAL'\nabc123def456abc123def456abc123def456abc1\nENDVAL\n)\"\n_dest=\"$(cat \u003c\u003c'ENDVAL'\n/skills/another-skill\nENDVAL\n)\"\necho \"Cloning ${_url} (commit ${_ref}) into ${_dest}\"\ngit clone -- \"$_url\" \"$_dest\"\ncd \"$_dest\" \u0026\u0026 git checkout \"$_ref\"\n_url=\"$(cat \u003c\u003c'ENDVAL'\nhttps://github.com/org/private-skill\nENDVAL\n)\"\n_ref=\"$(cat \u003c\u003c'ENDVAL'\nmain\nENDVAL\n)\"\n_dest=\"$(cat \u003c\u003c'ENDVAL'\n/skills/private-skill\nENDVAL\n)\"\necho \"Cloning ${_url} (ref ${_ref}) into ${_dest}\"\ngit clone --depth 1 --branch \"$_ref\" -- \"$_url\" \"$_dest\"\n_image=\"$(cat \u003c\u003c'ENDVAL'\nghcr.io/org/oci-skill:v1.0\nENDVAL\n)\"\n_dest=\"$(cat \u003c\u003c'ENDVAL'\n/skills/oci-skill\nENDVAL\n)\"\necho \"Exporting OCI image ${_image} into ${_dest}\"\n_uname=\"$(uname -m)\"\ncase \"$_uname\" in\n x86_64|amd64)\n _arch=\"amd64\"\n ;;\n aarch64|arm64)\n _arch=\"arm64\"\n ;;\n *)\n echo \"Unsupported architecture for OCI export: ${_uname}\" \u003e\u00262\n exit 1\n ;;\nesac\nkrane export --platform \"linux/${_arch}\" \"$_image\" '/tmp/oci-skill.tar'\nmkdir -p \"$_dest\"\ntar xf '/tmp/oci-skill.tar' -C \"$_dest\"\nrm -f '/tmp/oci-skill.tar'\n" + "/usr/local/bin/skills-init" ], "image": "cr.kagent.dev/kagent-dev/kagent/skills-init:dev", "name": "skills-init", @@ -260,6 +278,11 @@ "mountPath": "/skills", "name": "kagent-skills" }, + { + "mountPath": "/etc/kagent/skills-init", + "name": "skills-init-config", + "readOnly": true + }, { "mountPath": "/git-auth", "name": "git-auth", @@ -280,6 +303,12 @@ "emptyDir": {}, "name": "kagent-skills" }, + { + "configMap": { + "name": "git-skills-agent-skills-init" + }, + "name": "skills-init-config" + }, { "name": "git-auth", "secret": { diff --git a/go/core/internal/controller/translator/agent/testdata/outputs/agent_with_skills.json b/go/core/internal/controller/translator/agent/testdata/outputs/agent_with_skills.json index e65fa19d57..a692fa0852 100644 --- a/go/core/internal/controller/translator/agent/testdata/outputs/agent_with_skills.json +++ b/go/core/internal/controller/translator/agent/testdata/outputs/agent_with_skills.json @@ -91,6 +91,25 @@ ] } }, + { + "data": { + "config.json": "{\"ociRefs\":[{\"image\":\"foo:latest\",\"dest\":\"/skills/foo\"}]}" + }, + "metadata": { + "name": "skills-agent-skills-init", + "namespace": "test", + "ownerReferences": [ + { + "apiVersion": "kagent.dev/v1alpha2", + "blockOwnerDeletion": true, + "controller": true, + "kind": "Agent", + "name": "skills-agent", + "uid": "" + } + ] + } + }, { "apiVersion": "apps/v1", "kind": "Deployment", @@ -132,7 +151,8 @@ "template": { "metadata": { "annotations": { - "kagent.dev/config-hash": "4496754913718271849" + "kagent.dev/config-hash": "4496754913718271849", + "kagent.dev/skills-init-hash": "a03b999a4613cdde" }, "labels": { "app": "kagent", @@ -239,9 +259,7 @@ "initContainers": [ { "command": [ - "/bin/sh", - "-c", - "set -e\n_image=\"$(cat \u003c\u003c'ENDVAL'\nfoo:latest\nENDVAL\n)\"\n_dest=\"$(cat \u003c\u003c'ENDVAL'\n/skills/foo\nENDVAL\n)\"\necho \"Exporting OCI image ${_image} into ${_dest}\"\n_uname=\"$(uname -m)\"\ncase \"$_uname\" in\n x86_64|amd64)\n _arch=\"amd64\"\n ;;\n aarch64|arm64)\n _arch=\"arm64\"\n ;;\n *)\n echo \"Unsupported architecture for OCI export: ${_uname}\" \u003e\u00262\n exit 1\n ;;\nesac\nkrane export --platform \"linux/${_arch}\" \"$_image\" '/tmp/oci-skill.tar'\nmkdir -p \"$_dest\"\ntar xf '/tmp/oci-skill.tar' -C \"$_dest\"\nrm -f '/tmp/oci-skill.tar'\n" + "/usr/local/bin/skills-init" ], "image": "cr.kagent.dev/kagent-dev/kagent/skills-init:dev", "name": "skills-init", @@ -259,6 +277,11 @@ { "mountPath": "/skills", "name": "kagent-skills" + }, + { + "mountPath": "/etc/kagent/skills-init", + "name": "skills-init-config", + "readOnly": true } ] } @@ -275,6 +298,12 @@ "emptyDir": {}, "name": "kagent-skills" }, + { + "configMap": { + "name": "skills-agent-skills-init" + }, + "name": "skills-init-config" + }, { "name": "kagent-token", "projected": { diff --git a/go/core/internal/skillsinit/config.go b/go/core/internal/skillsinit/config.go new file mode 100644 index 0000000000..3af4d2500b --- /dev/null +++ b/go/core/internal/skillsinit/config.go @@ -0,0 +1,78 @@ +// Package skillsinit defines the contract between the kagent controller and +// the skills-init container binary. The controller renders a Config to JSON, +// projects it via a ConfigMap, and mounts it at /etc/kagent/skills-init. The +// binary deserializes it and performs the fetch operations. +// +// User-controlled values flow through structured JSON and then into argv-style +// process calls (never a shell), so shell metacharacters in user fields are +// inert and cannot trigger command execution. +package skillsinit + +const ( + // ConfigMountPath is where the ConfigMap is mounted inside the container. + ConfigMountPath = "/etc/kagent/skills-init" + // ConfigFileName is the file the binary reads inside ConfigMountPath. + ConfigFileName = "config.json" + // ConfigMapKey is the key the controller writes inside the ConfigMap. + ConfigMapKey = "config.json" + + // SkillsDir is the shared volume both containers see; the binary writes + // fetched skill contents here. + SkillsDir = "/skills" + // AuthMountPath is where the optional git auth secret is mounted. + AuthMountPath = "/git-auth" + // DockerSecretsDir is where dockerconfigjson secrets are mounted, one + // per directory keyed by secret name. + DockerSecretsDir = "/docker-secrets" +) + +// Config is the full input the binary expects. Field names are stable and any +// change requires bumping the controller and the binary in lockstep. +type Config struct { + // AuthMountPath is non-empty when a gitAuthSecretRef was configured. + // When set the binary expects a token or ssh-privatekey at this path. + AuthMountPath string `json:"authMountPath,omitempty"` + + // GitRefs is the list of git repositories to clone. + GitRefs []GitRef `json:"gitRefs,omitempty"` + + // OCIRefs is the list of OCI images to pull and extract. + OCIRefs []OCIRef `json:"ociRefs,omitempty"` + + // InsecureOCI allows pulling from registries with untrusted certs. + InsecureOCI bool `json:"insecureOci,omitempty"` + + // SSHHosts are entries that should be pre-populated in ~/.ssh/known_hosts + // via ssh-keyscan before any git clone runs. + SSHHosts []SSHHost `json:"sshHosts,omitempty"` + + // ImagePullSecrets is the list of dockerconfigjson secret names mounted + // under DockerSecretsDir. The binary merges them into a single config.json + // that go-containerregistry consults during OCI pulls. + ImagePullSecrets []string `json:"imagePullSecrets,omitempty"` +} + +// GitRef describes a single git clone operation. +type GitRef struct { + URL string `json:"url"` + // Ref is a branch name, tag, or commit SHA. When IsCommit is true a + // full clone + checkout is used; otherwise a shallow --branch clone. + Ref string `json:"ref"` + Dest string `json:"dest"` + IsCommit bool `json:"isCommit,omitempty"` + // SubPath, if set, names a subdirectory inside the clone that becomes + // the final skill root. + SubPath string `json:"subPath,omitempty"` +} + +// OCIRef describes a single OCI image to pull and extract. +type OCIRef struct { + Image string `json:"image"` + Dest string `json:"dest"` +} + +// SSHHost is a known_hosts entry to seed with ssh-keyscan. +type SSHHost struct { + Host string `json:"host"` + Port string `json:"port,omitempty"` +} diff --git a/go/core/internal/skillsinit/copytree.go b/go/core/internal/skillsinit/copytree.go new file mode 100644 index 0000000000..52f82766a6 --- /dev/null +++ b/go/core/internal/skillsinit/copytree.go @@ -0,0 +1,82 @@ +package skillsinit + +import ( + "fmt" + "io" + "io/fs" + "os" + "path/filepath" +) + +// copyTree mirrors `cp -rL src/. dst/`: it dereferences symlinks (a symlink to +// a file becomes a regular file in dst; a symlink to a directory is walked). +// We follow links because the old script used `cp -rL`, matching git's +// in-repo symlinks pointing back into the same checkout. +// +// dst must already exist. +func copyTree(src, dst string) error { + rootInfo, err := os.Stat(src) + if err != nil { + return err + } + if !rootInfo.IsDir() { + return fmt.Errorf("copyTree source %q is not a directory", src) + } + + return walkFollow(src, "", func(rel string, info fs.FileInfo, openFile func() (io.ReadCloser, error)) error { + target := filepath.Join(dst, rel) + switch { + case info.IsDir(): + return os.MkdirAll(target, info.Mode().Perm()|0o700) + case info.Mode().IsRegular(): + if err := os.MkdirAll(filepath.Dir(target), 0o700); err != nil { + return err + } + in, err := openFile() + if err != nil { + return err + } + defer in.Close() + out, err := os.OpenFile(target, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, info.Mode().Perm()) + if err != nil { + return err + } + defer out.Close() + _, err = io.Copy(out, in) + return err + default: + // Sockets, devices, fifos in a skill checkout don't make sense; + // skip them rather than failing. + return nil + } + }) +} + +// walkFollow walks src dereferencing symlinks. rel is the path relative to +// src ("" for the root). The walker invokes fn with a lazily-opened reader +// for regular files. +func walkFollow(src, rel string, fn func(rel string, info fs.FileInfo, openFile func() (io.ReadCloser, error)) error) error { + full := filepath.Join(src, rel) + info, err := os.Stat(full) // Stat follows symlinks + if err != nil { + return err + } + if rel != "" { + if err := fn(rel, info, func() (io.ReadCloser, error) { return os.Open(full) }); err != nil { + return err + } + } + if !info.IsDir() { + return nil + } + entries, err := os.ReadDir(full) + if err != nil { + return err + } + for _, e := range entries { + if err := walkFollow(src, filepath.Join(rel, e.Name()), fn); err != nil { + return err + } + } + return nil +} diff --git a/go/core/internal/skillsinit/docker.go b/go/core/internal/skillsinit/docker.go new file mode 100644 index 0000000000..38e4c2e70d --- /dev/null +++ b/go/core/internal/skillsinit/docker.go @@ -0,0 +1,52 @@ +package skillsinit + +import ( + "encoding/json" + "fmt" + "os" + "path/filepath" +) + +// MergeDockerConfigs reads each ///.dockerconfigjson, merges +// their auths maps into a single config, and writes it to outPath. Returns +// outPath's parent dir so callers can set DOCKER_CONFIG. +// +// Missing per-secret files are skipped silently (matches the old script +// behavior, where a misconfigured secret is non-fatal). A malformed file is +// an error. +func MergeDockerConfigs(secretsDir string, secretNames []string, outPath string) (dockerConfigDir string, err error) { + merged := map[string]any{"auths": map[string]any{}} + + for _, name := range secretNames { + path := filepath.Join(secretsDir, name, ".dockerconfigjson") + raw, readErr := os.ReadFile(path) + if readErr != nil { + if os.IsNotExist(readErr) { + continue + } + return "", fmt.Errorf("read %s: %w", path, readErr) + } + var parsed struct { + Auths map[string]any `json:"auths"` + } + if err := json.Unmarshal(raw, &parsed); err != nil { + return "", fmt.Errorf("parse %s: %w", path, err) + } + dst := merged["auths"].(map[string]any) + for k, v := range parsed.Auths { + dst[k] = v + } + } + + if err := os.MkdirAll(filepath.Dir(outPath), 0o700); err != nil { + return "", fmt.Errorf("mkdir docker config: %w", err) + } + buf, err := json.Marshal(merged) + if err != nil { + return "", fmt.Errorf("marshal docker config: %w", err) + } + if err := os.WriteFile(outPath, buf, 0o600); err != nil { + return "", fmt.Errorf("write %s: %w", outPath, err) + } + return filepath.Dir(outPath), nil +} diff --git a/go/core/internal/skillsinit/git.go b/go/core/internal/skillsinit/git.go new file mode 100644 index 0000000000..3d0281afce --- /dev/null +++ b/go/core/internal/skillsinit/git.go @@ -0,0 +1,129 @@ +package skillsinit + +import ( + "fmt" + "os" + "os/exec" + "path/filepath" +) + +// CloneGit fetches a single git ref into ref.Dest. All user-controlled +// strings (URL, Ref, SubPath) are passed to git as separate argv entries via +// exec.Command — they never pass through a shell, so metacharacters in any of +// them are inert. +// +// When ref.IsCommit is true we do a full clone then `git checkout `, +// because shallow `--branch` does not accept commit SHAs. When false we use a +// depth-1 branch/tag clone. +// +// SubPath, if set, rewrites the destination so the final layout matches the +// requested in-repo subdirectory. +func CloneGit(ref GitRef) error { + if ref.IsCommit { + if err := runGit("clone", "--", ref.URL, ref.Dest); err != nil { + return err + } + if err := runGitIn(ref.Dest, "checkout", ref.Ref); err != nil { + return err + } + } else { + if err := runGit("clone", "--depth", "1", "--branch", ref.Ref, "--", ref.URL, ref.Dest); err != nil { + return err + } + } + + if ref.SubPath != "" { + if err := applySubPath(ref.Dest, ref.SubPath); err != nil { + return fmt.Errorf("apply subPath %q: %w", ref.SubPath, err) + } + } + return nil +} + +func runGit(args ...string) error { + return runGitIn("", args...) +} + +func runGitIn(dir string, args ...string) error { + cmd := exec.Command("git", args...) + if dir != "" { + cmd.Dir = dir + } + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + cmd.Env = os.Environ() + return cmd.Run() +} + +// applySubPath replaces dest with the contents of dest/subPath. The result is +// that dest contains only the requested subdirectory. We materialize the +// content into a sibling tmp dir under the same parent so the final rename is +// atomic on the same filesystem. +func applySubPath(dest, subPath string) error { + // Defense in depth: refuse traversal even if upstream validation slipped. + clean := filepath.Clean(subPath) + if filepath.IsAbs(clean) || hasDotDot(clean) { + return fmt.Errorf("invalid subPath %q", subPath) + } + + src := filepath.Join(dest, clean) + info, err := os.Stat(src) + if err != nil { + return fmt.Errorf("stat subPath: %w", err) + } + if !info.IsDir() { + return fmt.Errorf("subPath %q is not a directory", subPath) + } + + parent := filepath.Dir(dest) + tmp, err := os.MkdirTemp(parent, ".skill-subpath-*") + if err != nil { + return fmt.Errorf("mktemp: %w", err) + } + // Best-effort cleanup if we fail before the rename. + cleanupTmp := tmp + defer func() { + if cleanupTmp != "" { + os.RemoveAll(cleanupTmp) + } + }() + + if err := copyTree(src, tmp); err != nil { + return err + } + if err := os.RemoveAll(dest); err != nil { + return err + } + if err := os.Rename(tmp, dest); err != nil { + return err + } + cleanupTmp = "" + return nil +} + +func hasDotDot(p string) bool { + for _, seg := range splitAll(p) { + if seg == ".." { + return true + } + } + return false +} + +func splitAll(p string) []string { + var out []string + for p != "" && p != "." && p != "/" { + dir, base := filepath.Split(p) + if base != "" { + out = append([]string{base}, out...) + } + if dir == p { + break + } + p = filepath.Clean(dir) + if p == "." { + break + } + } + return out +} diff --git a/go/core/internal/skillsinit/oci.go b/go/core/internal/skillsinit/oci.go new file mode 100644 index 0000000000..fa197931ba --- /dev/null +++ b/go/core/internal/skillsinit/oci.go @@ -0,0 +1,147 @@ +package skillsinit + +import ( + "archive/tar" + "fmt" + "io" + "os" + "path/filepath" + "runtime" + "strings" + + "github.com/google/go-containerregistry/pkg/crane" + v1 "github.com/google/go-containerregistry/pkg/v1" +) + +// FetchOCI pulls the named image, exports its flattened filesystem, and +// extracts it into ref.Dest. It is the in-process replacement for the old +// `krane export | tar xf -` pipeline. +// +// Auth comes from the standard DOCKER_CONFIG mechanism (set by the caller +// after MergeDockerConfigs). Platform follows the host arch — same as the +// old script's case statement on `uname -m`. +func FetchOCI(ref OCIRef, insecure bool) error { + platform, err := hostPlatform() + if err != nil { + return err + } + + opts := []crane.Option{crane.WithPlatform(platform)} + if insecure { + opts = append(opts, crane.Insecure) + } + + img, err := crane.Pull(ref.Image, opts...) + if err != nil { + return fmt.Errorf("pull %s: %w", ref.Image, err) + } + + if err := os.MkdirAll(ref.Dest, 0o755); err != nil { + return fmt.Errorf("mkdir %s: %w", ref.Dest, err) + } + + pr, pw := io.Pipe() + errCh := make(chan error, 1) + go func() { + errCh <- crane.Export(img, pw) + pw.Close() + }() + + if err := extractTar(pr, ref.Dest); err != nil { + // Drain the export goroutine to avoid a goroutine leak. + _, _ = io.Copy(io.Discard, pr) + <-errCh + return fmt.Errorf("extract %s: %w", ref.Image, err) + } + if err := <-errCh; err != nil { + return fmt.Errorf("export %s: %w", ref.Image, err) + } + return nil +} + +func hostPlatform() (*v1.Platform, error) { + var arch string + switch runtime.GOARCH { + case "amd64": + arch = "amd64" + case "arm64": + arch = "arm64" + default: + return nil, fmt.Errorf("unsupported architecture for OCI export: %s", runtime.GOARCH) + } + return &v1.Platform{OS: "linux", Architecture: arch}, nil +} + +// extractTar writes the tar stream into dst. We refuse entries that escape +// dst via absolute paths or ".." segments — the old `tar xf` accepted those, +// and untrusted skill images are exactly the case where that's dangerous. +func extractTar(r io.Reader, dst string) error { + dstAbs, err := filepath.Abs(dst) + if err != nil { + return err + } + tr := tar.NewReader(r) + for { + hdr, err := tr.Next() + if err == io.EOF { + return nil + } + if err != nil { + return err + } + target, err := safeJoin(dstAbs, hdr.Name) + if err != nil { + return err + } + switch hdr.Typeflag { + case tar.TypeDir: + if err := os.MkdirAll(target, os.FileMode(hdr.Mode)|0o700); err != nil { + return err + } + case tar.TypeReg, tar.TypeRegA: + if err := os.MkdirAll(filepath.Dir(target), 0o755); err != nil { + return err + } + // OCI layers can overwrite read-only files from earlier layers. + // Removing first avoids EACCES when O_TRUNC would otherwise fail. + _ = os.Remove(target) + f, err := os.OpenFile(target, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, os.FileMode(hdr.Mode)&0o777) + if err != nil { + return err + } + if _, err := io.Copy(f, tr); err != nil { + f.Close() + return err + } + f.Close() + case tar.TypeSymlink: + if err := os.MkdirAll(filepath.Dir(target), 0o755); err != nil { + return err + } + // Reject symlinks whose target escapes dst. + linkTarget := hdr.Linkname + if filepath.IsAbs(linkTarget) { + return fmt.Errorf("tar entry %q has absolute symlink target %q", hdr.Name, linkTarget) + } + resolved := filepath.Clean(filepath.Join(filepath.Dir(target), linkTarget)) + if !strings.HasPrefix(resolved+string(os.PathSeparator), dstAbs+string(os.PathSeparator)) && resolved != dstAbs { + return fmt.Errorf("tar entry %q symlink target %q escapes destination", hdr.Name, linkTarget) + } + _ = os.Remove(target) + if err := os.Symlink(linkTarget, target); err != nil { + return err + } + default: + // Skip hardlinks, devices, etc. Not meaningful in a skill bundle. + } + } +} + +func safeJoin(dst, name string) (string, error) { + cleaned := filepath.Clean("/" + name) + target := filepath.Join(dst, cleaned) + if !strings.HasPrefix(target+string(os.PathSeparator), dst+string(os.PathSeparator)) && target != dst { + return "", fmt.Errorf("tar entry %q escapes destination", name) + } + return target, nil +} diff --git a/go/core/internal/skillsinit/runner.go b/go/core/internal/skillsinit/runner.go new file mode 100644 index 0000000000..61d46bd537 --- /dev/null +++ b/go/core/internal/skillsinit/runner.go @@ -0,0 +1,64 @@ +package skillsinit + +import ( + "context" + "encoding/json" + "fmt" + "log" + "os" + "path/filepath" +) + +// Run executes the full skills-init sequence: docker config merge → git auth +// setup → git clones → OCI pulls. It returns the first error encountered; +// successful operations before the failure are left in place on disk (the +// container restarts and re-runs from scratch). +// +// homeDir is the binary's $HOME — exposed for tests. In production callers +// should pass os.UserHomeDir() or "/root". +func Run(ctx context.Context, cfg Config, homeDir string) error { + if len(cfg.ImagePullSecrets) > 0 { + dockerCfgPath := filepath.Join(os.TempDir(), "kagent-docker-config", "config.json") + dockerCfgDir, err := MergeDockerConfigs(DockerSecretsDir, cfg.ImagePullSecrets, dockerCfgPath) + if err != nil { + return fmt.Errorf("merge docker configs: %w", err) + } + if err := os.Setenv("DOCKER_CONFIG", dockerCfgDir); err != nil { + return err + } + } + + if err := SetupGitAuth(homeDir, cfg.AuthMountPath, cfg.SSHHosts); err != nil { + return fmt.Errorf("setup git auth: %w", err) + } + + for _, ref := range cfg.GitRefs { + log.Printf("cloning %s (ref=%s) into %s", ref.URL, ref.Ref, ref.Dest) + if err := CloneGit(ref); err != nil { + return fmt.Errorf("clone %s: %w", ref.URL, err) + } + } + + for _, ref := range cfg.OCIRefs { + log.Printf("exporting OCI image %s into %s", ref.Image, ref.Dest) + if err := FetchOCI(ref, cfg.InsecureOCI); err != nil { + return fmt.Errorf("oci %s: %w", ref.Image, err) + } + } + + return nil +} + +// LoadConfig reads and parses the config JSON from the conventional mount. +func LoadConfig() (Config, error) { + path := filepath.Join(ConfigMountPath, ConfigFileName) + raw, err := os.ReadFile(path) + if err != nil { + return Config{}, fmt.Errorf("read %s: %w", path, err) + } + var cfg Config + if err := json.Unmarshal(raw, &cfg); err != nil { + return Config{}, fmt.Errorf("parse %s: %w", path, err) + } + return cfg, nil +} diff --git a/go/core/internal/skillsinit/ssh.go b/go/core/internal/skillsinit/ssh.go new file mode 100644 index 0000000000..ae5c44b2e6 --- /dev/null +++ b/go/core/internal/skillsinit/ssh.go @@ -0,0 +1,113 @@ +package skillsinit + +import ( + "fmt" + "io" + "os" + "os/exec" + "path/filepath" +) + +// SetupGitAuth prepares ~/.ssh and credential helpers from the mounted auth +// secret. If a ssh-privatekey is present, it is copied into place with strict +// permissions and known_hosts is seeded via ssh-keyscan. If a token is +// present, a git credential helper is configured. +// +// homeDir is normally the binary process's $HOME. We accept it explicitly so +// tests can pass a tmpdir. +func SetupGitAuth(homeDir, authMountPath string, hosts []SSHHost) error { + if authMountPath == "" { + return nil + } + + keyPath := filepath.Join(authMountPath, "ssh-privatekey") + tokenPath := filepath.Join(authMountPath, "token") + + switch { + case fileExists(keyPath): + return setupSSHKey(homeDir, keyPath, hosts) + case fileExists(tokenPath): + return setupTokenHelper(tokenPath) + } + return nil +} + +func setupSSHKey(homeDir, keyPath string, hosts []SSHHost) error { + sshDir := filepath.Join(homeDir, ".ssh") + if err := os.MkdirAll(sshDir, 0o700); err != nil { + return fmt.Errorf("mkdir ~/.ssh: %w", err) + } + if err := copyFile(keyPath, filepath.Join(sshDir, "id_rsa"), 0o600); err != nil { + return fmt.Errorf("install ssh key: %w", err) + } + knownHosts := filepath.Join(sshDir, "known_hosts") + if err := touchFile(knownHosts, 0o600); err != nil { + return fmt.Errorf("touch known_hosts: %w", err) + } + for _, h := range hosts { + if err := keyscan(h, knownHosts); err != nil { + return fmt.Errorf("ssh-keyscan %s: %w", h.Host, err) + } + } + return nil +} + +func setupTokenHelper(tokenPath string) error { + // Use an absolute path inside the helper string. The helper is invoked + // by git via /bin/sh, so we keep the body small and quote the literal + // path — the path itself comes from us, not from user input. + helper := fmt.Sprintf("!f() { echo username=x-access-token; echo password=$(cat %q); }; f", tokenPath) + cmd := exec.Command("git", "config", "--global", "credential.helper", helper) + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + return cmd.Run() +} + +// keyscan invokes ssh-keyscan with an argv vector. Host and Port reach the +// process as separate arguments — they never pass through a shell. +func keyscan(h SSHHost, knownHosts string) error { + args := []string{"-H"} + if h.Port != "" { + args = append(args, "-p", h.Port) + } + args = append(args, h.Host) + + f, err := os.OpenFile(knownHosts, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0o600) + if err != nil { + return err + } + defer f.Close() + + cmd := exec.Command("ssh-keyscan", args...) + cmd.Stdout = f + cmd.Stderr = os.Stderr + return cmd.Run() +} + +func fileExists(p string) bool { + _, err := os.Stat(p) + return err == nil +} + +func copyFile(src, dst string, mode os.FileMode) error { + in, err := os.Open(src) + if err != nil { + return err + } + defer in.Close() + out, err := os.OpenFile(dst, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, mode) + if err != nil { + return err + } + defer out.Close() + _, err = io.Copy(out, in) + return err +} + +func touchFile(p string, mode os.FileMode) error { + f, err := os.OpenFile(p, os.O_APPEND|os.O_CREATE, mode) + if err != nil { + return err + } + return f.Close() +} diff --git a/go/go.mod b/go/go.mod index 1b498ee8d8..94f2dd970e 100644 --- a/go/go.mod +++ b/go/go.mod @@ -64,6 +64,7 @@ require ( github.com/aws/aws-sdk-go-v2 v1.41.7 github.com/aws/aws-sdk-go-v2/service/bedrockruntime v1.50.6 github.com/golang/protobuf v1.5.4 + github.com/google/go-containerregistry v0.21.2 github.com/google/jsonschema-go v0.4.3 github.com/jackc/pgx/v5 v5.9.2 github.com/ollama/ollama v0.24.0 @@ -167,6 +168,7 @@ require ( github.com/containerd/errdefs/pkg v0.3.0 // indirect github.com/containerd/log v0.1.0 // indirect github.com/containerd/platforms v0.2.1 // indirect + github.com/containerd/stargz-snapshotter/estargz v0.18.2 // indirect github.com/cpuguy83/dockercfg v0.3.2 // indirect github.com/curioswitch/go-reassign v0.3.0 // indirect github.com/daixiang0/gci v0.13.7 // indirect @@ -176,6 +178,9 @@ require ( github.com/denis-tingaikin/go-header v0.5.0 // indirect github.com/distribution/reference v0.6.0 // indirect github.com/dlclark/regexp2 v1.12.0 // indirect + github.com/docker/cli v29.2.1+incompatible // indirect + github.com/docker/distribution v2.8.3+incompatible // indirect + github.com/docker/docker-credential-helpers v0.9.3 // indirect github.com/docker/go-connections v0.6.0 // indirect github.com/docker/go-units v0.5.0 // indirect github.com/dustin/go-humanize v1.0.1 // indirect @@ -374,6 +379,7 @@ require ( github.com/ultraware/whitespace v0.2.0 // indirect github.com/uudashr/gocognit v1.2.1 // indirect github.com/uudashr/iface v1.4.2 // indirect + github.com/vbatts/tar-split v0.12.2 // indirect github.com/wk8/go-ordered-map/v2 v2.1.8 // indirect github.com/x448/float16 v0.8.4 // indirect github.com/xen0n/gosmopolitan v1.3.0 // indirect diff --git a/go/go.sum b/go/go.sum index 7392185179..03a15884dc 100644 --- a/go/go.sum +++ b/go/go.sum @@ -214,6 +214,8 @@ github.com/containerd/log v0.1.0 h1:TCJt7ioM2cr/tfR8GPbGf9/VRAX8D2B4PjzCpfX540I= github.com/containerd/log v0.1.0/go.mod h1:VRRf09a7mHDIRezVKTRCrOq78v577GXq3bSa3EhrzVo= github.com/containerd/platforms v0.2.1 h1:zvwtM3rz2YHPQsF2CHYM8+KtB5dvhISiXh5ZpSBQv6A= github.com/containerd/platforms v0.2.1/go.mod h1:XHCb+2/hzowdiut9rkudds9bE5yJ7npe7dG/wG+uFPw= +github.com/containerd/stargz-snapshotter/estargz v0.18.2 h1:yXkZFYIzz3eoLwlTUZKz2iQ4MrckBxJjkmD16ynUTrw= +github.com/containerd/stargz-snapshotter/estargz v0.18.2/go.mod h1:XyVU5tcJ3PRpkA9XS2T5us6Eg35yM0214Y+wvrZTBrY= github.com/cpuguy83/dockercfg v0.3.2 h1:DlJTyZGBDlXqUZ2Dk2Q3xHs/FtnooJJVaad2S9GKorA= github.com/cpuguy83/dockercfg v0.3.2/go.mod h1:sugsbF4//dDlL/i+S+rtpIWp+5h0BHJHfjj5/jFyUJc= github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g= @@ -243,8 +245,14 @@ github.com/dlclark/regexp2 v1.12.0 h1:0j4c5qQmnC6XOWNjP3PIXURXN2gWx76rd3KvgdPkCz github.com/dlclark/regexp2 v1.12.0/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8= github.com/dnaeon/go-vcr v1.2.0 h1:zHCHvJYTMh1N7xnV7zf1m1GPBF9Ad0Jk/whtQ1663qI= github.com/dnaeon/go-vcr v1.2.0/go.mod h1:R4UdLID7HZT3taECzJs4YgbbH6PIGXB6W/sc5OLb6RQ= -github.com/docker/docker v28.3.3+incompatible h1:Dypm25kh4rmk49v1eiVbsAtpAsYURjYkaKubwuBdxEI= -github.com/docker/docker v28.3.3+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk= +github.com/docker/cli v29.2.1+incompatible h1:n3Jt0QVCN65eiVBoUTZQM9mcQICCJt3akW4pKAbKdJg= +github.com/docker/cli v29.2.1+incompatible/go.mod h1:JLrzqnKDaYBop7H2jaqPtU4hHvMKP+vjCwu2uszcLI8= +github.com/docker/distribution v2.8.3+incompatible h1:AtKxIZ36LoNK51+Z6RpzLpddBirtxJnzDrHLEKxTAYk= +github.com/docker/distribution v2.8.3+incompatible/go.mod h1:J2gT2udsDAN96Uj4KfcMRqY0/ypR+oyYUYmja8H+y+w= +github.com/docker/docker v28.5.2+incompatible h1:DBX0Y0zAjZbSrm1uzOkdr1onVghKaftjlSWt4AFexzM= +github.com/docker/docker v28.5.2+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk= +github.com/docker/docker-credential-helpers v0.9.3 h1:gAm/VtF9wgqJMoxzT3Gj5p4AqIjCBS4wrsOh9yRqcz8= +github.com/docker/docker-credential-helpers v0.9.3/go.mod h1:x+4Gbw9aGmChi3qTLZj8Dfn0TD20M/fuWy0E5+WDeCo= github.com/docker/go-connections v0.6.0 h1:LlMG9azAe1TqfR7sO+NJttz1gy6KO7VJBh+pMmjSD94= github.com/docker/go-connections v0.6.0/go.mod h1:AahvXYshr6JgfUJGdDCs2b5EZG/vmaMAntpSFH5BFKE= github.com/docker/go-units v0.5.0 h1:69rxXcBk27SvSaaxTtLh/8llcHD8vYHT7WSdRZ/jvr4= @@ -369,8 +377,6 @@ github.com/godoc-lint/godoc-lint v0.11.2 h1:Bp0FkJWoSdNsBikdNgIcgtaoo+xz6I/Y9s5W github.com/godoc-lint/godoc-lint v0.11.2/go.mod h1:iVpGdL1JCikNH2gGeAn3Hh+AgN5Gx/I/cxV+91L41jo= github.com/gofrs/flock v0.13.0 h1:95JolYOvGMqeH31+FC7D2+uULf6mG61mEZ/A8dRYMzw= github.com/gofrs/flock v0.13.0/go.mod h1:jxeyy9R1auM5S6JYDBhDt+E2TCo7DkratH4Pgi8P+Z0= -github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= -github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= github.com/golang-jwt/jwt/v5 v5.3.1 h1:kYf81DTWFe7t+1VvL7eS+jKFVWaUnK9cB1qbwn63YCY= github.com/golang-jwt/jwt/v5 v5.3.1/go.mod h1:fxCRLWMO43lRc8nhHWY6LGqRcf+1gQWArsqaEUEa5bE= github.com/golang-migrate/migrate/v4 v4.19.1 h1:OCyb44lFuQfYXYLx1SCxPZQGU7mcaZ7gH9yH4jSFbBA= @@ -411,6 +417,8 @@ github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/ github.com/google/go-cmp v0.5.8/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= +github.com/google/go-containerregistry v0.21.2 h1:vYaMU4nU55JJGFC9JR/s8NZcTjbE9DBBbvusTW9NeS0= +github.com/google/go-containerregistry v0.21.2/go.mod h1:ctO5aCaewH4AK1AumSF5DPW+0+R+d2FmylMJdp5G7p0= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0= github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= @@ -818,6 +826,8 @@ github.com/uudashr/gocognit v1.2.1 h1:CSJynt5txTnORn/DkhiB4mZjwPuifyASC8/6Q0I/QS github.com/uudashr/gocognit v1.2.1/go.mod h1:acaubQc6xYlXFEMb9nWX2dYBzJ/bIjEkc1zzvyIZg5Q= github.com/uudashr/iface v1.4.2 h1:06Vq5RKVYThBsj0Bnw4oasMjD1r+7CE/bcKOA8dVSvg= github.com/uudashr/iface v1.4.2/go.mod h1:pbeBPlbuU2qkNDn0mmfrxP2X+wjPMIQAy+r1MBXSXtg= +github.com/vbatts/tar-split v0.12.2 h1:w/Y6tjxpeiFMR47yzZPlPj/FcPLpXbTUi/9H7d3CPa4= +github.com/vbatts/tar-split v0.12.2/go.mod h1:eF6B6i6ftWQcDqEn3/iGFRFRo8cBIMSJVOpnNdfTMFA= github.com/vmihailenco/bufpool v0.1.11 h1:gOq2WmBrq0i2yW5QJ16ykccQ4wH9UyEsgLm6czKAd94= github.com/vmihailenco/bufpool v0.1.11/go.mod h1:AFf/MOy3l2CFTKbxwt0mp2MwnqjNEs5H/UxrkA5jxTQ= github.com/vmihailenco/msgpack/v5 v5.3.5 h1:5gO0H1iULLWGhs2H5tbAHIZTV8/cYafcFOr9znI5mJU= From e418fe5eedbfbf33b36f5b428365b4e9eb7f2302 Mon Sep 17 00:00:00 2001 From: Eitan Yarmush Date: Tue, 26 May 2026 15:26:58 -0400 Subject: [PATCH 02/13] Potential fix for pull request finding Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> Signed-off-by: Eitan Yarmush --- go/core/internal/skillsinit/oci.go | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/go/core/internal/skillsinit/oci.go b/go/core/internal/skillsinit/oci.go index fa197931ba..ddee0e59bb 100644 --- a/go/core/internal/skillsinit/oci.go +++ b/go/core/internal/skillsinit/oci.go @@ -43,13 +43,14 @@ func FetchOCI(ref OCIRef, insecure bool) error { pr, pw := io.Pipe() errCh := make(chan error, 1) go func() { - errCh <- crane.Export(img, pw) - pw.Close() + exportErr := crane.Export(img, pw) + _ = pw.CloseWithError(exportErr) + errCh <- exportErr }() if err := extractTar(pr, ref.Dest); err != nil { - // Drain the export goroutine to avoid a goroutine leak. - _, _ = io.Copy(io.Discard, pr) + // Abort the export promptly; don't drain potentially large images. + _ = pr.CloseWithError(err) <-errCh return fmt.Errorf("extract %s: %w", ref.Image, err) } From 9a789e9f492b5d239ac0b1f16af12ec93020081d Mon Sep 17 00:00:00 2001 From: Eitan Yarmush Date: Tue, 26 May 2026 15:27:21 -0400 Subject: [PATCH 03/13] Potential fix for pull request finding Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> Signed-off-by: Eitan Yarmush --- go/core/internal/skillsinit/oci.go | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/go/core/internal/skillsinit/oci.go b/go/core/internal/skillsinit/oci.go index ddee0e59bb..8982587969 100644 --- a/go/core/internal/skillsinit/oci.go +++ b/go/core/internal/skillsinit/oci.go @@ -139,7 +139,13 @@ func extractTar(r io.Reader, dst string) error { } func safeJoin(dst, name string) (string, error) { - cleaned := filepath.Clean("/" + name) + // Ensure the tar entry path is treated as relative so filepath.Join doesn't + // discard dst. We still validate after joining to prevent escapes via "..". + cleaned := filepath.Clean(name) + cleaned = strings.TrimPrefix(cleaned, string(os.PathSeparator)) + if cleaned == "." { + return dst, nil + } target := filepath.Join(dst, cleaned) if !strings.HasPrefix(target+string(os.PathSeparator), dst+string(os.PathSeparator)) && target != dst { return "", fmt.Errorf("tar entry %q escapes destination", name) From 4109232bc954b4f62b7a3a710a203465ede038d3 Mon Sep 17 00:00:00 2001 From: Eitan Yarmush Date: Tue, 26 May 2026 19:31:00 +0000 Subject: [PATCH 04/13] refactor(skills-init): use cp -rL for subPath instead of Go walker MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Go-native copyTree/walkFollow added in the previous commit went further than necessary. The src + dst paths are both controlled by us (mktemp dir + a cleaned/validated subPath under a freshly-cloned repo), and they are passed to cp as argv entries — no shell, no metacharacter risk. cp -rL is the same primitive the old script used, just invoked the safe way. What stays native: - tar extraction in oci.go (path-escape and symlink-escape rejection is real security logic that "tar xf" does not provide) - docker config merging in docker.go (pure JSON manipulation, simpler than shelling to jq) - git, ssh-keyscan invocations: argv-vector exec.Command Signed-off-by: Eitan Yarmush --- go/core/internal/skillsinit/copytree.go | 82 ------------------------- go/core/internal/skillsinit/git.go | 11 +++- 2 files changed, 9 insertions(+), 84 deletions(-) delete mode 100644 go/core/internal/skillsinit/copytree.go diff --git a/go/core/internal/skillsinit/copytree.go b/go/core/internal/skillsinit/copytree.go deleted file mode 100644 index 52f82766a6..0000000000 --- a/go/core/internal/skillsinit/copytree.go +++ /dev/null @@ -1,82 +0,0 @@ -package skillsinit - -import ( - "fmt" - "io" - "io/fs" - "os" - "path/filepath" -) - -// copyTree mirrors `cp -rL src/. dst/`: it dereferences symlinks (a symlink to -// a file becomes a regular file in dst; a symlink to a directory is walked). -// We follow links because the old script used `cp -rL`, matching git's -// in-repo symlinks pointing back into the same checkout. -// -// dst must already exist. -func copyTree(src, dst string) error { - rootInfo, err := os.Stat(src) - if err != nil { - return err - } - if !rootInfo.IsDir() { - return fmt.Errorf("copyTree source %q is not a directory", src) - } - - return walkFollow(src, "", func(rel string, info fs.FileInfo, openFile func() (io.ReadCloser, error)) error { - target := filepath.Join(dst, rel) - switch { - case info.IsDir(): - return os.MkdirAll(target, info.Mode().Perm()|0o700) - case info.Mode().IsRegular(): - if err := os.MkdirAll(filepath.Dir(target), 0o700); err != nil { - return err - } - in, err := openFile() - if err != nil { - return err - } - defer in.Close() - out, err := os.OpenFile(target, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, info.Mode().Perm()) - if err != nil { - return err - } - defer out.Close() - _, err = io.Copy(out, in) - return err - default: - // Sockets, devices, fifos in a skill checkout don't make sense; - // skip them rather than failing. - return nil - } - }) -} - -// walkFollow walks src dereferencing symlinks. rel is the path relative to -// src ("" for the root). The walker invokes fn with a lazily-opened reader -// for regular files. -func walkFollow(src, rel string, fn func(rel string, info fs.FileInfo, openFile func() (io.ReadCloser, error)) error) error { - full := filepath.Join(src, rel) - info, err := os.Stat(full) // Stat follows symlinks - if err != nil { - return err - } - if rel != "" { - if err := fn(rel, info, func() (io.ReadCloser, error) { return os.Open(full) }); err != nil { - return err - } - } - if !info.IsDir() { - return nil - } - entries, err := os.ReadDir(full) - if err != nil { - return err - } - for _, e := range entries { - if err := walkFollow(src, filepath.Join(rel, e.Name()), fn); err != nil { - return err - } - } - return nil -} diff --git a/go/core/internal/skillsinit/git.go b/go/core/internal/skillsinit/git.go index 3d0281afce..2bcb973aa3 100644 --- a/go/core/internal/skillsinit/git.go +++ b/go/core/internal/skillsinit/git.go @@ -88,8 +88,15 @@ func applySubPath(dest, subPath string) error { } }() - if err := copyTree(src, tmp); err != nil { - return err + // cp -rL: follow symlinks (matches the original behavior); both paths + // are constructed by us, never user-supplied, and are passed as argv — + // no shell, no metacharacter risk. Trailing "/." copies *contents* of + // src into tmp, not src itself. + cp := exec.Command("cp", "-rL", "--", src+"/.", tmp) + cp.Stdout = os.Stdout + cp.Stderr = os.Stderr + if err := cp.Run(); err != nil { + return fmt.Errorf("cp subPath contents: %w", err) } if err := os.RemoveAll(dest); err != nil { return err From 564a97fec34e61c30bc8ff2ab14a199d917623bd Mon Sep 17 00:00:00 2001 From: Eitan Yarmush Date: Tue, 26 May 2026 20:01:00 +0000 Subject: [PATCH 05/13] fix(skills-init): harden remaining argv flows and drop unused ctx Audit of where user-controlled CRD fields end up in subprocess argv turned up two defense-in-depth gaps: * `git checkout ` had no `--` separator. If a malicious Agent set the Ref to a string starting with `-`, git would parse it as a flag. isCommitSHA already restricts the commit branch to 40 hex chars, but the separator costs nothing. * The skill directory name (Skills.GitRefs[].Name, or derived from URL / Path / OCI ref) was used verbatim as the leaf of /skills/. `Name: "../etc"` would have escaped the volume. Now restricted to `[A-Za-z0-9._-]+` and ".", ".." rejected explicitly. Also remove the unused context.Context parameter that the new Run entrypoint took but never consulted. Audit notes (no changes needed): * OCI image string is parsed by go-containerregistry, not a shell * ssh-keyscan host/port is already regex-filtered to [A-Za-z0-9.-] * dockerconfigjson secret names are K8s DNS-labels by API contract * subPath is already validated (no abs, no ..) plus defense-in-depth in applySubPath at runtime Signed-off-by: Eitan Yarmush --- go/core/cmd/skills-init/main.go | 3 +-- .../translator/agent/adk_api_translator.go | 26 +++++++++++++++++++ go/core/internal/skillsinit/git.go | 5 +++- go/core/internal/skillsinit/runner.go | 3 +-- 4 files changed, 32 insertions(+), 5 deletions(-) diff --git a/go/core/cmd/skills-init/main.go b/go/core/cmd/skills-init/main.go index 9e1cb0ec78..0a9738859d 100644 --- a/go/core/cmd/skills-init/main.go +++ b/go/core/cmd/skills-init/main.go @@ -9,7 +9,6 @@ package main import ( - "context" "log" "os" @@ -29,7 +28,7 @@ func main() { home = "/root" } - if err := skillsinit.Run(context.Background(), cfg, home); err != nil { + if err := skillsinit.Run(cfg, home); err != nil { log.Fatalf("skills-init: %v", err) } } diff --git a/go/core/internal/controller/translator/agent/adk_api_translator.go b/go/core/internal/controller/translator/agent/adk_api_translator.go index 350713a4ed..2de290cda2 100644 --- a/go/core/internal/controller/translator/agent/adk_api_translator.go +++ b/go/core/internal/controller/translator/agent/adk_api_translator.go @@ -1165,6 +1165,26 @@ func gitSSHHost(rawURL string) (skillsinit.SSHHost, bool) { return skillsinit.SSHHost{Host: host}, true } +// validSkillNamePattern restricts skill directory names to a safe alphabet. +// The name becomes the final path segment under /skills/, so anything beyond +// [a-zA-Z0-9._-] (notably "/" and "..") could escape the skills volume. +var validSkillNamePattern = regexp.MustCompile(`^[A-Za-z0-9._-]+$`) + +// validateSkillName rejects names that would escape /skills/ or look like +// dotfiles that hide the skill. +func validateSkillName(name string) error { + if name == "" { + return fmt.Errorf("skill name is empty") + } + if name == "." || name == ".." { + return fmt.Errorf("skill name %q is reserved", name) + } + if !validSkillNamePattern.MatchString(name) { + return fmt.Errorf("skill name %q must match %s", name, validSkillNamePattern) + } + return nil +} + // validateSubPath rejects subPath values that are absolute or contain ".." traversal segments. func validateSubPath(p string) error { if p == "" { @@ -1233,6 +1253,9 @@ func prepareSkillsInitConfig( ref.Ref = gitRef name := gitSkillName(ref) + if err := validateSkillName(name); err != nil { + return skillsinit.Config{}, fmt.Errorf("git skill %q: %w", ref.URL, err) + } if seen[name] { return skillsinit.Config{}, fmt.Errorf("duplicate skill directory name %q", name) } @@ -1261,6 +1284,9 @@ func prepareSkillsInitConfig( for _, imageRef := range ociRefs { name := ociSkillName(imageRef) + if err := validateSkillName(name); err != nil { + return skillsinit.Config{}, fmt.Errorf("oci skill %q: %w", imageRef, err) + } if seen[name] { return skillsinit.Config{}, fmt.Errorf("duplicate skill directory name %q", name) } diff --git a/go/core/internal/skillsinit/git.go b/go/core/internal/skillsinit/git.go index 2bcb973aa3..81a752e522 100644 --- a/go/core/internal/skillsinit/git.go +++ b/go/core/internal/skillsinit/git.go @@ -23,7 +23,10 @@ func CloneGit(ref GitRef) error { if err := runGit("clone", "--", ref.URL, ref.Dest); err != nil { return err } - if err := runGitIn(ref.Dest, "checkout", ref.Ref); err != nil { + // `--` separator prevents a ref starting with `-` from being parsed + // as a flag. Refs are already validated upstream as 40-char hex when + // IsCommit is true, but defense in depth costs nothing. + if err := runGitIn(ref.Dest, "checkout", "--", ref.Ref); err != nil { return err } } else { diff --git a/go/core/internal/skillsinit/runner.go b/go/core/internal/skillsinit/runner.go index 61d46bd537..2b03744103 100644 --- a/go/core/internal/skillsinit/runner.go +++ b/go/core/internal/skillsinit/runner.go @@ -1,7 +1,6 @@ package skillsinit import ( - "context" "encoding/json" "fmt" "log" @@ -16,7 +15,7 @@ import ( // // homeDir is the binary's $HOME — exposed for tests. In production callers // should pass os.UserHomeDir() or "/root". -func Run(ctx context.Context, cfg Config, homeDir string) error { +func Run(cfg Config, homeDir string) error { if len(cfg.ImagePullSecrets) > 0 { dockerCfgPath := filepath.Join(os.TempDir(), "kagent-docker-config", "config.json") dockerCfgDir, err := MergeDockerConfigs(DockerSecretsDir, cfg.ImagePullSecrets, dockerCfgPath) From dd15782942a3235b1c105e087cac318ccae89e62 Mon Sep 17 00:00:00 2001 From: Eitan Yarmush Date: Tue, 26 May 2026 21:18:16 +0000 Subject: [PATCH 06/13] fix(skills-init): resolve CI lint and e2e failures - docker.go: use maps.Copy instead of manual loop (modernize lint) - git.go: use slices.Contains in hasDotDot (modernize lint) - oci.go: drop deprecated tar.TypeRegA alias (SA1019) - e2e: assert skills-init via ConfigMap JSON instead of legacy script body Signed-off-by: Eitan Yarmush --- go/core/internal/skillsinit/docker.go | 5 ++--- go/core/internal/skillsinit/git.go | 8 ++------ go/core/internal/skillsinit/oci.go | 2 +- go/core/test/e2e/invoke_api_test.go | 20 +++++++++++++++----- 4 files changed, 20 insertions(+), 15 deletions(-) diff --git a/go/core/internal/skillsinit/docker.go b/go/core/internal/skillsinit/docker.go index 38e4c2e70d..7e657ec9a1 100644 --- a/go/core/internal/skillsinit/docker.go +++ b/go/core/internal/skillsinit/docker.go @@ -3,6 +3,7 @@ package skillsinit import ( "encoding/json" "fmt" + "maps" "os" "path/filepath" ) @@ -33,9 +34,7 @@ func MergeDockerConfigs(secretsDir string, secretNames []string, outPath string) return "", fmt.Errorf("parse %s: %w", path, err) } dst := merged["auths"].(map[string]any) - for k, v := range parsed.Auths { - dst[k] = v - } + maps.Copy(dst, parsed.Auths) } if err := os.MkdirAll(filepath.Dir(outPath), 0o700); err != nil { diff --git a/go/core/internal/skillsinit/git.go b/go/core/internal/skillsinit/git.go index 81a752e522..32a4742f0f 100644 --- a/go/core/internal/skillsinit/git.go +++ b/go/core/internal/skillsinit/git.go @@ -5,6 +5,7 @@ import ( "os" "os/exec" "path/filepath" + "slices" ) // CloneGit fetches a single git ref into ref.Dest. All user-controlled @@ -112,12 +113,7 @@ func applySubPath(dest, subPath string) error { } func hasDotDot(p string) bool { - for _, seg := range splitAll(p) { - if seg == ".." { - return true - } - } - return false + return slices.Contains(splitAll(p), "..") } func splitAll(p string) []string { diff --git a/go/core/internal/skillsinit/oci.go b/go/core/internal/skillsinit/oci.go index 8982587969..c1463adf50 100644 --- a/go/core/internal/skillsinit/oci.go +++ b/go/core/internal/skillsinit/oci.go @@ -99,7 +99,7 @@ func extractTar(r io.Reader, dst string) error { if err := os.MkdirAll(target, os.FileMode(hdr.Mode)|0o700); err != nil { return err } - case tar.TypeReg, tar.TypeRegA: + case tar.TypeReg: if err := os.MkdirAll(filepath.Dir(target), 0o755); err != nil { return err } diff --git a/go/core/test/e2e/invoke_api_test.go b/go/core/test/e2e/invoke_api_test.go index 528fce2882..c31721702f 100644 --- a/go/core/test/e2e/invoke_api_test.go +++ b/go/core/test/e2e/invoke_api_test.go @@ -1218,11 +1218,21 @@ func TestE2ESkillImagePullSecrets(t *testing.T) { } require.True(t, foundSecretMount, "skills-init should mount the pull secret volume") - require.Len(t, skillsInit.Command, 3) - script := skillsInit.Command[2] - require.Contains(t, script, "jq", "skills-init script should contain jq for credential merge") - require.Contains(t, script, ".dockerconfigjson", "skills-init script should reference .dockerconfigjson") - require.Contains(t, script, "/tmp/kagent-docker-config", "skills-init script should write merged config to /tmp") + require.Len(t, skillsInit.Command, 1) + require.Equal(t, "/usr/local/bin/skills-init", skillsInit.Command[0]) + + // The skills-init binary reads its config from a ConfigMap; verify it + // lists each imagePullSecret so the binary will merge their auths. + cm := &corev1.ConfigMap{} + require.NoError(t, cli.Get(t.Context(), client.ObjectKey{ + Name: agent.Name + "-skills-init", + Namespace: agent.Namespace, + }, cm)) + var cfg struct { + ImagePullSecrets []string `json:"imagePullSecrets"` + } + require.NoError(t, json.Unmarshal([]byte(cm.Data["config.json"]), &cfg)) + require.NotEmpty(t, cfg.ImagePullSecrets, "skills-init config should list imagePullSecrets") // Verify the agent works end-to-end with the skill a2aClient := setupA2AClient(t, agent) From 1330f2b4b89d198aca59b24282a487ecd04f7db1 Mon Sep 17 00:00:00 2001 From: Eitan Yarmush Date: Wed, 27 May 2026 16:27:46 +0000 Subject: [PATCH 07/13] test(skills-init): add injection regression tests for CVE-1842 Three new test surfaces that pin the data-only contract introduced by the shell-to-Go rewrite: - validateSkillName: rejection battery covering shell metas, traversal, newlines, null bytes, globs, brace expansion, unicode lookalikes. - prepareSkillsInitConfig: explicit-Name injection, OCI-derived name injection, subPath traversal, and a positive test confirming that malicious URL/Ref strings flow through verbatim (argv-only, never a shell). - skillsinit package: tar safeJoin/extractTar reject path traversal, absolute symlinks, and escaping relative symlinks; applySubPath rejects traversal and non-dir targets; hasDotDot covered directly. Signed-off-by: Eitan Yarmush --- .../translator/agent/skills_unit_test.go | 144 +++++++++++++++ go/core/internal/skillsinit/git_test.go | 69 ++++++++ go/core/internal/skillsinit/oci_test.go | 166 ++++++++++++++++++ 3 files changed, 379 insertions(+) create mode 100644 go/core/internal/skillsinit/git_test.go create mode 100644 go/core/internal/skillsinit/oci_test.go diff --git a/go/core/internal/controller/translator/agent/skills_unit_test.go b/go/core/internal/controller/translator/agent/skills_unit_test.go index e603f44531..7ea5d2e491 100644 --- a/go/core/internal/controller/translator/agent/skills_unit_test.go +++ b/go/core/internal/controller/translator/agent/skills_unit_test.go @@ -368,3 +368,147 @@ func Test_prepareSkillsInitConfig_noAuthSkipsSSHHosts(t *testing.T) { require.NoError(t, err) assert.Empty(t, data.SSHHosts, "SSH hosts should not be collected when authSecretRef is nil") } + +// Test_validateSkillName_rejectsInjection is the regression battery for the +// original CVE: any character that could escape the /skills/ directory +// or be re-interpreted by a shell (back when skills-init was a heredoc) must +// be rejected before it reaches the binary. +func Test_validateSkillName_rejectsInjection(t *testing.T) { + cases := []struct { + name string + in string + }{ + {"empty", ""}, + {"dot", "."}, + {"dotdot", ".."}, + {"slash traversal", "../etc"}, + {"forward slash", "a/b"}, + {"backslash", "a\\b"}, + {"shell semicolon", "skill;rm -rf /"}, + {"command substitution", "skill$(id)"}, + {"backtick substitution", "skill`id`"}, + {"pipe", "skill|nc attacker 4444"}, + {"and", "skill&&id"}, + {"redirect", "skill>/etc/passwd"}, + {"newline", "skill\nrm -rf /"}, + {"carriage return", "skill\r\nrm"}, + {"null byte", "skill\x00trail"}, + {"glob star", "skill*"}, + {"glob question", "skill?"}, + {"space", "skill name"}, + {"tab", "skill\tname"}, + {"dollar var", "skill$HOME"}, + {"brace expansion", "skill{a,b}"}, + {"unicode dot-substitute", "skill․"}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + err := validateSkillName(tc.in) + require.Error(t, err, "validateSkillName(%q) must reject", tc.in) + }) + } +} + +// Test_validateSkillName_acceptsSafe documents the positive side of the +// allowlist so a future regex tightening doesn't silently break valid names. +func Test_validateSkillName_acceptsSafe(t *testing.T) { + for _, in := range []string{"skill", "my-skill", "my_skill", "skill.v1", "Skill123", "a"} { + t.Run(in, func(t *testing.T) { + require.NoError(t, validateSkillName(in)) + }) + } +} + +// Test_prepareSkillsInitConfig_explicitNameRejectsTraversal exercises the +// validation path when the CRD provides an explicit skill Name (rather than +// it being derived from the URL/image). This is the field that historically +// landed in a shell-templated script. +func Test_prepareSkillsInitConfig_explicitNameRejectsTraversal(t *testing.T) { + cases := []struct { + name string + in string + }{ + {"traversal", "../escape"}, + {"absolute", "/etc/passwd"}, + {"semicolon", "skill;id"}, + {"command sub", "skill$(id)"}, + {"newline", "skill\nrm"}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + _, err := prepareSkillsInitConfig( + []v1alpha2.GitRepo{ + {URL: "https://github.com/org/repo", Ref: "main", Name: tc.in}, + }, + nil, nil, false, nil, + ) + require.Error(t, err) + }) + } +} + +// Test_prepareSkillsInitConfig_ociNameDerivationRejectsInjection verifies +// that an OCI image reference whose final path segment contains injection +// characters is rejected even though the registry would technically parse it. +// The derived name becomes a directory under /skills, so the allowlist must +// hold here too. +func Test_prepareSkillsInitConfig_ociNameDerivationRejectsInjection(t *testing.T) { + // ociSkillName takes path.Base of the repo portion. Crafted refs where the + // last segment contains shell metas must be rejected. + cases := []string{ + "ghcr.io/org/skill;id", + "ghcr.io/org/skill$(id)", + "ghcr.io/org/skill name", + } + for _, ref := range cases { + t.Run(ref, func(t *testing.T) { + _, err := prepareSkillsInitConfig(nil, nil, []string{ref}, false, nil) + require.Error(t, err, "ref %q should be rejected", ref) + }) + } +} + +// Test_prepareSkillsInitConfig_preservesInjectionStringsAsData proves the +// data-only contract: even when URL/Ref contain shell metacharacters, the +// translator does not reject them (URL/Ref aren't allowlisted — they're passed +// to git as argv) and reproduces them byte-for-byte in the config. Any +// "interpretation" of these strings would show up as a difference here. +func Test_prepareSkillsInitConfig_preservesInjectionStringsAsData(t *testing.T) { + maliciousURL := "https://github.com/org/repo;rm -rf /$(id)`whoami`" + maliciousRef := "main;rm -rf /" + cfg, err := prepareSkillsInitConfig( + []v1alpha2.GitRepo{ + { + URL: maliciousURL, + Ref: maliciousRef, + Name: "safe-name", + }, + }, + nil, nil, false, nil, + ) + require.NoError(t, err, "URL/Ref are not allowlisted; they flow as data") + require.Len(t, cfg.GitRefs, 1) + assert.Equal(t, maliciousURL, cfg.GitRefs[0].URL, "URL must be preserved verbatim — argv flow") + assert.Equal(t, maliciousRef, cfg.GitRefs[0].Ref, "Ref must be preserved verbatim — argv flow") +} + +// Test_prepareSkillsInitConfig_subPathRejectsInjection covers the SubPath +// branch with the same battery the original heredoc would have interpolated. +func Test_prepareSkillsInitConfig_subPathRejectsInjection(t *testing.T) { + cases := []string{ + "../escape", + "a/../b", + "/etc/passwd", + } + for _, p := range cases { + t.Run(p, func(t *testing.T) { + _, err := prepareSkillsInitConfig( + []v1alpha2.GitRepo{ + {URL: "https://github.com/org/repo", Ref: "main", Path: p}, + }, + nil, nil, false, nil, + ) + require.Error(t, err) + }) + } +} diff --git a/go/core/internal/skillsinit/git_test.go b/go/core/internal/skillsinit/git_test.go new file mode 100644 index 0000000000..d3d591d492 --- /dev/null +++ b/go/core/internal/skillsinit/git_test.go @@ -0,0 +1,69 @@ +package skillsinit + +import ( + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// Test_hasDotDot is the unit-level check behind applySubPath's defense in +// depth. Inputs are expected to be filepath.Clean'd by the caller, so only +// genuinely-escaping ".." segments remain — that's what we exercise here. +func Test_hasDotDot(t *testing.T) { + cases := []struct { + name string + in string + want bool + }{ + {"empty", "", false}, + {"plain", "skills/foo", false}, + {"dot", ".", false}, + {"single dotdot", "..", true}, + {"leading dotdot", "../escape", true}, + {"chained dotdot", "../../escape", true}, + {"name contains dots not segment", "..foo", false}, + {"name suffix dots", "foo..", false}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + assert.Equal(t, tc.want, hasDotDot(tc.in)) + }) + } +} + +// Test_applySubPath_rejectsTraversal exercises the validation gate without +// invoking `cp`. We give it a clean dest tree with a real subdir then ask +// for traversal — the function must error before touching the filesystem. +func Test_applySubPath_rejectsTraversal(t *testing.T) { + dest := t.TempDir() + require.NoError(t, os.MkdirAll(filepath.Join(dest, "real"), 0o755)) + + cases := []string{ + "../escape", + "/etc", + "a/../../escape", + } + for _, p := range cases { + t.Run(p, func(t *testing.T) { + err := applySubPath(dest, p) + require.Error(t, err) + assert.Contains(t, err.Error(), "invalid subPath") + }) + } +} + +// Test_applySubPath_rejectsNonDir guards against a benign-looking subPath +// that points at a file rather than a directory. Without this check the +// subsequent `cp -rL` would do something silly; the explicit error is +// clearer and matches the documented contract. +func Test_applySubPath_rejectsNonDir(t *testing.T) { + dest := t.TempDir() + require.NoError(t, os.WriteFile(filepath.Join(dest, "file"), []byte("x"), 0o644)) + + err := applySubPath(dest, "file") + require.Error(t, err) + assert.Contains(t, err.Error(), "not a directory") +} diff --git a/go/core/internal/skillsinit/oci_test.go b/go/core/internal/skillsinit/oci_test.go new file mode 100644 index 0000000000..a98575f96d --- /dev/null +++ b/go/core/internal/skillsinit/oci_test.go @@ -0,0 +1,166 @@ +package skillsinit + +import ( + "archive/tar" + "bytes" + "os" + "path/filepath" + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// Test_safeJoin_rejectsEscape covers every shape of tar-entry name that the +// original `tar xf` pipeline would have happily honored: absolute paths, +// ".." traversal, and combinations thereof. A malicious skill image is the +// motivating threat — these names must never produce paths outside dst. +func Test_safeJoin_rejectsEscape(t *testing.T) { + dst := "/tmp/skills/dest" + + cases := []struct { + name string + entry string + wantErr bool + }{ + {"plain file", "file.txt", false}, + {"nested file", "a/b/c.txt", false}, + {"dot-only", ".", false}, + {"leading slash stripped", "/file.txt", false}, // joined under dst, not at / + {"traversal", "../escape", true}, + {"traversal mid-path", "a/../../escape", true}, + {"absolute escape", "/etc/passwd", false}, // safeJoin strips leading "/" but result is dst/etc/passwd which is under dst — that's intentional + {"deep traversal", "../../../etc/passwd", true}, + {"trailing traversal", "a/b/../../..", true}, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + _, err := safeJoin(dst, tc.entry) + if tc.wantErr { + require.Error(t, err, "safeJoin(%q, %q) must reject", dst, tc.entry) + } else { + require.NoError(t, err) + } + }) + } +} + +// Test_extractTar_rejectsPathTraversalEntry feeds a hand-crafted tar with a +// "../escape" entry. The old shell pipeline would have written outside the +// destination; extractTar must error and not create any file. +func Test_extractTar_rejectsPathTraversalEntry(t *testing.T) { + dst := t.TempDir() + buf := tarOf(t, tarEntry{Name: "../escape.txt", Mode: 0o644, Body: []byte("pwned")}) + err := extractTar(buf, dst) + require.Error(t, err) + assert.Contains(t, err.Error(), "escapes destination") + + // Sanity: nothing was created either inside dst or as a sibling. + _, statErr := os.Stat(filepath.Join(filepath.Dir(dst), "escape.txt")) + require.True(t, os.IsNotExist(statErr), "sibling file must not exist") +} + +// Test_extractTar_rejectsAbsoluteSymlink mirrors the OCI test corpus the +// previous container shipped (e.g. distroless's /etc/localtime symlink). +// We refuse rather than risk writing outside the volume. +func Test_extractTar_rejectsAbsoluteSymlink(t *testing.T) { + dst := t.TempDir() + buf := tarOf(t, tarEntry{ + Name: "localtime", + LinkName: "/etc/passwd", + Type: tar.TypeSymlink, + }) + err := extractTar(buf, dst) + require.Error(t, err) + assert.Contains(t, err.Error(), "absolute symlink") +} + +// Test_extractTar_rejectsEscapingSymlink covers relative symlinks whose +// resolved target points outside dst. +func Test_extractTar_rejectsEscapingSymlink(t *testing.T) { + dst := t.TempDir() + buf := tarOf(t, tarEntry{ + Name: "link", + LinkName: "../../etc/passwd", + Type: tar.TypeSymlink, + }) + err := extractTar(buf, dst) + require.Error(t, err) + assert.Contains(t, err.Error(), "escapes destination") +} + +// Test_extractTar_acceptsBenignSymlink ensures we haven't broken the legitimate +// in-tree symlink case (e.g., a/b -> a/c). +func Test_extractTar_acceptsBenignSymlink(t *testing.T) { + dst := t.TempDir() + buf := tarOf(t, + tarEntry{Name: "target.txt", Mode: 0o644, Body: []byte("hi")}, + tarEntry{Name: "link.txt", LinkName: "target.txt", Type: tar.TypeSymlink}, + ) + require.NoError(t, extractTar(buf, dst)) + got, err := os.Readlink(filepath.Join(dst, "link.txt")) + require.NoError(t, err) + assert.Equal(t, "target.txt", got) +} + +// Test_extractTar_writesRegularFiles is the smoke test that confirms the +// rewritten extractor still writes normal entries — without this, the negative +// tests above could pass by being unconditionally restrictive. +func Test_extractTar_writesRegularFiles(t *testing.T) { + dst := t.TempDir() + buf := tarOf(t, + tarEntry{Name: "sub/", Mode: 0o755, Type: tar.TypeDir}, + tarEntry{Name: "sub/a.txt", Mode: 0o644, Body: []byte("hello")}, + ) + require.NoError(t, extractTar(buf, dst)) + body, err := os.ReadFile(filepath.Join(dst, "sub", "a.txt")) + require.NoError(t, err) + assert.Equal(t, "hello", string(body)) +} + +// tarEntry is a minimal description of one tar record. +type tarEntry struct { + Name string + Mode int64 + Body []byte + LinkName string + Type byte +} + +// tarOf assembles a tar stream in memory for use as input to extractTar. +func tarOf(t *testing.T, entries ...tarEntry) *bytes.Buffer { + t.Helper() + var buf bytes.Buffer + w := tar.NewWriter(&buf) + for _, e := range entries { + typ := e.Type + if typ == 0 { + if e.LinkName != "" { + typ = tar.TypeSymlink + } else if strings.HasSuffix(e.Name, "/") { + typ = tar.TypeDir + } else { + typ = tar.TypeReg + } + } + hdr := &tar.Header{ + Name: e.Name, + Mode: e.Mode, + Size: int64(len(e.Body)), + Typeflag: typ, + Linkname: e.LinkName, + } + if typ != tar.TypeReg { + hdr.Size = 0 + } + require.NoError(t, w.WriteHeader(hdr)) + if typ == tar.TypeReg && len(e.Body) > 0 { + _, err := w.Write(e.Body) + require.NoError(t, err) + } + } + require.NoError(t, w.Close()) + return &buf +} From 27d3da1435789b21255f3bdec3e865af83f8c52d Mon Sep 17 00:00:00 2001 From: Eitan Yarmush Date: Thu, 28 May 2026 17:20:52 +0000 Subject: [PATCH 08/13] refactor(skills-init): address PR review feedback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - main.go: drop $HOME env var dependency; resolve home via os/user.Current() (reads /etc/passwd, deterministic regardless of how container is invoked) - translator: omit container Command so Dockerfile ENTRYPOINT is the single source of truth for the binary path - translator: validate the derived skills-init ConfigMap name against K8s DNS-1123 subdomain rules; fail fast with a clear error rather than letting the apiserver reject the eventual write - translator: improve skill-name error wording for "." / ".." - skillsinit: replace custom path safety code with stdlib primitives — filepath.Localize for tar entry names, filepath.IsLocal for subPath, and os.Root.MkdirAll for in-root mkdir (Go 1.25+) - skillsinit/oci.go: tar extraction now runs entirely through an os.Root anchored at dst, so any escape (path traversal or symlink) is enforced by the kernel as defense in depth on top of the explicit string checks Tests updated to match the new stdlib-driven semantics: paths like "a/../b" that cleanly collapse to a safe in-repo location are now accepted, while genuinely-escaping inputs ("../escape", "/etc/passwd", "a/../../escape") remain rejected. Related to #1842. Signed-off-by: Eitan Yarmush --- go/core/cmd/skills-init/main.go | 14 ++- .../translator/agent/adk_api_translator.go | 34 +++++-- .../translator/agent/git_skills_test.go | 9 +- .../translator/agent/manifest_builder.go | 53 +++++++---- .../translator/agent/skills_unit_test.go | 22 +++-- .../outputs/agent_with_git_skills.json | 6 +- .../testdata/outputs/agent_with_skills.json | 6 +- go/core/internal/skillsinit/git.go | 30 +----- go/core/internal/skillsinit/git_test.go | 25 ----- go/core/internal/skillsinit/oci.go | 94 ++++++++++++------- go/core/internal/skillsinit/oci_test.go | 20 ++-- 11 files changed, 164 insertions(+), 149 deletions(-) diff --git a/go/core/cmd/skills-init/main.go b/go/core/cmd/skills-init/main.go index 0a9738859d..d12ef549e5 100644 --- a/go/core/cmd/skills-init/main.go +++ b/go/core/cmd/skills-init/main.go @@ -10,7 +10,7 @@ package main import ( "log" - "os" + "os/user" "github.com/kagent-dev/kagent/go/core/internal/skillsinit" ) @@ -23,9 +23,17 @@ func main() { log.Fatalf("skills-init: %v", err) } - home, _ := os.UserHomeDir() + // Resolve home from /etc/passwd via the current uid rather than $HOME so + // the binary behaves consistently regardless of how the container was + // invoked. The image's user is created with a fixed home dir, so this is + // deterministic in production and overridable in tests via the user db. + u, err := user.Current() + if err != nil { + log.Fatalf("skills-init: lookup current user: %v", err) + } + home := u.HomeDir if home == "" { - home = "/root" + log.Fatalf("skills-init: current user %q has no home directory", u.Username) } if err := skillsinit.Run(cfg, home); err != nil { diff --git a/go/core/internal/controller/translator/agent/adk_api_translator.go b/go/core/internal/controller/translator/agent/adk_api_translator.go index 2de290cda2..a78a95739a 100644 --- a/go/core/internal/controller/translator/agent/adk_api_translator.go +++ b/go/core/internal/controller/translator/agent/adk_api_translator.go @@ -12,6 +12,7 @@ import ( "net/url" "os" "path" + "path/filepath" "regexp" "slices" "strings" @@ -32,6 +33,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/validation" "sigs.k8s.io/controller-runtime/pkg/client" ) @@ -1004,11 +1006,12 @@ func applyProxyURL(originalURL, proxyURL string, headers map[string]string) (tar return targetURL, updatedHeaders, nil } -func computeConfigHash(agentCfg, agentCard, secretData []byte) uint64 { +func computeConfigHash(agentCfg, agentCard, secretData, skillsInitCfg []byte) uint64 { hasher := sha256.New() hasher.Write(agentCfg) hasher.Write(agentCard) hasher.Write(secretData) + hasher.Write(skillsInitCfg) hash := hasher.Sum(nil) return binary.BigEndian.Uint64(hash[:8]) } @@ -1177,7 +1180,7 @@ func validateSkillName(name string) error { return fmt.Errorf("skill name is empty") } if name == "." || name == ".." { - return fmt.Errorf("skill name %q is reserved", name) + return fmt.Errorf("skill name %q is not a valid directory name", name) } if !validSkillNamePattern.MatchString(name) { return fmt.Errorf("skill name %q must match %s", name, validSkillNamePattern) @@ -1190,11 +1193,10 @@ func validateSubPath(p string) error { if p == "" { return nil } - if path.IsAbs(p) { - return fmt.Errorf("skill subPath must be relative, got %q", p) - } - if slices.Contains(strings.Split(p, "/"), "..") { - return fmt.Errorf("skill subPath must not contain '..', got %q", p) + // filepath.IsLocal rejects absolute paths, ".." segments, and anything + // else that can't be a local relative path — exactly the threat model here. + if !filepath.IsLocal(p) { + return fmt.Errorf("skill subPath must be a relative path without '..' segments, got %q", p) } return nil } @@ -1318,6 +1320,18 @@ func SkillsInitConfigMapName(agentName string) string { return agentName + SkillsInitConfigMapSuffix } +// validateSkillsInitConfigMapName enforces the K8s DNS-1123 subdomain rules +// on the derived ConfigMap name. Agent names are already constrained by the +// CRD, but the suffix can push borderline names over the 253-char limit, so +// we fail fast here with a clear message rather than letting the apiserver +// reject the eventual write. +func validateSkillsInitConfigMapName(name string) error { + if errs := validation.IsDNS1123Subdomain(name); len(errs) > 0 { + return fmt.Errorf("derived skills-init ConfigMap name %q is invalid: %s", name, strings.Join(errs, "; ")) + } + return nil +} + // buildSkillsInitContainer assembles the init container, its volumes, and the // ConfigMap holding its JSON configuration. The container runs a kagent-owned // Go binary that consumes the ConfigMap; no shell is involved, so @@ -1353,6 +1367,9 @@ func buildSkillsInitContainer( } cmName := SkillsInitConfigMapName(agentName) + if err := validateSkillsInitConfigMapName(cmName); err != nil { + return nil, nil, nil, err + } configMap = &corev1.ConfigMap{ ObjectMeta: metav1.ObjectMeta{ Name: cmName, @@ -1415,10 +1432,11 @@ func buildSkillsInitContainer( }) } + // Command is intentionally omitted: the skills-init image's ENTRYPOINT + // is the single source of truth for the binary path. skillsInitContainer := corev1.Container{ Name: "skills-init", Image: DefaultSkillsInitImageConfig.Image(), - Command: []string{"/usr/local/bin/skills-init"}, VolumeMounts: volumeMounts, SecurityContext: initSecCtx, Env: envVars, diff --git a/go/core/internal/controller/translator/agent/git_skills_test.go b/go/core/internal/controller/translator/agent/git_skills_test.go index 6a95f5049c..d2181e888a 100644 --- a/go/core/internal/controller/translator/agent/git_skills_test.go +++ b/go/core/internal/controller/translator/agent/git_skills_test.go @@ -360,9 +360,9 @@ func Test_AdkApiTranslator_Skills(t *testing.T) { // There should be exactly one init container assert.Len(t, initContainers, 1, "should have exactly one init container") - // The new binary is invoked with a single argv entry. - require.Len(t, skillsInitContainer.Command, 1) - assert.Equal(t, "/usr/local/bin/skills-init", skillsInitContainer.Command[0]) + // Command is intentionally omitted so the image's + // ENTRYPOINT is the single source of truth for the binary path. + assert.Empty(t, skillsInitContainer.Command, "Command should be unset; ENTRYPOINT carries the binary path") cfg := findSkillsInitConfig(t, outputs.Manifest, tt.agent.Name) @@ -592,8 +592,7 @@ func Test_AdkApiTranslator_SkillsImagePullSecrets(t *testing.T) { require.Equal(t, "skills-init", initContainers[0].Name, "the single init container must be skills-init") skillsInitContainer := &initContainers[0] - require.Len(t, skillsInitContainer.Command, 1) - assert.Equal(t, "/usr/local/bin/skills-init", skillsInitContainer.Command[0]) + assert.Empty(t, skillsInitContainer.Command, "Command should be unset; ENTRYPOINT carries the binary path") cfg := findSkillsInitConfig(t, outputs.Manifest, tt.agent.Name) diff --git a/go/core/internal/controller/translator/agent/manifest_builder.go b/go/core/internal/controller/translator/agent/manifest_builder.go index bd27bf30a2..cdcca5668c 100644 --- a/go/core/internal/controller/translator/agent/manifest_builder.go +++ b/go/core/internal/controller/translator/agent/manifest_builder.go @@ -2,8 +2,6 @@ package agent import ( "context" - "crypto/sha256" - "encoding/hex" "encoding/json" "fmt" "maps" @@ -31,10 +29,20 @@ type manifestContext struct { } type configSecretInputs struct { - secret *corev1.Secret - configHash uint64 - volumes []corev1.Volume - mounts []corev1.VolumeMount + secret *corev1.Secret + volumes []corev1.Volume + mounts []corev1.VolumeMount + // hashInput is the byte payload that should be folded into the pod + // template's config-hash annotation. Hashing is done by the caller once + // all rollout-relevant inputs (including the skills-init ConfigMap) are + // known. + hashInput configHashInput +} + +type configHashInput struct { + agentCfg []byte + agentCard []byte + secretData []byte } type podRuntimeInputs struct { @@ -80,11 +88,20 @@ func (a *adkApiTranslator) BuildManifest( return nil, err } + var skillsInitCfg []byte if podRuntime.skillsInitConfigMap != nil { outputs.Manifest = append(outputs.Manifest, podRuntime.skillsInitConfigMap) + // Folded into the same rollout-trigger hash as the rest of the pod + // config — the PodSpec only names the ConfigMap, so Kubernetes + // wouldn't otherwise restart the pod when its rendered config changes. + skillsInitCfg = []byte(podRuntime.skillsInitConfigMap.Data[skillsinit.ConfigMapKey]) + } + var configHash uint64 + if h := configSecret.hashInput; h.agentCfg != nil || h.agentCard != nil || h.secretData != nil || skillsInitCfg != nil { + configHash = computeConfigHash(h.agentCfg, h.agentCard, h.secretData, skillsInitCfg) } - podTemplate := buildPodTemplate(manifestCtx, podRuntime, configSecret.configHash) + podTemplate := buildPodTemplate(manifestCtx, podRuntime, configHash) workloadObjects, err := a.buildWorkloadObjects(ctx, manifestCtx, podTemplate) if err != nil { @@ -146,7 +163,7 @@ func (a *adkApiTranslator) buildConfigSecret( cfgJSON := "" agentCard := "" srtSettingsJSON := "" - var configHash uint64 + var hashInput configHashInput var volumes []corev1.Volume var mounts []corev1.VolumeMount @@ -180,7 +197,11 @@ func (a *adkApiTranslator) buildConfigSecret( hashData := make([]byte, 0, len(secretData)+len(srtSettingsJSON)) hashData = append(hashData, secretData...) hashData = append(hashData, srtSettingsJSON...) - configHash = computeConfigHash([]byte(cfgJSON), []byte(agentCard), hashData) + hashInput = configHashInput{ + agentCfg: []byte(cfgJSON), + agentCard: []byte(agentCard), + secretData: hashData, + } volumes = []corev1.Volume{{ Name: "config", VolumeSource: corev1.VolumeSource{ @@ -196,9 +217,9 @@ func (a *adkApiTranslator) buildConfigSecret( ObjectMeta: manifestCtx.objectMeta(), StringData: buildConfigSecretData(cfgJSON, agentCard, srtSettingsJSON), }, - configHash: configHash, - volumes: volumes, - mounts: mounts, + volumes: volumes, + mounts: mounts, + hashInput: hashInput, }, nil } @@ -458,14 +479,6 @@ func buildPodTemplate( podTemplateAnnotations = map[string]string{} } podTemplateAnnotations["kagent.dev/config-hash"] = fmt.Sprintf("%d", configHash) - if cm := runtimeInputs.skillsInitConfigMap; cm != nil { - // Hash the rendered config so a content change in the skills-init - // ConfigMap triggers a pod rollout — the PodSpec only names the - // ConfigMap, so without this annotation Kubernetes would leave the - // pod running against stale config. - sum := sha256.Sum256([]byte(cm.Data[skillsinit.ConfigMapKey])) - podTemplateAnnotations["kagent.dev/skills-init-hash"] = hex.EncodeToString(sum[:8]) - } probeConf := getRuntimeProbeConfig(agentRuntime(manifestCtx.agent.GetAgentSpec())) diff --git a/go/core/internal/controller/translator/agent/skills_unit_test.go b/go/core/internal/controller/translator/agent/skills_unit_test.go index 7ea5d2e491..958d640c0e 100644 --- a/go/core/internal/controller/translator/agent/skills_unit_test.go +++ b/go/core/internal/controller/translator/agent/skills_unit_test.go @@ -203,11 +203,14 @@ func Test_validateSubPath(t *testing.T) { {name: "empty is valid", path: "", wantErr: ""}, {name: "simple relative path", path: "skills/k8s", wantErr: ""}, {name: "single segment", path: "subdir", wantErr: ""}, - {name: "absolute path rejected", path: "/etc/passwd", wantErr: "must be relative"}, - {name: "dotdot at start rejected", path: "../escape", wantErr: "must not contain '..'"}, - {name: "dotdot in middle rejected", path: "a/../b", wantErr: "must not contain '..'"}, - {name: "dotdot at end rejected", path: "a/b/..", wantErr: "must not contain '..'"}, - {name: "bare dotdot rejected", path: "..", wantErr: "must not contain '..'"}, + {name: "absolute path rejected", path: "/etc/passwd", wantErr: "relative path"}, + {name: "dotdot at start rejected", path: "../escape", wantErr: "relative path"}, + {name: "deep traversal rejected", path: "a/../../escape", wantErr: "relative path"}, + {name: "bare dotdot rejected", path: "..", wantErr: "relative path"}, + // filepath.IsLocal collapses ".." segments before evaluating: "a/../b" cleans to + // "b" and "a/b/.." cleans to "a" — both are safe in-repo subdirs and accepted. + {name: "dotdot in middle that cleans to local is ok", path: "a/../b", wantErr: ""}, + {name: "dotdot at end that cleans to local is ok", path: "a/b/..", wantErr: ""}, {name: "dots in name are ok", path: "my.skill/v1.0", wantErr: ""}, } @@ -296,7 +299,7 @@ func Test_prepareSkillsInitConfig_pathTraversal(t *testing.T) { nil, nil, false, nil, ) require.Error(t, err) - assert.Contains(t, err.Error(), "must not contain '..'") + assert.Contains(t, err.Error(), "relative path") } func Test_prepareSkillsInitConfig_absolutePath(t *testing.T) { @@ -307,7 +310,7 @@ func Test_prepareSkillsInitConfig_absolutePath(t *testing.T) { nil, nil, false, nil, ) require.Error(t, err) - assert.Contains(t, err.Error(), "must be relative") + assert.Contains(t, err.Error(), "relative path") } func Test_prepareSkillsInitConfig_authMountPath(t *testing.T) { @@ -495,9 +498,12 @@ func Test_prepareSkillsInitConfig_preservesInjectionStringsAsData(t *testing.T) // Test_prepareSkillsInitConfig_subPathRejectsInjection covers the SubPath // branch with the same battery the original heredoc would have interpolated. func Test_prepareSkillsInitConfig_subPathRejectsInjection(t *testing.T) { + // "a/../b" is intentionally not in this list: filepath.Clean collapses it + // to "b", which is a safe in-repo subdirectory, so we accept it. The + // dangerous cases are escaping ".." and absolute paths. cases := []string{ "../escape", - "a/../b", + "a/../../escape", "/etc/passwd", } for _, p := range cases { diff --git a/go/core/internal/controller/translator/agent/testdata/outputs/agent_with_git_skills.json b/go/core/internal/controller/translator/agent/testdata/outputs/agent_with_git_skills.json index 48473e031a..ee8420af67 100644 --- a/go/core/internal/controller/translator/agent/testdata/outputs/agent_with_git_skills.json +++ b/go/core/internal/controller/translator/agent/testdata/outputs/agent_with_git_skills.json @@ -151,8 +151,7 @@ "template": { "metadata": { "annotations": { - "kagent.dev/config-hash": "9443054578640766875", - "kagent.dev/skills-init-hash": "9fa4c702efa19e21" + "kagent.dev/config-hash": "16162636769325195580" }, "labels": { "app": "kagent", @@ -258,9 +257,6 @@ ], "initContainers": [ { - "command": [ - "/usr/local/bin/skills-init" - ], "image": "cr.kagent.dev/kagent-dev/kagent/skills-init:dev", "name": "skills-init", "resources": { diff --git a/go/core/internal/controller/translator/agent/testdata/outputs/agent_with_skills.json b/go/core/internal/controller/translator/agent/testdata/outputs/agent_with_skills.json index a692fa0852..93458886a6 100644 --- a/go/core/internal/controller/translator/agent/testdata/outputs/agent_with_skills.json +++ b/go/core/internal/controller/translator/agent/testdata/outputs/agent_with_skills.json @@ -151,8 +151,7 @@ "template": { "metadata": { "annotations": { - "kagent.dev/config-hash": "4496754913718271849", - "kagent.dev/skills-init-hash": "a03b999a4613cdde" + "kagent.dev/config-hash": "16877497333853346886" }, "labels": { "app": "kagent", @@ -258,9 +257,6 @@ ], "initContainers": [ { - "command": [ - "/usr/local/bin/skills-init" - ], "image": "cr.kagent.dev/kagent-dev/kagent/skills-init:dev", "name": "skills-init", "resources": { diff --git a/go/core/internal/skillsinit/git.go b/go/core/internal/skillsinit/git.go index 32a4742f0f..9907a61098 100644 --- a/go/core/internal/skillsinit/git.go +++ b/go/core/internal/skillsinit/git.go @@ -5,7 +5,6 @@ import ( "os" "os/exec" "path/filepath" - "slices" ) // CloneGit fetches a single git ref into ref.Dest. All user-controlled @@ -64,12 +63,12 @@ func runGitIn(dir string, args ...string) error { // content into a sibling tmp dir under the same parent so the final rename is // atomic on the same filesystem. func applySubPath(dest, subPath string) error { - // Defense in depth: refuse traversal even if upstream validation slipped. - clean := filepath.Clean(subPath) - if filepath.IsAbs(clean) || hasDotDot(clean) { + // Defense in depth: filepath.IsLocal rejects absolute paths, ".." + // segments, and reserved names — exactly the set we want to refuse. + if !filepath.IsLocal(subPath) { return fmt.Errorf("invalid subPath %q", subPath) } - + clean := filepath.Clean(subPath) src := filepath.Join(dest, clean) info, err := os.Stat(src) if err != nil { @@ -112,24 +111,3 @@ func applySubPath(dest, subPath string) error { return nil } -func hasDotDot(p string) bool { - return slices.Contains(splitAll(p), "..") -} - -func splitAll(p string) []string { - var out []string - for p != "" && p != "." && p != "/" { - dir, base := filepath.Split(p) - if base != "" { - out = append([]string{base}, out...) - } - if dir == p { - break - } - p = filepath.Clean(dir) - if p == "." { - break - } - } - return out -} diff --git a/go/core/internal/skillsinit/git_test.go b/go/core/internal/skillsinit/git_test.go index d3d591d492..6b8417d1d8 100644 --- a/go/core/internal/skillsinit/git_test.go +++ b/go/core/internal/skillsinit/git_test.go @@ -9,31 +9,6 @@ import ( "github.com/stretchr/testify/require" ) -// Test_hasDotDot is the unit-level check behind applySubPath's defense in -// depth. Inputs are expected to be filepath.Clean'd by the caller, so only -// genuinely-escaping ".." segments remain — that's what we exercise here. -func Test_hasDotDot(t *testing.T) { - cases := []struct { - name string - in string - want bool - }{ - {"empty", "", false}, - {"plain", "skills/foo", false}, - {"dot", ".", false}, - {"single dotdot", "..", true}, - {"leading dotdot", "../escape", true}, - {"chained dotdot", "../../escape", true}, - {"name contains dots not segment", "..foo", false}, - {"name suffix dots", "foo..", false}, - } - for _, tc := range cases { - t.Run(tc.name, func(t *testing.T) { - assert.Equal(t, tc.want, hasDotDot(tc.in)) - }) - } -} - // Test_applySubPath_rejectsTraversal exercises the validation gate without // invoking `cp`. We give it a clean dest tree with a real subdir then ask // for traversal — the function must error before touching the filesystem. diff --git a/go/core/internal/skillsinit/oci.go b/go/core/internal/skillsinit/oci.go index c1463adf50..d3df0d09ba 100644 --- a/go/core/internal/skillsinit/oci.go +++ b/go/core/internal/skillsinit/oci.go @@ -5,6 +5,7 @@ import ( "fmt" "io" "os" + "path" "path/filepath" "runtime" "strings" @@ -73,14 +74,16 @@ func hostPlatform() (*v1.Platform, error) { return &v1.Platform{OS: "linux", Architecture: arch}, nil } -// extractTar writes the tar stream into dst. We refuse entries that escape -// dst via absolute paths or ".." segments — the old `tar xf` accepted those, -// and untrusted skill images are exactly the case where that's dangerous. +// extractTar writes the tar stream into dst. All filesystem operations go +// through an os.Root anchored at dst, so any path or symlink that would +// resolve outside dst is rejected by the kernel. func extractTar(r io.Reader, dst string) error { - dstAbs, err := filepath.Abs(dst) + root, err := os.OpenRoot(dst) if err != nil { - return err + return fmt.Errorf("open root %s: %w", dst, err) } + defer root.Close() + tr := tar.NewReader(r) for { hdr, err := tr.Next() @@ -90,23 +93,26 @@ func extractTar(r io.Reader, dst string) error { if err != nil { return err } - target, err := safeJoin(dstAbs, hdr.Name) + rel, err := tarEntryToLocal(hdr.Name) if err != nil { - return err + return fmt.Errorf("tar entry %q: %w", hdr.Name, err) + } + if rel == "" { + continue } switch hdr.Typeflag { case tar.TypeDir: - if err := os.MkdirAll(target, os.FileMode(hdr.Mode)|0o700); err != nil { + if err := root.MkdirAll(rel, os.FileMode(hdr.Mode)|0o700); err != nil { return err } case tar.TypeReg: - if err := os.MkdirAll(filepath.Dir(target), 0o755); err != nil { + if err := root.MkdirAll(filepath.Dir(rel), 0o755); err != nil { return err } // OCI layers can overwrite read-only files from earlier layers. // Removing first avoids EACCES when O_TRUNC would otherwise fail. - _ = os.Remove(target) - f, err := os.OpenFile(target, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, os.FileMode(hdr.Mode)&0o777) + _ = root.Remove(rel) + f, err := root.OpenFile(rel, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, os.FileMode(hdr.Mode)&0o777) if err != nil { return err } @@ -116,20 +122,14 @@ func extractTar(r io.Reader, dst string) error { } f.Close() case tar.TypeSymlink: - if err := os.MkdirAll(filepath.Dir(target), 0o755); err != nil { + if err := root.MkdirAll(filepath.Dir(rel), 0o755); err != nil { return err } - // Reject symlinks whose target escapes dst. - linkTarget := hdr.Linkname - if filepath.IsAbs(linkTarget) { - return fmt.Errorf("tar entry %q has absolute symlink target %q", hdr.Name, linkTarget) - } - resolved := filepath.Clean(filepath.Join(filepath.Dir(target), linkTarget)) - if !strings.HasPrefix(resolved+string(os.PathSeparator), dstAbs+string(os.PathSeparator)) && resolved != dstAbs { - return fmt.Errorf("tar entry %q symlink target %q escapes destination", hdr.Name, linkTarget) + if err := validateSymlinkTarget(hdr.Name, rel, hdr.Linkname); err != nil { + return err } - _ = os.Remove(target) - if err := os.Symlink(linkTarget, target); err != nil { + _ = root.Remove(rel) + if err := root.Symlink(hdr.Linkname, rel); err != nil { return err } default: @@ -138,17 +138,45 @@ func extractTar(r io.Reader, dst string) error { } } -func safeJoin(dst, name string) (string, error) { - // Ensure the tar entry path is treated as relative so filepath.Join doesn't - // discard dst. We still validate after joining to prevent escapes via "..". - cleaned := filepath.Clean(name) - cleaned = strings.TrimPrefix(cleaned, string(os.PathSeparator)) - if cleaned == "." { - return dst, nil +// tarEntryToLocal converts a tar header name (always slash-separated, may +// have a leading "/") into a local OS path that's guaranteed to stay inside +// the root. Returns "" for the no-op "." / "" entries. Delegates the actual +// safety check to filepath.Localize, which rejects ".." segments and any +// path that can't be a relative local path. +func tarEntryToLocal(name string) (string, error) { + // Strip leading "/" — tar convention for "absolute" entries is to re-root + // them under the destination, not to escape. Strip trailing "/" too since + // tar directory entries carry one and filepath.Localize rejects it. + // filepath.Localize then catches ".." traversal and anything else not + // locally representable. + trimmed := strings.Trim(name, "/") + if trimmed == "" || trimmed == "." { + return "", nil + } + local, err := filepath.Localize(trimmed) + if err != nil { + return "", fmt.Errorf("escapes destination: %w", err) + } + return local, nil +} + +// validateSymlinkTarget rejects symlinks whose target would resolve outside +// the root. os.Root.Symlink itself only creates the link verbatim — it does +// not enforce that the target stays in-root — so we have to check here. +func validateSymlinkTarget(entryName, linkPath, linkTarget string) error { + if filepath.IsAbs(linkTarget) { + return fmt.Errorf("tar entry %q has absolute symlink target %q", entryName, linkTarget) } - target := filepath.Join(dst, cleaned) - if !strings.HasPrefix(target+string(os.PathSeparator), dst+string(os.PathSeparator)) && target != dst { - return "", fmt.Errorf("tar entry %q escapes destination", name) + // Resolve link target relative to the link's *directory*. We use slash- + // based path.Join + path.Clean because tar names are slash-separated and + // the result is then validated as a single relative path. + resolved := path.Join(filepath.ToSlash(filepath.Dir(linkPath)), filepath.ToSlash(linkTarget)) + if resolved == "" || resolved == "." { + return nil } - return target, nil + if !filepath.IsLocal(filepath.FromSlash(resolved)) { + return fmt.Errorf("tar entry %q symlink target %q escapes destination", entryName, linkTarget) + } + return nil } + diff --git a/go/core/internal/skillsinit/oci_test.go b/go/core/internal/skillsinit/oci_test.go index a98575f96d..7984bb8213 100644 --- a/go/core/internal/skillsinit/oci_test.go +++ b/go/core/internal/skillsinit/oci_test.go @@ -12,13 +12,11 @@ import ( "github.com/stretchr/testify/require" ) -// Test_safeJoin_rejectsEscape covers every shape of tar-entry name that the -// original `tar xf` pipeline would have happily honored: absolute paths, -// ".." traversal, and combinations thereof. A malicious skill image is the -// motivating threat — these names must never produce paths outside dst. -func Test_safeJoin_rejectsEscape(t *testing.T) { - dst := "/tmp/skills/dest" - +// Test_tarEntryToLocal_rejectsEscape covers every shape of tar-entry name +// that the original `tar xf` pipeline would have happily honored: absolute +// paths, ".." traversal, and combinations thereof. A malicious skill image +// is the motivating threat — these names must never produce paths outside dst. +func Test_tarEntryToLocal_rejectsEscape(t *testing.T) { cases := []struct { name string entry string @@ -27,19 +25,19 @@ func Test_safeJoin_rejectsEscape(t *testing.T) { {"plain file", "file.txt", false}, {"nested file", "a/b/c.txt", false}, {"dot-only", ".", false}, - {"leading slash stripped", "/file.txt", false}, // joined under dst, not at / + {"leading slash stripped", "/file.txt", false}, // re-rooted under dst, not at / {"traversal", "../escape", true}, {"traversal mid-path", "a/../../escape", true}, - {"absolute escape", "/etc/passwd", false}, // safeJoin strips leading "/" but result is dst/etc/passwd which is under dst — that's intentional + {"absolute escape", "/etc/passwd", false}, // strips leading "/" so result is dst/etc/passwd — under dst, intentional {"deep traversal", "../../../etc/passwd", true}, {"trailing traversal", "a/b/../../..", true}, } for _, tc := range cases { t.Run(tc.name, func(t *testing.T) { - _, err := safeJoin(dst, tc.entry) + _, err := tarEntryToLocal(tc.entry) if tc.wantErr { - require.Error(t, err, "safeJoin(%q, %q) must reject", dst, tc.entry) + require.Error(t, err, "tarEntryToLocal(%q) must reject", tc.entry) } else { require.NoError(t, err) } From 88e1949f0973cf9f263e35c19cd5112ee498695e Mon Sep 17 00:00:00 2001 From: Eitan Yarmush Date: Thu, 28 May 2026 18:13:22 +0000 Subject: [PATCH 09/13] test(e2e): drop skills-init Command assertion MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The translator no longer sets Command on the skills-init container — the image's ENTRYPOINT is now the single source of truth. Update the e2e assertion to match. Signed-off-by: Eitan Yarmush --- go/core/test/e2e/invoke_api_test.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/go/core/test/e2e/invoke_api_test.go b/go/core/test/e2e/invoke_api_test.go index c31721702f..68f9e93862 100644 --- a/go/core/test/e2e/invoke_api_test.go +++ b/go/core/test/e2e/invoke_api_test.go @@ -1218,8 +1218,9 @@ func TestE2ESkillImagePullSecrets(t *testing.T) { } require.True(t, foundSecretMount, "skills-init should mount the pull secret volume") - require.Len(t, skillsInit.Command, 1) - require.Equal(t, "/usr/local/bin/skills-init", skillsInit.Command[0]) + // Command is intentionally unset; the skills-init image's ENTRYPOINT is + // the single source of truth for the binary path. + require.Empty(t, skillsInit.Command, "skills-init Command must be empty so ENTRYPOINT runs") // The skills-init binary reads its config from a ConfigMap; verify it // lists each imagePullSecret so the binary will merge their auths. From 4f0da880c1bb8d885dee30ae5dd4e250a6877db8 Mon Sep 17 00:00:00 2001 From: Eitan Yarmush Date: Thu, 28 May 2026 18:36:07 +0000 Subject: [PATCH 10/13] style: gofmt skillsinit/{git,oci}.go Signed-off-by: Eitan Yarmush --- go/core/internal/skillsinit/git.go | 1 - go/core/internal/skillsinit/oci.go | 1 - 2 files changed, 2 deletions(-) diff --git a/go/core/internal/skillsinit/git.go b/go/core/internal/skillsinit/git.go index 9907a61098..b3bb305f83 100644 --- a/go/core/internal/skillsinit/git.go +++ b/go/core/internal/skillsinit/git.go @@ -110,4 +110,3 @@ func applySubPath(dest, subPath string) error { cleanupTmp = "" return nil } - diff --git a/go/core/internal/skillsinit/oci.go b/go/core/internal/skillsinit/oci.go index d3df0d09ba..5a4501d68b 100644 --- a/go/core/internal/skillsinit/oci.go +++ b/go/core/internal/skillsinit/oci.go @@ -179,4 +179,3 @@ func validateSymlinkTarget(entryName, linkPath, linkTarget string) error { } return nil } - From 47c0e2686616aa6e34b508d9f56d4ede49d25466 Mon Sep 17 00:00:00 2001 From: Eitan Yarmush Date: Thu, 28 May 2026 18:57:32 +0000 Subject: [PATCH 11/13] refactor(skills-init): rename GitRef.IsCommit to Full Per review feedback, name the field after what it does (full vs shallow clone) rather than the trigger condition. Default (false) remains shallow to preserve the omitempty wire format. Behavior unchanged: the translator sets Full=true iff the ref is a 40-char commit SHA, since shallow `--branch` does not accept SHAs. Signed-off-by: Eitan Yarmush --- .codex | 0 .../translator/agent/adk_api_translator.go | 10 +++++----- .../controller/translator/agent/git_skills_test.go | 4 ++-- .../testdata/outputs/agent_with_git_skills.json | 4 ++-- go/core/internal/skillsinit/config.go | 12 +++++++----- go/core/internal/skillsinit/git.go | 6 +++--- 6 files changed, 19 insertions(+), 17 deletions(-) create mode 100644 .codex diff --git a/.codex b/.codex new file mode 100644 index 0000000000..e69de29bb2 diff --git a/go/core/internal/controller/translator/agent/adk_api_translator.go b/go/core/internal/controller/translator/agent/adk_api_translator.go index a78a95739a..d8e62b722d 100644 --- a/go/core/internal/controller/translator/agent/adk_api_translator.go +++ b/go/core/internal/controller/translator/agent/adk_api_translator.go @@ -1276,11 +1276,11 @@ func prepareSkillsInitConfig( } cfg.GitRefs = append(cfg.GitRefs, skillsinit.GitRef{ - URL: ref.URL, - Ref: gitRef, - Dest: skillsinit.SkillsDir + "/" + name, - IsCommit: isCommitSHA(gitRef), - SubPath: subPath, + URL: ref.URL, + Ref: gitRef, + Dest: skillsinit.SkillsDir + "/" + name, + Full: isCommitSHA(gitRef), + SubPath: subPath, }) } diff --git a/go/core/internal/controller/translator/agent/git_skills_test.go b/go/core/internal/controller/translator/agent/git_skills_test.go index d2181e888a..175a4eab9d 100644 --- a/go/core/internal/controller/translator/agent/git_skills_test.go +++ b/go/core/internal/controller/translator/agent/git_skills_test.go @@ -369,7 +369,7 @@ func Test_AdkApiTranslator_Skills(t *testing.T) { if tt.wantContainsBranch != "" { found := false for _, g := range cfg.GitRefs { - if g.Ref == tt.wantContainsBranch && !g.IsCommit { + if g.Ref == tt.wantContainsBranch && !g.Full { found = true } } @@ -379,7 +379,7 @@ func Test_AdkApiTranslator_Skills(t *testing.T) { if tt.wantContainsCommit != "" { found := false for _, g := range cfg.GitRefs { - if g.Ref == tt.wantContainsCommit && g.IsCommit { + if g.Ref == tt.wantContainsCommit && g.Full { found = true } } diff --git a/go/core/internal/controller/translator/agent/testdata/outputs/agent_with_git_skills.json b/go/core/internal/controller/translator/agent/testdata/outputs/agent_with_git_skills.json index ee8420af67..29d7fc178b 100644 --- a/go/core/internal/controller/translator/agent/testdata/outputs/agent_with_git_skills.json +++ b/go/core/internal/controller/translator/agent/testdata/outputs/agent_with_git_skills.json @@ -93,7 +93,7 @@ }, { "data": { - "config.json": "{\"authMountPath\":\"/git-auth\",\"gitRefs\":[{\"url\":\"https://github.com/org/my-skills\",\"ref\":\"v2.0.0\",\"dest\":\"/skills/k8s-skill\",\"subPath\":\"skills/k8s\"},{\"url\":\"https://github.com/org/another-skill\",\"ref\":\"abc123def456abc123def456abc123def456abc1\",\"dest\":\"/skills/another-skill\",\"isCommit\":true},{\"url\":\"https://github.com/org/private-skill\",\"ref\":\"main\",\"dest\":\"/skills/private-skill\"}],\"ociRefs\":[{\"image\":\"ghcr.io/org/oci-skill:v1.0\",\"dest\":\"/skills/oci-skill\"}]}" + "config.json": "{\"authMountPath\":\"/git-auth\",\"gitRefs\":[{\"url\":\"https://github.com/org/my-skills\",\"ref\":\"v2.0.0\",\"dest\":\"/skills/k8s-skill\",\"subPath\":\"skills/k8s\"},{\"url\":\"https://github.com/org/another-skill\",\"ref\":\"abc123def456abc123def456abc123def456abc1\",\"dest\":\"/skills/another-skill\",\"full\":true},{\"url\":\"https://github.com/org/private-skill\",\"ref\":\"main\",\"dest\":\"/skills/private-skill\"}],\"ociRefs\":[{\"image\":\"ghcr.io/org/oci-skill:v1.0\",\"dest\":\"/skills/oci-skill\"}]}" }, "metadata": { "name": "git-skills-agent-skills-init", @@ -151,7 +151,7 @@ "template": { "metadata": { "annotations": { - "kagent.dev/config-hash": "16162636769325195580" + "kagent.dev/config-hash": "10322571574088408977" }, "labels": { "app": "kagent", diff --git a/go/core/internal/skillsinit/config.go b/go/core/internal/skillsinit/config.go index 3af4d2500b..9fbacae200 100644 --- a/go/core/internal/skillsinit/config.go +++ b/go/core/internal/skillsinit/config.go @@ -55,11 +55,13 @@ type Config struct { // GitRef describes a single git clone operation. type GitRef struct { URL string `json:"url"` - // Ref is a branch name, tag, or commit SHA. When IsCommit is true a - // full clone + checkout is used; otherwise a shallow --branch clone. - Ref string `json:"ref"` - Dest string `json:"dest"` - IsCommit bool `json:"isCommit,omitempty"` + // Ref is a branch name, tag, or commit SHA. + Ref string `json:"ref"` + Dest string `json:"dest"` + // Full requests a full (non-shallow) clone followed by `git checkout`. + // Set when Ref is a commit SHA, since shallow `--branch` does not accept + // SHAs. The default (false) is a depth-1 branch/tag clone. + Full bool `json:"full,omitempty"` // SubPath, if set, names a subdirectory inside the clone that becomes // the final skill root. SubPath string `json:"subPath,omitempty"` diff --git a/go/core/internal/skillsinit/git.go b/go/core/internal/skillsinit/git.go index b3bb305f83..519a4d90d0 100644 --- a/go/core/internal/skillsinit/git.go +++ b/go/core/internal/skillsinit/git.go @@ -12,20 +12,20 @@ import ( // exec.Command — they never pass through a shell, so metacharacters in any of // them are inert. // -// When ref.IsCommit is true we do a full clone then `git checkout `, +// When ref.Full is true we do a full clone then `git checkout `, // because shallow `--branch` does not accept commit SHAs. When false we use a // depth-1 branch/tag clone. // // SubPath, if set, rewrites the destination so the final layout matches the // requested in-repo subdirectory. func CloneGit(ref GitRef) error { - if ref.IsCommit { + if ref.Full { if err := runGit("clone", "--", ref.URL, ref.Dest); err != nil { return err } // `--` separator prevents a ref starting with `-` from being parsed // as a flag. Refs are already validated upstream as 40-char hex when - // IsCommit is true, but defense in depth costs nothing. + // Full is true, but defense in depth costs nothing. if err := runGitIn(ref.Dest, "checkout", "--", ref.Ref); err != nil { return err } From 94a909fd22fcdd7a8229f07286a6d0adb484c371 Mon Sep 17 00:00:00 2001 From: Eitan Yarmush Date: Thu, 28 May 2026 19:20:05 +0000 Subject: [PATCH 12/13] chore: remove stray empty .codex file Signed-off-by: Eitan Yarmush --- .codex | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 .codex diff --git a/.codex b/.codex deleted file mode 100644 index e69de29bb2..0000000000 From 87fce8cde312cf7df7817bfab662dc9c13b6ef1b Mon Sep 17 00:00:00 2001 From: Eitan Yarmush Date: Fri, 29 May 2026 14:52:33 +0000 Subject: [PATCH 13/13] docs(skills): clarify trusted git skill contents Signed-off-by: Eitan Yarmush --- go/api/config/crd/bases/kagent.dev_agents.yaml | 6 ++++-- go/api/config/crd/bases/kagent.dev_sandboxagents.yaml | 6 ++++-- go/api/v1alpha2/agent_types.go | 4 +++- helm/kagent-crds/templates/kagent.dev_agents.yaml | 6 ++++-- helm/kagent-crds/templates/kagent.dev_sandboxagents.yaml | 6 ++++-- 5 files changed, 19 insertions(+), 9 deletions(-) diff --git a/go/api/config/crd/bases/kagent.dev_agents.yaml b/go/api/config/crd/bases/kagent.dev_agents.yaml index f54a5879d1..d5fcc47b38 100644 --- a/go/api/config/crd/bases/kagent.dev_agents.yaml +++ b/go/api/config/crd/bases/kagent.dev_agents.yaml @@ -13284,8 +13284,10 @@ spec: URL path segment, without .git). type: string path: - description: Subdirectory within the repo to use as the - skill root. + description: |- + Subdirectory within the repo to use as the skill root. The API validates + this input path, but treats repository contents as trusted: symlinks under + this path are dereferenced when materializing the skill. type: string ref: default: main diff --git a/go/api/config/crd/bases/kagent.dev_sandboxagents.yaml b/go/api/config/crd/bases/kagent.dev_sandboxagents.yaml index b8bc8dce7a..03591d492e 100644 --- a/go/api/config/crd/bases/kagent.dev_sandboxagents.yaml +++ b/go/api/config/crd/bases/kagent.dev_sandboxagents.yaml @@ -10942,8 +10942,10 @@ spec: URL path segment, without .git). type: string path: - description: Subdirectory within the repo to use as the - skill root. + description: |- + Subdirectory within the repo to use as the skill root. The API validates + this input path, but treats repository contents as trusted: symlinks under + this path are dereferenced when materializing the skill. type: string ref: default: main diff --git a/go/api/v1alpha2/agent_types.go b/go/api/v1alpha2/agent_types.go index 661643f54a..94149cc1c4 100644 --- a/go/api/v1alpha2/agent_types.go +++ b/go/api/v1alpha2/agent_types.go @@ -147,7 +147,9 @@ type GitRepo struct { // +kubebuilder:default="main" Ref string `json:"ref,omitempty"` - // Subdirectory within the repo to use as the skill root. + // Subdirectory within the repo to use as the skill root. The API validates + // this input path, but treats repository contents as trusted: symlinks under + // this path are dereferenced when materializing the skill. // +optional Path string `json:"path,omitempty"` diff --git a/helm/kagent-crds/templates/kagent.dev_agents.yaml b/helm/kagent-crds/templates/kagent.dev_agents.yaml index f54a5879d1..d5fcc47b38 100644 --- a/helm/kagent-crds/templates/kagent.dev_agents.yaml +++ b/helm/kagent-crds/templates/kagent.dev_agents.yaml @@ -13284,8 +13284,10 @@ spec: URL path segment, without .git). type: string path: - description: Subdirectory within the repo to use as the - skill root. + description: |- + Subdirectory within the repo to use as the skill root. The API validates + this input path, but treats repository contents as trusted: symlinks under + this path are dereferenced when materializing the skill. type: string ref: default: main diff --git a/helm/kagent-crds/templates/kagent.dev_sandboxagents.yaml b/helm/kagent-crds/templates/kagent.dev_sandboxagents.yaml index b8bc8dce7a..03591d492e 100644 --- a/helm/kagent-crds/templates/kagent.dev_sandboxagents.yaml +++ b/helm/kagent-crds/templates/kagent.dev_sandboxagents.yaml @@ -10942,8 +10942,10 @@ spec: URL path segment, without .git). type: string path: - description: Subdirectory within the repo to use as the - skill root. + description: |- + Subdirectory within the repo to use as the skill root. The API validates + this input path, but treats repository contents as trusted: symlinks under + this path are dereferenced when materializing the skill. type: string ref: default: main