diff --git a/README.md b/README.md index 9172451..ab15550 100644 --- a/README.md +++ b/README.md @@ -45,6 +45,13 @@ This selector will match any `AcrPullBinding` that has `tier` set to either `fro that `environment` is not equal to `prod`. Leaving the flag unset causes the controller to reconcile every `AcrPullBinding` in the cluster. +### Restricting ACR server domains + +The controller restricts which registry domains it will exchange Azure tokens with by using a configured list of domain +suffixes. Each suffix matches the exact domain or any subdomain. The Helm chart defaults this value to `azurecr.io` for +public Azure Container Registry deployments. Operators deploying to sovereign clouds should configure +`allowedACRServerSuffixes` with the appropriate registry suffixes for their environment. + ## A note on pull secrets When `Pod`s are created to fulfill `Deployment`s, `DaemonSet`s, _etc_, `pod.spec.imagePullSecrets` is defaulted from diff --git a/cmd/main.go b/cmd/main.go index 961476f..c790683 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -54,6 +54,7 @@ func main() { var serviceAccountTokenAudience string var ttlRotationFraction float64 var apbLabelSelectorString string + var allowedACRServerSuffixesString string flag.StringVar(&metricsAddr, "metrics-bind-address", ":8080", "The address the metric endpoint binds to.") flag.StringVar(&probeAddr, "health-probe-bind-address", ":8081", "The address the probe endpoint binds to.") flag.BoolVar(&enableLeaderElection, "leader-elect", false, @@ -62,11 +63,13 @@ func main() { flag.StringVar(&serviceAccountTokenAudience, "service-account-token-audience", "api://AzureCRTokenExchange", "The audience to ask the Kubernetes API server to mint Service Account tokens for, must match Federated Identity Credential configuration in Azure.") flag.Float64Var(&ttlRotationFraction, "ttl-rotation-fraction", 0.5, "The fraction of the pull token's TTL at which the v1beta2 reconciler will refresh the token.") flag.StringVar(&apbLabelSelectorString, "label-selector", "", "Kubernetes label selector used to filter AcrPullBindings (e.g. environment!=prod,tier in (frontend,backend))") + flag.StringVar(&allowedACRServerSuffixesString, "allowed-acr-server-suffixes", "", "Comma-separated list of ACR server domain suffixes the controller may exchange tokens with. If empty, no ACR server suffix validation is performed.") opts := zap.Options{ Development: true, } opts.BindFlags(flag.CommandLine) flag.Parse() + allowedACRServerSuffixes := commaSeparatedValues(allowedACRServerSuffixesString) defaultACRServer := os.Getenv(defaultACRServerEnvKey) defaultManagedIdentityResourceID := os.Getenv(defaultManagedIdentityResourceIDEnvKey) defaultManagedIdentityClientID := os.Getenv(defaultManagedIdentityClientIDEnvKey) @@ -151,6 +154,7 @@ func main() { DefaultManagedIdentityClientID: defaultManagedIdentityClientID, DefaultACRServer: defaultACRServer, PullBindingLabelSelectorString: apbLabelSelectorString, + AllowedACRServerSuffixes: allowedACRServerSuffixes, }) if err := apbReconciler.SetupWithManager(ctx, mgr); err != nil { setupLog.Error(err, "unable to create controller", "controller", "AcrPullBinding") @@ -172,6 +176,7 @@ func main() { ServiceAccountTokenAudience: serviceAccountTokenAudience, ServiceAccountClient: kubeClient.CoreV1(), PullBindingLabelSelectorString: apbLabelSelectorString, + AllowedACRServerSuffixes: allowedACRServerSuffixes, }) if err := v1beta2Reconciler.SetupWithManager(ctx, mgr); err != nil { setupLog.Error(err, "unable to create controller", "controller", "AcrPullBindingV1beta2") @@ -206,3 +211,14 @@ func main() { os.Exit(1) } } + +func commaSeparatedValues(value string) []string { + var values []string + for _, item := range strings.Split(value, ",") { + item = strings.TrimSpace(item) + if item != "" { + values = append(values, item) + } + } + return values +} diff --git a/config/helm/templates/deployment.yaml b/config/helm/templates/deployment.yaml index f9001a8..bb78d6b 100644 --- a/config/helm/templates/deployment.yaml +++ b/config/helm/templates/deployment.yaml @@ -26,6 +26,9 @@ spec: - "--metrics-bind-address=127.0.0.1:8080" - "--leader-elect" - "--ttl-rotation-fraction={{ .Values.ttlRotationFraction }}" + {{- with .Values.allowedACRServerSuffixes }} + - "--allowed-acr-server-suffixes={{ join "," . }}" + {{- end }} image: "{{ .Values.image }}" name: acrpull-controller ports: diff --git a/config/helm/values.yaml b/config/helm/values.yaml index 77ac278..1987e9c 100644 --- a/config/helm/values.yaml +++ b/config/helm/values.yaml @@ -5,3 +5,5 @@ nodeSelector: {} tolerations: [] affinity: {} ttlRotationFraction: 0.5 +allowedACRServerSuffixes: + - azurecr.io diff --git a/internal/controller/acr_server.go b/internal/controller/acr_server.go new file mode 100644 index 0000000..2125c41 --- /dev/null +++ b/internal/controller/acr_server.go @@ -0,0 +1,71 @@ +package controller + +import ( + "fmt" + "net" + "net/url" + "strings" +) + +func validateACRServerSuffix(acrServer string, allowedSuffixes []string) error { + allowedSuffixes = normalizeACRServerSuffixes(allowedSuffixes) + if len(allowedSuffixes) == 0 { + return nil + } + + hostname, err := acrServerHostname(acrServer) + if err != nil { + return fmt.Errorf("ACR server %q is invalid: %w", acrServer, err) + } + + for _, suffix := range allowedSuffixes { + if hostname == suffix || strings.HasSuffix(hostname, "."+suffix) { + return nil + } + } + + return fmt.Errorf("ACR server %q is not in the allowed ACR server suffixes: %s", acrServer, strings.Join(allowedSuffixes, ", ")) +} + +func normalizeACRServerSuffixes(suffixes []string) []string { + normalized := make([]string, 0, len(suffixes)) + seen := map[string]struct{}{} + for _, suffix := range suffixes { + suffix = strings.Trim(strings.ToLower(strings.TrimSpace(suffix)), ".") + if suffix == "" { + continue + } + if _, ok := seen[suffix]; ok { + continue + } + seen[suffix] = struct{}{} + normalized = append(normalized, suffix) + } + return normalized +} + +func acrServerHostname(acrServer string) (string, error) { + trimmed := strings.TrimSpace(acrServer) + if trimmed == "" { + return "", fmt.Errorf("server is empty") + } + + endpoint, err := url.Parse("https://" + trimmed) + if err != nil { + return "", err + } + if endpoint.Host == "" || endpoint.User != nil || endpoint.Path != "" || endpoint.RawQuery != "" || endpoint.Fragment != "" { + return "", fmt.Errorf("server must be a host name without scheme, path, query, or fragment") + } + if strings.Contains(endpoint.Host, ":") { + if _, _, err := net.SplitHostPort(endpoint.Host); err != nil { + return "", fmt.Errorf("server host is invalid: %w", err) + } + } + + hostname := strings.TrimSuffix(strings.ToLower(endpoint.Hostname()), ".") + if hostname == "" { + return "", fmt.Errorf("server hostname is empty") + } + return hostname, nil +} diff --git a/internal/controller/acr_server_test.go b/internal/controller/acr_server_test.go new file mode 100644 index 0000000..909c588 --- /dev/null +++ b/internal/controller/acr_server_test.go @@ -0,0 +1,64 @@ +package controller + +import ( + "strings" + "testing" +) + +func Test_validateACRServerSuffix(t *testing.T) { + for _, testCase := range []struct { + name string + acrServer string + allowedSuffixes []string + wantErr string + }{ + { + name: "empty allow list preserves legacy behavior", + acrServer: "attacker.example.com", + allowedSuffixes: nil, + }, + { + name: "subdomain of configured suffix is allowed", + acrServer: "registry.azurecr.io", + allowedSuffixes: []string{"azurecr.io"}, + }, + { + name: "suffix matching is case insensitive", + acrServer: "Registry.AzureCR.IO", + allowedSuffixes: []string{".AZURECR.IO."}, + }, + { + name: "exact configured domain is allowed", + acrServer: "registry.internal", + allowedSuffixes: []string{"registry.internal"}, + }, + { + name: "lookalike suffix is rejected", + acrServer: "registry.azurecr.io.attacker.example.com", + allowedSuffixes: []string{"azurecr.io"}, + wantErr: `ACR server "registry.azurecr.io.attacker.example.com" is not in the allowed ACR server suffixes: azurecr.io`, + }, + { + name: "paths are rejected when validation is configured", + acrServer: "registry.azurecr.io/oauth2/exchange", + allowedSuffixes: []string{"azurecr.io"}, + wantErr: `ACR server "registry.azurecr.io/oauth2/exchange" is invalid: server must be a host name without scheme, path, query, or fragment`, + }, + } { + t.Run(testCase.name, func(t *testing.T) { + err := validateACRServerSuffix(testCase.acrServer, testCase.allowedSuffixes) + if testCase.wantErr == "" { + if err != nil { + t.Fatalf("expected no error, got %v", err) + } + return + } + if err == nil { + t.Fatalf("expected error containing %q", testCase.wantErr) + } + if !strings.Contains(err.Error(), testCase.wantErr) { + t.Fatalf("expected error containing %q, got %q", testCase.wantErr, err.Error()) + } + }) + } +} diff --git a/internal/controller/acrpullbinding_controller.go b/internal/controller/acrpullbinding_controller.go index e48b70f..3ddbde4 100644 --- a/internal/controller/acrpullbinding_controller.go +++ b/internal/controller/acrpullbinding_controller.go @@ -52,6 +52,7 @@ type V1beta1ReconcilerOpts struct { DefaultManagedIdentityClientID string DefaultACRServer string PullBindingLabelSelectorString string + AllowedACRServerSuffixes []string } func NewV1beta1Reconciler(opts *V1beta1ReconcilerOpts) *AcrPullBindingReconciler { @@ -93,6 +94,10 @@ func NewV1beta1Reconciler(opts *V1beta1ReconcilerOpts) *AcrPullBindingReconciler msiClientID, msiResourceID, acrServer := specOrDefault(opts, binding.Spec) return base36sha224([]byte(msiClientID + msiResourceID + acrServer + binding.Spec.Scope)) }, + ValidateBinding: func(binding *msiacrpullv1beta1.AcrPullBinding) error { + _, _, acrServer := specOrDefault(opts, binding.Spec) + return validateACRServerSuffix(acrServer, opts.AllowedACRServerSuffixes) + }, CreatePullCredential: func(ctx context.Context, binding *msiacrpullv1beta1.AcrPullBinding, serviceAccount *corev1.ServiceAccount) (string, time.Time, error) { msiClientID, msiResourceID, acrServer := specOrDefault(opts, binding.Spec) acrAccessToken, err := opts.Auth.AcquireACRAccessToken(ctx, msiResourceID, msiClientID, acrServer, binding.Spec.Scope) diff --git a/internal/controller/acrpullbinding_controller_test.go b/internal/controller/acrpullbinding_controller_test.go index 2baec3a..d72fc9d 100644 --- a/internal/controller/acrpullbinding_controller_test.go +++ b/internal/controller/acrpullbinding_controller_test.go @@ -178,6 +178,7 @@ func Test_ACRPullBindingController_reconcile(t *testing.T) { serviceAccount *corev1.ServiceAccount pullSecrets []corev1.Secret referencingServiceAccounts []corev1.ServiceAccount + allowedACRServerSuffixes []string registerTokenCall func(*mock_authorizer.MockInterface) @@ -300,6 +301,38 @@ func Test_ACRPullBindingController_reconcile(t *testing.T) { }, }, }, + { + name: "disallowed ACR server fails before token acquisition", + acrBinding: &msiacrpullv1beta1.AcrPullBinding{ + ObjectMeta: metav1.ObjectMeta{Namespace: "ns", Name: "binding", Finalizers: []string{"msi-acrpull.microsoft.com"}}, + Spec: msiacrpullv1beta1.AcrPullBindingSpec{ + ServiceAccountName: "delegate", + AcrServer: "attacker.example.com", + Scope: "repository:testing:pull,push", + ManagedIdentityClientID: "client-id", + ManagedIdentityResourceID: "resource-id", + }, + }, + serviceAccount: &corev1.ServiceAccount{ + ObjectMeta: metav1.ObjectMeta{Namespace: "ns", Name: "delegate"}, + }, + allowedACRServerSuffixes: []string{"azurecr.io"}, + output: &action[*msiacrpullv1beta1.AcrPullBinding]{ + updatePullBindingStatus: &msiacrpullv1beta1.AcrPullBinding{ + ObjectMeta: metav1.ObjectMeta{Namespace: "ns", Name: "binding", Finalizers: []string{"msi-acrpull.microsoft.com"}}, + Spec: msiacrpullv1beta1.AcrPullBindingSpec{ + ServiceAccountName: "delegate", + AcrServer: "attacker.example.com", + Scope: "repository:testing:pull,push", + ManagedIdentityClientID: "client-id", + ManagedIdentityResourceID: "resource-id", + }, + Status: msiacrpullv1beta1.AcrPullBindingStatus{ + Error: `ACR server "attacker.example.com" is not in the allowed ACR server suffixes: azurecr.io`, + }, + }, + }, + }, { name: "binding with pull credential updates the service account", acrBinding: &msiacrpullv1beta1.AcrPullBinding{ @@ -1392,6 +1425,7 @@ func Test_ACRPullBindingController_reconcile(t *testing.T) { Auth: fakeAuth, DefaultManagedIdentityResourceID: defaultManagedIdentityResourceID, DefaultACRServer: defaultACRServer, + AllowedACRServerSuffixes: testCase.allowedACRServerSuffixes, }) output := controller.reconcile(context.Background(), logger, testCase.acrBinding, testCase.serviceAccount, testCase.pullSecrets, testCase.referencingServiceAccounts) diff --git a/internal/controller/acrpullbinding_v1beta2_controller.go b/internal/controller/acrpullbinding_v1beta2_controller.go index ed93d13..f96eafc 100644 --- a/internal/controller/acrpullbinding_v1beta2_controller.go +++ b/internal/controller/acrpullbinding_v1beta2_controller.go @@ -43,6 +43,7 @@ type V1beta2ReconcilerOpts struct { ServiceAccountClient corev1client.ServiceAccountsGetter ServiceAccountTokenAudience string PullBindingLabelSelectorString string + AllowedACRServerSuffixes []string // exposed here to allow unit tests to over-write them mintToken ServiceAccountTokenMinter @@ -99,6 +100,9 @@ func NewV1beta2Reconciler(opts *V1beta2ReconcilerOpts) *PullBindingReconciler { GetInputsHash: func(binding *msiacrpullv1beta2.AcrPullBinding) string { return inputsHash(binding.Spec) }, + ValidateBinding: func(binding *msiacrpullv1beta2.AcrPullBinding) error { + return validateACRServerSuffix(binding.Spec.ACR.Server, opts.AllowedACRServerSuffixes) + }, CreatePullCredential: func(ctx context.Context, binding *msiacrpullv1beta2.AcrPullBinding, serviceAccount *corev1.ServiceAccount) (string, time.Time, error) { var tenantId, clientId, token string if binding.Spec.Auth.WorkloadIdentity != nil { diff --git a/internal/controller/acrpullbinding_v1beta2_controller_test.go b/internal/controller/acrpullbinding_v1beta2_controller_test.go index 3b0a0d5..000b373 100644 --- a/internal/controller/acrpullbinding_v1beta2_controller_test.go +++ b/internal/controller/acrpullbinding_v1beta2_controller_test.go @@ -49,6 +49,7 @@ func Test_ACRPullBindingController_v1beta2_reconcile(t *testing.T) { serviceAccount *corev1.ServiceAccount pullSecrets []corev1.Secret referencingServiceAccounts []corev1.ServiceAccount + allowedACRServerSuffixes []string tokenStub func(*testing.T, *msiacrpullv1beta2.AcrPullBinding, *corev1.ServiceAccount) (ServiceAccountTokenMinter, armTokenFetcher, armAcrTokenExchanger) @@ -397,6 +398,50 @@ func Test_ACRPullBindingController_v1beta2_reconcile(t *testing.T) { }, }, }, + { + name: "disallowed ACR server fails before token acquisition", + acrBinding: &msiacrpullv1beta2.AcrPullBinding{ + ObjectMeta: metav1.ObjectMeta{Namespace: "ns", Name: "binding", Finalizers: []string{"msi-acrpull.microsoft.com"}}, + Spec: msiacrpullv1beta2.AcrPullBindingSpec{ + ServiceAccountName: "delegate", + ACR: msiacrpullv1beta2.AcrConfiguration{ + Server: "attacker.example.com", + Scope: "repository:testing:pull,push", + Environment: msiacrpullv1beta2.AzureEnvironmentPublicCloud, + }, + Auth: msiacrpullv1beta2.AuthenticationMethod{ + ManagedIdentity: &msiacrpullv1beta2.ManagedIdentityAuth{ + ClientID: "client-id", + }, + }, + }, + }, + serviceAccount: &corev1.ServiceAccount{ + ObjectMeta: metav1.ObjectMeta{Namespace: "ns", Name: "delegate"}, + }, + allowedACRServerSuffixes: []string{"azurecr.io"}, + output: &action[*msiacrpullv1beta2.AcrPullBinding]{ + updatePullBindingStatus: &msiacrpullv1beta2.AcrPullBinding{ + ObjectMeta: metav1.ObjectMeta{Namespace: "ns", Name: "binding", Finalizers: []string{"msi-acrpull.microsoft.com"}}, + Spec: msiacrpullv1beta2.AcrPullBindingSpec{ + ServiceAccountName: "delegate", + ACR: msiacrpullv1beta2.AcrConfiguration{ + Server: "attacker.example.com", + Scope: "repository:testing:pull,push", + Environment: msiacrpullv1beta2.AzureEnvironmentPublicCloud, + }, + Auth: msiacrpullv1beta2.AuthenticationMethod{ + ManagedIdentity: &msiacrpullv1beta2.ManagedIdentityAuth{ + ClientID: "client-id", + }, + }, + }, + Status: msiacrpullv1beta2.AcrPullBindingStatus{ + Error: `ACR server "attacker.example.com" is not in the allowed ACR server suffixes: azurecr.io`, + }, + }, + }, + }, { name: "binding with pull credential updates the service account", acrBinding: &msiacrpullv1beta2.AcrPullBinding{ @@ -1545,6 +1590,7 @@ func Test_ACRPullBindingController_v1beta2_reconcile(t *testing.T) { fetchArmToken: fetchArmToken, exchangeArmTokenForAcrToken: exchangeArmTokenForAcrToken, TTLRotationFraction: 0.5, + AllowedACRServerSuffixes: testCase.allowedACRServerSuffixes, }) output := controller.reconcile(context.Background(), logger, testCase.acrBinding, testCase.serviceAccount, testCase.pullSecrets, testCase.referencingServiceAccounts) diff --git a/internal/controller/generic_controller.go b/internal/controller/generic_controller.go index 6f5c7e7..ad15c51 100644 --- a/internal/controller/generic_controller.go +++ b/internal/controller/generic_controller.go @@ -34,6 +34,7 @@ type genericReconciler[O pullBinding] struct { GetServiceAccountName func(O) string GetPullSecretName func(O) string GetInputsHash func(O) string + ValidateBinding func(O) error CreatePullCredential func(context.Context, O, *corev1.ServiceAccount) (string, time.Time, error) @@ -136,6 +137,13 @@ func (r *genericReconciler[O]) reconcile(ctx context.Context, logger logr.Logger return r.cleanUp(acrBinding, serviceAccount, pullSecrets, logger) } + if r.ValidateBinding != nil { + if err := r.ValidateBinding(acrBinding); err != nil { + logger.Info(err.Error()) + return &action[O]{updatePullBindingStatus: r.UpdateStatusError(acrBinding, err.Error())} + } + } + // if the user changed which service account should be bound to this credential, we need to // un-bind the credential from any service accounts it was bound to previously; if we're in // the middle of cleaning up old pull secrets from this binding, we need to make sure all of