diff --git a/shared/secrets_masker/src/airflow_shared/secrets_masker/secrets_masker.py b/shared/secrets_masker/src/airflow_shared/secrets_masker/secrets_masker.py index fb5e10302d7f9..1fc046de5830e 100644 --- a/shared/secrets_masker/src/airflow_shared/secrets_masker/secrets_masker.py +++ b/shared/secrets_masker/src/airflow_shared/secrets_masker/secrets_masker.py @@ -344,14 +344,18 @@ def _redact_all( def _redact( self, item: Redactable, name: str | None, depth: int, max_depth: int, replacement: str = "***" ) -> Redacted: - # Avoid spending too much effort on redacting on deeply nested - # structures. This also avoid infinite recursion if a structure has - # reference to self. - if depth > max_depth: - return item try: + # Key-name-based redaction is unbounded by depth — sensitive keys + # must fail closed at any nesting level. The depth cutoff below is + # only used to bound the work of pattern-based string masking and + # to terminate recursion through self-referential iterables. if name and self.should_hide_value_for_key(name): return self._redact_all(item, depth, max_depth, replacement=replacement) + # Always walk dicts so deeper sensitive keys are still caught; + # JSON-loaded payloads cannot be self-referential, and any + # in-memory cycle hits Python's own recursion limit and is caught + # by the except clause below (which fails closed via + # ""). if isinstance(item, dict): to_return = { dict_key: self._redact( @@ -360,6 +364,10 @@ def _redact( for dict_key, subval in item.items() } return to_return + # Avoid spending too much effort on pattern-based masking of + # deeply nested non-dict structures. + if depth > max_depth: + return item if isinstance(item, Enum): return self._redact( item=item.value, name=name, depth=depth, max_depth=max_depth, replacement=replacement diff --git a/shared/secrets_masker/tests/secrets_masker/test_secrets_masker.py b/shared/secrets_masker/tests/secrets_masker/test_secrets_masker.py index e23cb1c274438..930748e2ed0d0 100644 --- a/shared/secrets_masker/tests/secrets_masker/test_secrets_masker.py +++ b/shared/secrets_masker/tests/secrets_masker/test_secrets_masker.py @@ -690,6 +690,44 @@ def test_redact_max_depth(self, val, expected, max_depth): got = redact(val, max_depth=max_depth) assert got == expected + @pytest.mark.parametrize( + ("val", "expected"), + [ + # Sensitive key at exactly MAX_RECURSION_DEPTH (5) is redacted. + ( + {"a": {"b": {"c": {"d": {"password": "leaked"}}}}}, + {"a": {"b": {"c": {"d": {"password": "***"}}}}}, + ), + # Sensitive key one level past MAX_RECURSION_DEPTH is also redacted. + ( + {"a": {"b": {"c": {"d": {"e": {"password": "leaked"}}}}}}, + {"a": {"b": {"c": {"d": {"e": {"password": "***"}}}}}}, + ), + # Two levels past MAX_RECURSION_DEPTH, under a non-sensitive + # intermediate key, still fails closed. + ( + {"a": {"b": {"c": {"d": {"e": {"f": {"token": "leaked"}}}}}}}, + {"a": {"b": {"c": {"d": {"e": {"f": {"token": "***"}}}}}}}, + ), + # Other sensitive key names recognised by should_hide_value_for_key. + ( + {"a": {"b": {"c": {"d": {"e": {"secret": "leaked"}}}}}}, + {"a": {"b": {"c": {"d": {"e": {"secret": "***"}}}}}}, + ), + ( + {"a": {"b": {"c": {"d": {"e": {"api_key": "leaked"}}}}}}, + {"a": {"b": {"c": {"d": {"e": {"api_key": "***"}}}}}}, + ), + ], + ) + def test_redact_sensitive_key_past_max_depth(self, val, expected): + secrets_masker = SecretsMasker() + configure_secrets_masker_for_test(secrets_masker) + with patch( + "airflow_shared.secrets_masker.secrets_masker._secrets_masker", return_value=secrets_masker + ): + assert redact(val) == expected + def test_redact_with_str_type(self, logger, caplog): """ SecretsMasker's re replacer has issues handling a redactable item of type