diff --git a/.github/workflows/security-fast.yml b/.github/workflows/security-fast.yml index 791ad53..bc355ff 100644 --- a/.github/workflows/security-fast.yml +++ b/.github/workflows/security-fast.yml @@ -87,11 +87,18 @@ jobs: - name: Run pip-audit run: | + # Dev-only transitive CVEs — no runtime impact; fixes are either unavailable or require py3.10+ (we support 3.9) # GHSA-5239-wwwm-4pmq: pygments ReDoS in AdlLexer (dev-only, no fix available) # GHSA-58qw-9mgm-455v: pip tar/zip confusion (pip itself, no fix available) + # GHSA-jp4c-xjxw-mgf9: pip self-update import ordering (fix requires py3.10+) + # GHSA-qccp-gfcp-xxvc: urllib3 cross-origin header leak (fix 2.7.0 requires py3.10+) + # GHSA-mf9v-mfxr-j63j: urllib3 decompression bomb (fix 2.7.0 requires py3.10+) uv run pip-audit --desc --format json --output pip-audit-report.json \ --ignore-vuln GHSA-5239-wwwm-4pmq \ - --ignore-vuln GHSA-58qw-9mgm-455v + --ignore-vuln GHSA-58qw-9mgm-455v \ + --ignore-vuln GHSA-jp4c-xjxw-mgf9 \ + --ignore-vuln GHSA-qccp-gfcp-xxvc \ + --ignore-vuln GHSA-mf9v-mfxr-j63j - name: Upload report if: always() diff --git a/.hooks/check-no-internal-docs.sh b/.hooks/check-no-internal-docs.sh new file mode 100755 index 0000000..36b0319 --- /dev/null +++ b/.hooks/check-no-internal-docs.sh @@ -0,0 +1,8 @@ +#!/bin/sh +# Block internal development artifacts from being committed to this public repo. +# Matched files belong in tooling/, strategy/, or MCP memory — not here. +echo "BLOCKED - Internal development files must not be committed to this public repo" +echo "Files:" +for f in "$@"; do echo " $f"; done +echo "Move to tooling/, strategy/, or MCP memory instead." 
+exit 1 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 6235c0b..704edd5 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -39,6 +39,25 @@ repos: files: \.rs$ pass_filenames: false + # Block internal development artifacts from public repo + - repo: local + hooks: + - id: check-no-internal-docs + name: Block internal docs from public repo + entry: .hooks/check-no-internal-docs.sh + language: script + files: | + (?x)^( + docs/superpowers/| + \.spec-workflow/specs/| + strategy/| + tooling/sessions/| + sessions/tasks/| + CALIBER_LEARNINGS\.md$| + \.caliber/ + ) + pass_filenames: true + # GitHub Actions workflow linting - repo: https://github.com/rhysd/actionlint rev: 914e7df21a07ef503a81201c76d2b11c789d3fca # v1.7.12 # pragma: allowlist secret diff --git a/Cargo.lock b/Cargo.lock index ad7a594..abcacc1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -231,7 +231,7 @@ dependencies = [ [[package]] name = "cachekit-rs" -version = "0.3.1" +version = "0.6.0" dependencies = [ "cachekit-core", "criterion", diff --git a/src/cachekit/decorators/wrapper.py b/src/cachekit/decorators/wrapper.py index 959a7e4..2b4827a 100644 --- a/src/cachekit/decorators/wrapper.py +++ b/src/cachekit/decorators/wrapper.py @@ -484,6 +484,16 @@ def create_cache_wrapper( # Generated lazily on first use or regenerated after cache_clear() function_identifier = f"{func.__module__}.{func.__qualname__}" + # Detect whether the wrapped function accepts parameters. + # Used to distinguish "invalidate the zero-arg entry" from "invalidate ALL entries". + _func_has_params = bool(inspect.signature(func).parameters) + + # Track all cache keys written by this function (for no-args invalidation). + # When invalidate_cache() is called with no args on a parameterized function, + # we need to clear ALL entries — but key normalization (hashing of long keys) + # makes prefix matching unreliable. Tracking actual keys is simple and correct. 
+ _cached_keys: set[str] = set() + # Create stats tracker (session ID will be lazy-initialized on first use) # Pass l1_enabled for rate limit classification header _stats = _FunctionStats(function_identifier=function_identifier, l1_enabled=l1_enabled) @@ -590,6 +600,7 @@ def sync_wrapper(*args: Any, **kwargs: Any) -> Any: # noqa: PLR0912 ) if _l1_cache and cache_key and serialized_bytes: _l1_cache.put(cache_key, serialized_bytes, redis_ttl=ttl) + _cached_keys.add(cache_key) except Exception as e: # Serialization/storage failed but function succeeded - log and return result logger().debug(f"L1-only mode: serialization/storage failed for {cache_key}: {e}") @@ -797,6 +808,7 @@ def sync_wrapper(*args: Any, **kwargs: Any) -> Any: # noqa: PLR0912 # Also store in L1 cache for fast subsequent access (using serialized bytes) if _l1_cache and cache_key and serialized_bytes: _l1_cache.put(cache_key, serialized_bytes, redis_ttl=ttl) + _cached_keys.add(cache_key) # Record successful cache set set_duration_ms = (time.time() - start_time) * 1000 @@ -930,6 +942,7 @@ async def async_wrapper(*args: Any, **kwargs: Any) -> Any: ) if _l1_cache and cache_key and serialized_bytes: _l1_cache.put(cache_key, serialized_bytes, redis_ttl=ttl) + _cached_keys.add(cache_key) except Exception as e: # Serialization/storage failed but function succeeded - log and return result logger().debug(f"L1-only mode: serialization/storage failed for {cache_key}: {e}") @@ -1032,6 +1045,7 @@ async def async_wrapper(*args: Any, **kwargs: Any) -> Any: # cached_data is already serialized bytes from Redis cached_bytes = cached_data.encode("utf-8") if isinstance(cached_data, str) else cached_data _l1_cache.put(cache_key, cached_bytes, redis_ttl=ttl) + _cached_keys.add(cache_key) # Handle TTL refresh if configured and threshold met if refresh_ttl_on_get and ttl and hasattr(_backend, "get_ttl") and hasattr(_backend, "refresh_ttl"): @@ -1096,6 +1110,7 @@ async def async_wrapper(*args: Any, **kwargs: Any) -> Any: 
cached_data.encode("utf-8") if isinstance(cached_data, str) else cached_data ) _l1_cache.put(cache_key, cached_bytes, redis_ttl=ttl) + _cached_keys.add(cache_key) return result except Exception as e: @@ -1121,6 +1136,7 @@ async def async_wrapper(*args: Any, **kwargs: Any) -> Any: cached_data.encode("utf-8") if isinstance(cached_data, str) else cached_data ) _l1_cache.put(cache_key, cached_bytes, redis_ttl=ttl) + _cached_keys.add(cache_key) return result except Exception: @@ -1156,6 +1172,7 @@ async def async_wrapper(*args: Any, **kwargs: Any) -> Any: serialized_data.encode("utf-8") if isinstance(serialized_data, str) else serialized_data ) _l1_cache.put(cache_key, serialized_bytes, redis_ttl=ttl) + _cached_keys.add(cache_key) # Record successful cache set set_duration_ms = (time.perf_counter() - start_time) * 1000 @@ -1235,6 +1252,7 @@ async def async_wrapper(*args: Any, **kwargs: Any) -> Any: serialized_data.encode("utf-8") if isinstance(serialized_data, str) else serialized_data ) _l1_cache.put(cache_key, serialized_bytes, redis_ttl=ttl) + _cached_keys.add(cache_key) # Record successful cache set set_duration_ms = (time.perf_counter() - start_time) * 1000 @@ -1289,14 +1307,32 @@ def invalidate_cache(*args: Any, **kwargs: Any) -> None: # If backend creation fails, can't invalidate L2 _logger.debug("Failed to get backend for invalidation: %s", e) - # Clear both L2 (backend) and L1 cache + # Fix #59: When called with no args on a parameterized function, + # invalidate ALL cached entries for this function. + # Without this, it generates a key for zero-arg call (never cached) → no-op. 
+ if not args and not kwargs and _func_has_params: + # Snapshot prevents RuntimeError if another thread adds during iteration + keys_snapshot = set(_cached_keys) + for key in keys_snapshot: + if _l1_cache: + _l1_cache.invalidate(key) + if _backend and not _l1_only_mode: + invalidator.set_backend(_backend) + try: + _backend.delete(key) + except Exception as e: + _logger.debug("Failed to delete L2 key %s: %s", key, e) + continue # keep key tracked for retry + _cached_keys.discard(key) + return + + # Single-key invalidation (specific args provided, or zero-param function) cache_key = operation_handler.get_cache_key(func, args, kwargs, namespace, integrity_checking) - # Clear L1 cache first if _l1_cache and cache_key: _l1_cache.invalidate(cache_key) + _cached_keys.discard(cache_key) - # Clear L2 cache via invalidator (skip in L1-only mode) if _backend and not _l1_only_mode: invalidator.set_backend(_backend) invalidator.invalidate_cache(func, args, kwargs, namespace) @@ -1314,12 +1350,29 @@ async def ainvalidate_cache(*args: Any, **kwargs: Any) -> None: # If backend creation fails, can't invalidate L2 _logger.debug("Failed to get backend for async invalidation: %s", e) - # Clear both L2 (backend) and L1 cache + # Fix #59: When called with no args on a parameterized function, + # invalidate ALL cached entries for this function. 
+ if not args and not kwargs and _func_has_params: + keys_snapshot = set(_cached_keys) + for key in keys_snapshot: + if _l1_cache: + _l1_cache.invalidate(key) + if _backend and not _l1_only_mode: + invalidator.set_backend(_backend) + try: + _backend.delete(key) + except Exception as e: + _logger.debug("Failed to delete L2 key %s: %s", key, e) + continue + _cached_keys.discard(key) + return + + # Single-key invalidation (specific args provided, or zero-param function) cache_key = operation_handler.get_cache_key(func, args, kwargs, namespace, integrity_checking) - # Clear L1 cache first if _l1_cache and cache_key: _l1_cache.invalidate(cache_key) + _cached_keys.discard(cache_key) # Clear L2 cache via invalidator (skip in L1-only mode) if _backend and not _l1_only_mode: diff --git a/tests/unit/test_invalidate_no_args.py b/tests/unit/test_invalidate_no_args.py new file mode 100644 index 0000000..ee98b5b --- /dev/null +++ b/tests/unit/test_invalidate_no_args.py @@ -0,0 +1,329 @@ +""" +Test for #59: invalidate_cache() / ainvalidate_cache() with no args on parameterized functions. + +Bug: When invalidate_cache() is called with no arguments on a function that HAS parameters, +it generates a cache key for the zero-argument call (which was never cached) and invalidates +that non-existent key. All cached entries for real argument combinations survive. + +Expected: calling invalidate_cache() with no args on a parameterized function should clear +ALL cached entries for that function (function-level invalidation; other functions sharing the namespace are unaffected). 
+""" + +from __future__ import annotations + +import pytest + +from cachekit import cache +from cachekit.backends.file import FileBackend, FileBackendConfig + + +@pytest.mark.unit +class TestInvalidateNoArgs: + """Reproduce #59: invalidate_cache() no-op on parameterized functions.""" + + def test_sync_invalidate_no_args_clears_all_entries(self): + """invalidate_cache() with no args should clear all cached entries.""" + call_count = 0 + + @cache(backend=None, ttl=300, namespace="test_sync_invalidate_no_args") + def expensive(query: str) -> str: + nonlocal call_count + call_count += 1 + return f"result_{call_count}" + + # Populate cache with two different argument combinations + result1 = expensive("hello") + result2 = expensive("world") + assert call_count == 2 + + # Verify cache hits + assert expensive("hello") == result1 + assert expensive("world") == result2 + assert call_count == 2 # no new calls + + # Invalidate with no args — should clear ALL entries + expensive.invalidate_cache() + + # Both entries should be gone — function must be called again + expensive("hello") + expensive("world") + assert call_count == 4, ( + f"Expected 4 calls after invalidation, got {call_count}. " + "invalidate_cache() with no args did not clear cached entries." 
+ ) + + def test_sync_invalidate_with_args_clears_single_entry(self): + """invalidate_cache(specific_args) should only clear that one entry.""" + call_count = 0 + + @cache(backend=None, ttl=300, namespace="test_sync_invalidate_with_args") + def expensive(query: str) -> str: + nonlocal call_count + call_count += 1 + return f"result_{call_count}" + + expensive("hello") + expensive("world") + assert call_count == 2 + + # Invalidate only "hello" + expensive.invalidate_cache("hello") + + # "hello" should miss, "world" should still hit + expensive("hello") + assert call_count == 3 + expensive("world") + assert call_count == 3 # still cached + + def test_sync_no_param_function_invalidate_still_works(self): + """invalidate_cache() on a zero-param function should still clear its entry.""" + call_count = 0 + + @cache(backend=None, ttl=300, namespace="test_sync_no_param") + def no_params() -> str: + nonlocal call_count + call_count += 1 + return f"result_{call_count}" + + no_params() + assert call_count == 1 + no_params() + assert call_count == 1 # cached + + no_params.invalidate_cache() + + no_params() + assert call_count == 2 # cache was cleared + + @pytest.mark.asyncio + async def test_async_invalidate_no_args_clears_all_entries(self): + """ainvalidate_cache() with no args should clear all cached entries.""" + call_count = 0 + + @cache(backend=None, ttl=300, namespace="test_async_invalidate_no_args") + async def expensive(query: str) -> str: + nonlocal call_count + call_count += 1 + return f"result_{call_count}" + + result1 = await expensive("hello") + result2 = await expensive("world") + assert call_count == 2 + + # Verify cache hits + assert await expensive("hello") == result1 + assert await expensive("world") == result2 + assert call_count == 2 + + # Invalidate with no args + await expensive.ainvalidate_cache() + + # Both should be recalculated + await expensive("hello") + await expensive("world") + assert call_count == 4, ( + f"Expected 4 calls after invalidation, got 
{call_count}. " + "ainvalidate_cache() with no args did not clear cached entries." + ) + + def test_cache_clear_clears_all_entries(self): + """cache_clear() should clear all cached entries for parameterized functions.""" + call_count = 0 + + @cache(backend=None, ttl=300, namespace="test_cache_clear_all") + def expensive(query: str) -> str: + nonlocal call_count + call_count += 1 + return f"result_{call_count}" + + expensive("hello") + expensive("world") + assert call_count == 2 + + expensive.cache_clear() + + expensive("hello") + expensive("world") + assert call_count == 4, ( + f"Expected 4 calls after cache_clear(), got {call_count}. " + "cache_clear() did not clear cached entries for parameterized function." + ) + + +@pytest.mark.unit +class TestInvalidateNoArgsWithL2Backend: + """Exercise the L2 (backend) mass-invalidation path using FileBackend.""" + + def test_file_backend_invalidate_no_args_clears_l2(self, tmp_path): + """invalidate_cache() with no args should delete entries from both L1 and L2.""" + call_count = 0 + backend = FileBackend(FileBackendConfig(cache_dir=str(tmp_path), max_size_mb=256)) + + @cache(backend=backend, ttl=300, namespace="test_file_l2_invalidate") + def expensive(query: str) -> str: + nonlocal call_count + call_count += 1 + return f"result_{call_count}" + + # Populate L1 + L2 + result1 = expensive("hello") + result2 = expensive("world") + assert call_count == 2 + + # Verify cache hits (served from L1) + assert expensive("hello") == result1 + assert expensive("world") == result2 + assert call_count == 2 + + # Invalidate all — should clear both L1 and L2 + expensive.invalidate_cache() + + # Both should miss and recompute + expensive("hello") + expensive("world") + assert call_count == 4, ( + f"Expected 4 calls after invalidation, got {call_count}. L2 entries survived invalidate_cache() with no args." 
+ ) + + def test_file_backend_partial_failure_retains_keys(self, tmp_path): + """If L2 delete fails, the key stays in _cached_keys for retry.""" + from unittest.mock import patch + + call_count = 0 + backend = FileBackend(FileBackendConfig(cache_dir=str(tmp_path), max_size_mb=256)) + + @cache(backend=backend, ttl=300, namespace="test_file_partial_fail") + def expensive(query: str) -> str: + nonlocal call_count + call_count += 1 + return f"result_{call_count}" + + expensive("hello") + expensive("world") + assert call_count == 2 + + # Make L2 delete fail for all keys + with patch.object(backend, "delete", side_effect=Exception("disk error")): + expensive.invalidate_cache() + + # L1 was cleared (invalidate always succeeds for L1), but L2 keys + # should still be tracked. We can't easily check _cached_keys directly, + # but we can verify a second invalidation attempt works when the backend + # is healthy again. + expensive.invalidate_cache() + + # Now both L1 and L2 should be clear + expensive("hello") + expensive("world") + assert call_count == 4 + + @pytest.mark.asyncio + async def test_async_file_backend_partial_failure_retains_keys(self, tmp_path): + """Async: if L2 delete fails, the key stays tracked for retry.""" + from unittest.mock import patch + + call_count = 0 + backend = FileBackend(FileBackendConfig(cache_dir=str(tmp_path), max_size_mb=256)) + + @cache(backend=backend, ttl=300, namespace="test_async_file_partial_fail") + async def expensive(query: str) -> str: + nonlocal call_count + call_count += 1 + return f"result_{call_count}" + + await expensive("hello") + assert call_count == 1 + + with patch.object(backend, "delete", side_effect=Exception("disk error")): + await expensive.ainvalidate_cache() + + # L2 delete failed → key still tracked. 
Second attempt with healthy backend: + await expensive.ainvalidate_cache() + + await expensive("hello") + assert call_count == 2 + + @pytest.mark.asyncio + async def test_async_file_backend_invalidate_with_specific_args(self, tmp_path): + """Async: ainvalidate_cache(specific_args) clears only that entry from L2.""" + call_count = 0 + backend = FileBackend(FileBackendConfig(cache_dir=str(tmp_path), max_size_mb=256)) + + @cache(backend=backend, ttl=300, namespace="test_async_file_specific_args") + async def expensive(query: str) -> str: + nonlocal call_count + call_count += 1 + return f"result_{call_count}" + + await expensive("hello") + await expensive("world") + assert call_count == 2 + + await expensive.ainvalidate_cache("hello") + + await expensive("hello") + assert call_count == 3 # recalculated + await expensive("world") + assert call_count == 3 # still cached + + @pytest.mark.asyncio + async def test_async_file_backend_invalidate_no_args_clears_l2(self, tmp_path): + """Async ainvalidate_cache() with no args should clear L2 entries via FileBackend.""" + call_count = 0 + backend = FileBackend(FileBackendConfig(cache_dir=str(tmp_path), max_size_mb=256)) + + @cache(backend=backend, ttl=300, namespace="test_async_file_l2_invalidate") + async def expensive(query: str) -> str: + nonlocal call_count + call_count += 1 + return f"result_{call_count}" + + await expensive("hello") + await expensive("world") + assert call_count == 2 + + await expensive.ainvalidate_cache() + + await expensive("hello") + await expensive("world") + assert call_count == 4, ( + f"Expected 4 calls after async invalidation, got {call_count}. L2 entries survived ainvalidate_cache() with no args." 
+ ) + + +@pytest.mark.unit +class TestInvalidateNoArgsCrossFunctionIsolation: + """Ensure invalidation doesn't leak across functions.""" + + def test_invalidate_does_not_affect_other_functions_same_namespace(self): + """Invalidating fn_a should not affect fn_b even if they share a namespace.""" + a_count = 0 + b_count = 0 + ns = "test_cross_function_isolation" + + @cache(backend=None, ttl=300, namespace=ns) + def fn_a(x: int) -> str: + nonlocal a_count + a_count += 1 + return f"a_{a_count}" + + @cache(backend=None, ttl=300, namespace=ns) + def fn_b(x: int) -> str: + nonlocal b_count + b_count += 1 + return f"b_{b_count}" + + # Populate both + fn_a(1) + fn_b(1) + assert a_count == 1 + assert b_count == 1 + + # Invalidate only fn_a + fn_a.invalidate_cache() + + # fn_a should miss, fn_b should still hit + fn_a(1) + assert a_count == 2 # recalculated + fn_b(1) + assert b_count == 1 # still cached diff --git a/uv.lock b/uv.lock index ed4eeb2..2aa9ab3 100644 --- a/uv.lock +++ b/uv.lock @@ -236,7 +236,7 @@ filecache = [ [[package]] name = "cachekit" -version = "0.5.1" +version = "0.6.0" source = { editable = "." } dependencies = [ { name = "blake3" },