diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 0b6796a7..346b400b 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -13,6 +13,20 @@ repos: - id: check-toml - id: check-merge-conflict - id: check-added-large-files + # Cross-OS filename collisions: this repo ships a macOS Homebrew bottle, so two + # paths differing only in case would break a case-insensitive checkout. + - id: check-case-conflict + # Defense-in-depth alongside gitleaks: never commit a literal private key. + - id: detect-private-key + + # Spell-check code, comments, and docs (Kubernetes' verify-spelling, generalized). + # Config (skips + ignore-words) lives in [tool.codespell] in pyproject.toml; check.sh + # runs the same tool via `uvx codespell`. + - repo: https://github.com/codespell-project/codespell + rev: v2.4.2 + hooks: + - id: codespell + additional_dependencies: [tomli] - repo: https://github.com/astral-sh/ruff-pre-commit rev: v0.15.16 diff --git a/AGENTS.md b/AGENTS.md index c74f27b2..5224bffe 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -25,7 +25,7 @@ uv run assembly --help # run the CLI from the locked environment Dev tooling is a PEP 735 `[dependency-groups]` group with `default-groups = ["dev"]`, not a `[project]` extra — `uv sync --extra dev` errors. -`scripts/check.sh` is the authoritative gate; keep this list in sync with it. It runs, in order: `uv lock --check` → `ruff check` → `ruff format --check` → `mypy` → `pyright` (src strict) → `pyright` (tests) → `vulture` (dead code) → `deptry` (dependency hygiene) → `lint-imports` (import-linter architecture contracts) → max-file-length (500 lines) → `xenon` (cyclomatic complexity, max grade B / project avg A) → `swiftlint` + swift compile (macOS only, skipped elsewhere) → `markdownlint` → `prettier` (init template JS/CSS) → `shellcheck` → `actionlint` + `zizmor` (workflow lint/audit) → `gitleaks` (secret scan) → generated `--show-code` compile gate → init template contract gate → `pytest` (90% branch coverage) → `diff-cover` (100% patch coverage vs `origin/main`) → **mutation gate** (diff-scoped: mutates each changed line and reruns the tests that cover it — a surviving mutant fails the gate, so changed lines need assertions that would *fail* if the line broke, not just coverage; suppress a genuinely unassertable line with `# pragma: no mutate`) → a "no new escape hatches" gate (`# type: ignore` / `# noqa` / `pragma: no cover` / `Any` / `cast(` / test skip/xfail/sleep, all **count-gated against the merge-base** so moving an existing hatch in a refactor doesn't false-positive but a net-new one fails) → **CodeQL gate** (`scripts/codeql_gate.py`: the same security + quality suites the CodeQL workflow uploads to GitHub's code-scanning/quality tabs, run locally over python/actions/javascript so alerts fail before push instead of on the PR; needs the CodeQL bundle on PATH — self-skips otherwise, `codeql.yml` covers CI, and the web session-start hook provisions it) → `uv build` + `twine check --strict`. The `vulture`/`deptry`/`lint-imports`/`xenon`, patch-coverage, and mutation stages catch the failures that `ruff`+`mypy` alone won't — don't claim the gate is green until the script prints `All checks passed.` +`scripts/check.sh` is the authoritative gate; keep this list in sync with it. It runs, in order: `uv lock --check` → `ruff check` → `ruff format --check` → `mypy` → `pyright` (src strict) → `pyright` (tests) → `vulture` (dead code) → `deptry` (dependency hygiene) → `lint-imports` (import-linter architecture contracts) → max-file-length (500 lines) → `xenon` (cyclomatic complexity, max grade B / project avg A) → `swiftlint` + swift compile (macOS only, skipped elsewhere) → `markdownlint` → `codespell` (spell-check code/comments/docs via `uvx`; config in `[tool.codespell]`) → `prettier` (init template JS/CSS) → `shellcheck` → `actionlint` + `zizmor` (workflow lint/audit) → `gitleaks` (secret scan) → generated `--show-code` compile gate → init template contract gate → unused snapshot/fixture gate (`scripts/unused_fixtures_gate.py`: orphaned `.ambr`/API fixtures, since xdist disables syrupy's own unused detection) → docs consistency gate (`scripts/docs_consistency_gate.py`: REFERENCE.md/README.md env vars, exit codes, and `assembly …` command refs stay in sync with the code) → docstring coverage gate (`scripts/docstring_coverage_gate.py`: public-API docstring ratchet, an `interrogate` stand-in that handles PEP 695 generics) → `brew audit --strict` (the shipped `Formula/assembly.rb`; self-skips without Homebrew) → `pytest` (90% branch coverage) → `diff-cover` (100% patch coverage vs `origin/main`) → **mutation gate** (diff-scoped: mutates each changed line and reruns the tests that cover it — a surviving mutant fails the gate, so changed lines need assertions that would *fail* if the line broke, not just coverage; suppress a genuinely unassertable line with `# pragma: no mutate`) → a "no new escape hatches" gate (`# type: ignore` / `# noqa` / `pragma: no cover` / `Any` / `cast(` / test skip/xfail/sleep, all **count-gated against the merge-base** so moving an existing hatch in a refactor doesn't false-positive but a net-new one fails) → **CodeQL gate** (`scripts/codeql_gate.py`: the same security + quality suites the CodeQL workflow uploads to GitHub's code-scanning/quality tabs, run locally over python/actions/javascript so alerts fail before push instead of on the PR; needs the CodeQL bundle on PATH — self-skips otherwise, `codeql.yml` covers CI, and the web session-start hook provisions it) → `uv build` + `twine check --strict`. The `vulture`/`deptry`/`lint-imports`/`xenon`, patch-coverage, and mutation stages catch the failures that `ruff`+`mypy` alone won't — don't claim the gate is green until the script prints `All checks passed.` **Commits are gated.** On success `check.sh` records a working-tree signature (`scripts/gate_marker.py record` → `.git/aai-gate-pass`), and a PreToolUse hook (`.claude/hooks/require-gate-before-commit.sh`) blocks `git commit` unless that signature still matches — so run the full gate to completion *before* committing (a single-file `pytest` does not satisfy it), and re-run it after any further edit. Iterate with the fast targeted commands above, gate once at the end. For a deliberate work-in-progress commit, prefix `AAI_ALLOW_COMMIT=1 git commit …`. @@ -34,7 +34,7 @@ Individual tools (all via `uv run`): ```sh uv run ruff check . # lint uv run ruff format . # format (line-length 100) -uv run mypy # files = ["aai_cli", "tests"] from pyproject; strict (disallow_untyped_defs on src) +uv run mypy # files = ["aai_cli", "tests"] from pyproject; src is full --strict bar disallow_untyped_calls (jiwer ships no stubs); tests relax the untyped-body flags prettier --check "aai_cli/init/templates/**/*.{js,css}" # JS/CSS template formatting uv run pytest -q # default unit suite uv run pytest tests/test_transcribe.py -q # a single file diff --git a/aai_cli/AGENTS.md b/aai_cli/AGENTS.md index 1a7191fc..9e3dccee 100644 --- a/aai_cli/AGENTS.md +++ b/aai_cli/AGENTS.md @@ -97,6 +97,7 @@ heavily-reworked commands with long bodies; small commands keep the inline - **`environments.py`** — a frozen `Environment` (api_base, streaming_host, llm_gateway_base, ams_base, stytch_*). `DEFAULT_ENV` is **`production`**; use `--sandbox` (or `--env sandbox000` / `AAI_ENV`) to target the sandbox. The active environment is a process-global set once at startup; precedence: `--env` → `AAI_ENV` → profile's stored env → default. A credential is only valid against the environment that minted it. - **`client.py`** — thin wrappers over the `assemblyai` SDK (`transcribe`, `list_transcripts`, `stream_audio`, etc.). It normalizes SDK exceptions: auth failures become a single clean `auth_failure()` `CLIError`; everything else becomes `APIError`. New SDK calls should follow this try/except shape. - **`errors.py`** — the `CLIError` hierarchy (each with `error_type` + `exit_code`). `output.py` emits errors to **stderr**; stdout stays clean for pipelines. `--json` switches to machine-readable output; it is never auto-enabled — `output.resolve_json()` deliberately keeps human text the default even when piped or agent-run. +- **Raw `subprocess` and `os.environ`/`os.getenv` are fenced by ruff `banned-api` (TID251).** Only the modules allowlisted in `pyproject.toml`'s `per-file-ignores` may call them — process spawning is meant to go through `procs.py`, and environment reads through the config/env-resolution layer. A new module reaching for either trips the gate, so adding one is a deliberate, reviewable allowlist edit (the Deno toolchain's per-crate `clippy.toml` model). Tests and `scripts/` are exempt. - **`debuglog.py`** — the root `-v/--verbose` flag (count: `-v` request-level at INFO, `-vv` wire-level at DEBUG). The CLI normally configures no logging, and the realtime paths *silence* library loggers (`ws.py`, `streaming/diagnostics.py`); verbose mode installs one redacting stderr handler and those silencers stand down. Secrets are registered at their resolution choke points (`config.resolve_api_key`, `AppState.resolve_session`) and masked in every rendered record — websockets logs the raw Authorization header at DEBUG, so masking lives in the formatter, not at call sites. Stdlib-only on purpose: `config` (a Rich-free layer) imports it. ### Feature subsystems diff --git a/pyproject.toml b/pyproject.toml index 2e5bd6df..964a98f2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -175,11 +175,37 @@ warn_unreachable = true disallow_any_generics = true no_implicit_reexport = true extra_checks = true +# The remaining flags from mypy --strict, which aai_cli already satisfies. They close +# gaps the above leave open: a function with *some* annotations but an unannotated +# arg/return (disallow_incomplete_defs), the body of any still-untyped function going +# unchecked (check_untyped_defs), an untyped decorator silently erasing a function's +# type (disallow_untyped_decorators), subclassing an Any-typed base (disallow_subclassing_any), +# and a config option that no longer matches any file (warn_unused_configs). The one +# strict flag left off is disallow_untyped_calls: jiwer ships no stubs, so wer.py's +# RemovePunctuation() call is unavoidably untyped, and turning it on would force a +# net-new `# type: ignore` the escape-hatch gate rejects. +disallow_incomplete_defs = true +check_untyped_defs = true +disallow_untyped_decorators = true +disallow_subclassing_any = true +warn_unused_configs = true [[tool.mypy.overrides]] -# Tests are type-checked too, but pytest functions don't need return annotations. +# Tests are type-checked too, but pytest functions don't need return annotations +# (disallow_untyped_defs/disallow_incomplete_defs), and the bodies of those untyped +# helpers — full of mock plumbing and ad-hoc fixtures — would drown the signal if +# type-checked (check_untyped_defs) or block subclassing untyped test doubles +# (disallow_subclassing_any) / wrapping them in untyped decorators +# (disallow_untyped_decorators). The strict flags stay on for the shipped package. +# With check_untyped_defs off, mypy emits an `annotation-unchecked` note per annotated +# untyped helper; silence those notes so the test output stays signal. module = "tests.*" disallow_untyped_defs = false +disallow_incomplete_defs = false +check_untyped_defs = false +disallow_subclassing_any = false +disallow_untyped_decorators = false +disable_error_code = ["annotation-unchecked"] [tool.pyright] # Second type checker alongside mypy: pyright catches a different class of @@ -211,9 +237,25 @@ extend-exclude = ["aai_cli/_version.py"] # A/N/FBT/PL/T20/PT/PIE/PERF/TCH add maintainability pressure: naming/shadowing, # boolean traps, pylint-style design issues, centralized raw output, pytest style, # small simplifications, performance footguns, and type-only import hygiene. +# ASYNC/LOG/G/DTZ/FLY/ICN/SLOT/ISC/TID add correctness pressure the above miss and the +# codebase already satisfies (so they're forward-looking, zero-churn enforcement): +# ASYNC — blocking calls (time.sleep, open(), sync HTTP) inside the streaming/agent +# asyncio code, which would stall the event loop; +# LOG/G — logging anti-patterns (f-strings/`.format` in log calls, `exception()` +# outside handlers) in debuglog and friends; +# DTZ — naive datetime construction (timezone bugs); +# FLY — static `str.join` that should be an f-string (pairs with UP); +# ISC — implicitly concatenated string literals across lines (the classic +# missing-comma-in-a-list bug); ISC001 is owned by the formatter (ignored); +# ICN/SLOT — import-convention and __slots__ hygiene; +# TID — relative imports (banned outright below) so every import is absolute, +# reinforcing the import-linter architecture contracts; +# T10 — a forgotten breakpoint()/pdb/ipdb left in the shipped code (the debugger +# counterpart to the T20 print ban already selected). select = ["E", "F", "I", "UP", "B", "BLE", "C4", "SIM", "RET", "PTH", "ARG", "S", "RUF", "PGH", "ERA", "TRY", "TD", "FIX", "A", "N", "FBT", "PL", "C90", "T20", "PT", - "PIE", "PERF", "TCH"] + "PIE", "PERF", "TCH", "ASYNC", "LOG", "G", "DTZ", "FLY", "ICN", "SLOT", "ISC", + "TID", "T10"] # E501: line length is owned by the formatter. # B008: Typer uses function calls (typer.Option/Argument) as parameter defaults. # S603/S607: we intentionally shell out to `claude`/`npx` with controlled args. @@ -223,13 +265,35 @@ select = ["E", "F", "I", "UP", "B", "BLE", "C4", "SIM", "RET", "PTH", "ARG", "S" # PLC0415: optional/heavy runtime deps are imported lazily to keep startup fast. # TC001-TC003: the project intentionally keeps readable top-level type imports; TC006 # still enforces quoted runtime casts. +# ISC001: single-line implicit string concatenation is managed by the formatter, which +# would otherwise fight this lint (ruff's own recommendation when both are enabled). ignore = ["E501", "B008", "S603", "S607", "TRY003", "N818", "PLC0415", - "TC001", "TC002", "TC003"] + "TC001", "TC002", "TC003", "ISC001"] # Function-size pressure, tuned to keep functions small enough to read and edit in # one screen (the friction a coding agent hits most). These complement xenon's # cyclomatic-complexity gate in check.sh: mccabe (C901) and max-branches bound # branchiness; max-statements bounds raw length; max-args bounds signatures. +[tool.ruff.lint.flake8-tidy-imports] +# Every intra-package import is already absolute (`from aai_cli.x import y`); banning +# relative imports outright keeps it that way, which makes modules movable and the +# import-linter contracts unambiguous. +ban-relative-imports = "all" + +# Disallowed-methods enforcement, modeled on the Deno toolchain's per-crate clippy.toml +# bans (only designated crates may call the fenced std methods). Process spawning and +# raw environment access stay confined to the modules that legitimately own them +# (allowlisted via per-file-ignores below); any *new* module reaching for them trips +# TID251, so adding one is a visible, reviewable edit rather than a silent spread. +# The matcher is AST-based, so the os.environ snippets inside the code_gen --show-code +# exemplars (string literals) don't trip it. +[tool.ruff.lint.flake8-tidy-imports.banned-api] +"subprocess".msg = "Spawn detached children via aai_cli.procs; if a module genuinely needs raw subprocess, add it to the TID251 allowlist in pyproject.toml." +"os.environ".msg = "Resolve configuration through aai_cli.config / aai_cli.context (which centralize precedence and secret handling); env-owning modules are allowlisted for TID251 in pyproject.toml." +"os.getenv".msg = "Use os.environ.get (the single project idiom) via an env-owning module; see the TID251 allowlist in pyproject.toml." +"os.putenv".msg = "os.putenv/os.unsetenv bypass os.environ and desync the mapping; mutate os.environ instead." +"os.unsetenv".msg = "os.putenv/os.unsetenv bypass os.environ and desync the mapping; mutate os.environ instead." + [tool.ruff.lint.mccabe] max-complexity = 10 # matches xenon's grade-B ceiling (CC <= 10) so the two agree @@ -244,9 +308,14 @@ max-statements = 40 # TRY300: test helpers commonly `return` inside a try while asserting on the except path. # Tests also keep literal exit codes, local imports, composite assertions, and fake # call signatures where those make the intent clearer than production-style indirection. +# TID251: tests drive the CLI as a subprocess and monkeypatch os.environ freely; the +# banned-api ban targets the shipped aai_cli package, not the test harness or dev gates. +# DTZ: tests build naive datetimes as deterministic fixtures (the suite pins TZ in +# conftest and uses time-machine), so timezone-aware construction isn't required here. "tests/**" = ["S101", "S105", "S106", "S107", "S108", "ARG001", "ARG002", "ARG005", "PTH123", "SIM117", "TRY300", "FBT", "PLR2004", "PLC0415", "PLR0913", - "PLW1510", "N806", "N818", "PLW0108", "PT018", "TCH"] + "PLW1510", "N806", "N818", "PLW0108", "PT018", "TCH", "TID251", "DTZ"] +"scripts/**" = ["TID251"] # Typer command functions naturally have many boolean options and broad signatures # (PLR0913/FBT). Their *bodies*, though, are held to the same length/branch limits as # the rest of the package: PLR0912/PLR0915/C901 are deliberately NOT ignored here. @@ -257,18 +326,50 @@ max-statements = 40 # command signatures do. "aai_cli/options.py" = ["FBT003"] # Raw stdout/stderr writes are centralized here; command modules call output helpers. -"aai_cli/output.py" = ["T201"] +# TID251: output owns the FORCE_COLOR/NO_COLOR env toggles and TTY/agent detection. +"aai_cli/output.py" = ["T201", "TID251"] # The active environment is process-global startup state by design. -"aai_cli/environments.py" = ["PLW0603"] +# TID251: environments.py owns AAI_ENV resolution (an env-owning module). +"aai_cli/environments.py" = ["PLW0603", "TID251"] # Verbosity is process-global startup state by design (mirrors environments.py). "aai_cli/debuglog.py" = ["PLW0603"] # BaseHTTPRequestHandler.log_message requires a parameter named `format`. "aai_cli/auth/loopback.py" = ["A002"] # Template constants include URL path names such as TOKEN_PATH, not credentials. -"aai_cli/init/templates/**" = ["S105"] +# TID251: the scaffolds are end-user example apps that read their own config straight +# from os.environ — that's correct, idiomatic code to ship, not a CLI-internal env read. +"aai_cli/init/templates/**" = ["S105", "TID251"] + +# TID251 banned-api allowlist (see [tool.ruff.lint.flake8-tidy-imports.banned-api]). +# These are the only modules permitted raw `subprocess` (process spawning) or raw +# `os.environ`/`os.getenv` (environment access). Splitting the ignore per file keeps the +# blast radius explicit: a new module needing either must be added here in review. +# Process-spawning modules (shell out to claude/npx/ffmpeg/yt-dlp/tunnels/etc.): +"aai_cli/procs.py" = ["TID251"] +"aai_cli/coding_agent.py" = ["TID251"] +"aai_cli/mediafile.py" = ["TID251"] +"aai_cli/setup_exec.py" = ["TID251"] +"aai_cli/commands/deploy/_exec.py" = ["TID251"] +"aai_cli/commands/update.py" = ["TID251"] +"aai_cli/commands/webhooks/_listen.py" = ["TID251"] +"aai_cli/init/runner.py" = ["TID251"] +"aai_cli/init/tunnel.py" = ["TID251"] +"aai_cli/streaming/macos.py" = ["TID251"] +"aai_cli/streaming/sources.py" = ["TID251"] +# Environment-owning modules (config/auth/env resolution; output & environments are +# allowlisted above alongside their existing ignores): +"aai_cli/config.py" = ["TID251"] +"aai_cli/context.py" = ["TID251"] +"aai_cli/update_check.py" = ["TID251"] +"aai_cli/auth/endpoints.py" = ["TID251"] +"aai_cli/init/keys.py" = ["TID251"] +"aai_cli/commands/dev/_exec.py" = ["TID251"] +"aai_cli/commands/share/_exec.py" = ["TID251"] +"aai_cli/commands/evaluate/_hf_api.py" = ["TID251"] # ENV_CLIENT_TOKEN holds an env-var *name*; the shipped token constant is empty in -# source (release builds inject the write-only client token). -"aai_cli/telemetry.py" = ["S105"] +# source (release builds inject the write-only client token). TID251: telemetry reads +# its opt-out / intake-URL / CI-detection env vars (an env-owning module). +"aai_cli/telemetry.py" = ["S105", "TID251"] [tool.vulture] paths = ["aai_cli", "tests"] @@ -278,6 +379,16 @@ ignore_decorators = ["@app.command", "@app.callback"] ignore_names = ["app", "capture_output", "download", "healthy", "ist", "lpath", "memory_keyring", "org", "preserve_logging_state", "refresh", "rpath"] +[tool.codespell] +# Spell-check code, comments, and docs (Kubernetes' verify-spelling, generalized). Run via +# `uvx codespell` in check.sh and as a pre-commit hook, so it needs no entry in uv.lock. +# Skip generated/binary/snapshot trees and the lockfile; recorded fixtures and snapshots +# are byte-pinned and must not be "corrected". +skip = "./.venv,./dist,./docs,./node_modules,./.git,uv.lock,*.ambr,./tests/fixtures,./aai_cli/_version.py" +# Domain words codespell misreads: "unparseable" (accepted variant), "ist" (an identifier), +# "expresso" (a deliberate mis-transcription used as an eval/WER example). +ignore-words-list = "unparseable,ist,expresso,notin,ans" + [tool.deptry] exclude = ["docs", "dist", ".venv", "aai_cli/init/templates"] diff --git a/scripts/check.sh b/scripts/check.sh index a89e1f14..e238a58d 100755 --- a/scripts/check.sh +++ b/scripts/check.sh @@ -103,6 +103,16 @@ fi echo "==> markdownlint (docs/ is generated, so excluded)" markdownlint "**/*.md" --ignore docs --ignore node_modules --ignore .pytest_cache +echo "==> codespell (spell-check code, comments, docs)" +# Kubernetes' verify-spelling, generalized. Config (skips + ignore-words) is in +# [tool.codespell] in pyproject.toml. Run via uvx (like twine below) so it needs no +# entry in uv.lock; pre-commit also runs it. uvx self-skips if offline/unavailable. +if command -v uvx >/dev/null 2>&1; then + uvx codespell . +else + echo " uvx not found; skipping (pre-commit + CI run codespell)" +fi + echo "==> json validity (all tracked + staged *.json)" # Parse every JSON file so a malformed dashboard / vercel.json / fixture fails here # instead of silently downstream (a bad dashboard just won't import). Validity only — @@ -170,6 +180,32 @@ trap - EXIT echo "==> init template contract/import gate" uv run python scripts/template_contract_gate.py +echo "==> unused snapshot/fixture gate" +# xdist disables syrupy's own unused-snapshot detection, so a renamed/deleted test can +# leave an orphaned .ambr or recorded API fixture behind. This static check catches it. +uv run python scripts/unused_fixtures_gate.py + +echo "==> docs consistency gate (env vars / exit codes / command refs)" +# curl's "every option is documented" presubmit, generalized: REFERENCE.md/README.md must +# not drift from the code — every env var and exit code is documented, every `assembly …` +# example names a real command. +uv run python scripts/docs_consistency_gate.py + +echo "==> docstring coverage gate (public API ratchet)" +# interrogate can't parse this codebase's PEP 695 generics, so an ast-based ratchet stands +# in: public-API docstring coverage may not drop below the floor in scripts/. +uv run python scripts/docstring_coverage_gate.py + +echo "==> brew audit (Homebrew formula)" +# Lint the formula we ship (Formula/assembly.rb) the way Homebrew's own CI does, so a +# formula regression fails here instead of on the release PR. brew is macOS/Linuxbrew +# only, so this self-skips where it isn't installed (CI's release path has it). +if command -v brew >/dev/null 2>&1; then + brew audit --strict --formula Formula/assembly.rb +else + echo " brew not found; skipping (Homebrew CI / release runner has it)" +fi + echo "==> pytest (with branch-coverage gate)" # Exclude e2e: they drive the CLI as a subprocess (uncounted by coverage) and need # a live API key. Exclude install (real per-template dep install, slow + network). diff --git a/scripts/docs_consistency_gate.py b/scripts/docs_consistency_gate.py new file mode 100644 index 00000000..6d495e37 --- /dev/null +++ b/scripts/docs_consistency_gate.py @@ -0,0 +1,116 @@ +from __future__ import annotations + +import re +import sys +from pathlib import Path + +import typer + +from aai_cli.main import app + +# Docs-stay-in-sync gate, in the spirit of curl's "every option is documented" presubmit +# and numpy's refguide-check: the reference doc and the code must not drift apart. Three +# checks, all static and fast: +# 1. Environment-variable parity — every AAI_*/ASSEMBLYAI_* var the code reads is either +# documented in REFERENCE.md or explicitly listed as internal here, and every such +# documented var is actually read (no stale rows). +# 2. Exit-code parity — every numeric exit code the code returns is in REFERENCE.md's +# exit-code table. +# 3. Command-reference validity — every `assembly []` example in the docs +# names a real command (catches a doc that outlives a rename). + +REPO_ROOT = Path(__file__).resolve().parent.parent +REFERENCE = REPO_ROOT / "REFERENCE.md" +DOC_SOURCES = (REPO_ROOT / "README.md", REFERENCE) +PACKAGE = REPO_ROOT / "aai_cli" + +# Vars the code reads that are deliberately undocumented: telemetry plumbing overrides and +# the scaffold's product-config vars (written into a generated app's .env, not CLI behavior). +INTERNAL_VARS = { + "AAI_TELEMETRY_CLIENT_TOKEN", + "AAI_TELEMETRY_INTAKE_URL", + "AAI_MACOS_AUDIO_DEBUG", + "ASSEMBLYAI_BASE_URL", + "ASSEMBLYAI_LLM_GATEWAY_URL", + "ASSEMBLYAI_STREAMING_HOST", + "ASSEMBLYAI_AGENTS_HOST", +} + +_VAR_RE = re.compile(r"\b((?:AAI|ASSEMBLYAI)_[A-Z0-9_]+)\b") +_DOC_VAR_RE = re.compile(r"`((?:AAI|ASSEMBLYAI)_[A-Z0-9_]+)`") +_EXIT_DOC_RE = re.compile(r"\|\s*`(\d+)`\s*\|") +_EXIT_CODE_RE = re.compile(r"exit_code\s*[=:]\s*(\d+)|Exit\(code=(\d+)\)") +_CMD_RE = re.compile(r"\bassembly\s+([a-z][\w-]*)(?:\s+([a-z][\w-]*))?") + + +def _package_sources() -> str: + return "\n".join( + p.read_text(encoding="utf-8") + for p in PACKAGE.rglob("*.py") + if "templates" not in p.parts and p.name != "_version.py" + ) + + +def _env_var_errors() -> list[str]: + code = _package_sources() + code_vars = set(_VAR_RE.findall(code)) + doc_vars = set(_DOC_VAR_RE.findall(REFERENCE.read_text(encoding="utf-8"))) + return [ + f"env var {var} is read in code but not documented in REFERENCE.md" + for var in sorted(code_vars - doc_vars - INTERNAL_VARS) + ] + [ + f"env var {var} is documented in REFERENCE.md but never read in code" + for var in sorted(doc_vars - code_vars - INTERNAL_VARS) + ] + + +def _exit_code_errors() -> list[str]: + documented = {int(m) for m in _EXIT_DOC_RE.findall(REFERENCE.read_text(encoding="utf-8"))} + errors: list[str] = [] + for path in sorted(PACKAGE.rglob("*.py")): + if "templates" in path.parts: + continue + for groups in _EXIT_CODE_RE.findall(path.read_text(encoding="utf-8")): + code = int(next(g for g in groups if g)) + if code not in documented: + rel = path.relative_to(REPO_ROOT) + errors.append(f"exit code {code} used in {rel} is not in REFERENCE.md's table") + return errors + + +def _command_tree() -> tuple[set[str], dict[str, set[str]]]: + root = typer.main.get_command(app) + commands = getattr(root, "commands", {}) + groups = { + name: set(getattr(obj, "commands", {})) + for name, obj in commands.items() + if hasattr(obj, "commands") + } + return set(commands), groups + + +def _command_ref_errors() -> list[str]: + top, groups = _command_tree() + errors: list[str] = [] + for doc in DOC_SOURCES: + for cmd, sub in _CMD_RE.findall(doc.read_text(encoding="utf-8")): + if cmd not in top: + errors.append(f"{doc.name}: `assembly {cmd}` names an unknown command") + elif sub and cmd in groups and sub not in groups[cmd]: + errors.append(f"{doc.name}: `assembly {cmd} {sub}` names an unknown subcommand") + return errors + + +def main() -> int: + errors = _env_var_errors() + _exit_code_errors() + _command_ref_errors() + if not errors: + sys.stdout.write("Docs and code agree (env vars, exit codes, command references).\n") + return 0 + for err in errors: + sys.stdout.write(f"{err}\n") + sys.stdout.write("Update REFERENCE.md/README.md (or the INTERNAL_VARS allowlist) to match.\n") + return 1 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/scripts/docstring_coverage_gate.py b/scripts/docstring_coverage_gate.py new file mode 100644 index 00000000..c78bfde8 --- /dev/null +++ b/scripts/docstring_coverage_gate.py @@ -0,0 +1,59 @@ +from __future__ import annotations + +import ast +import sys +from pathlib import Path + +# Docstring-coverage ratchet for the shipped package's public API, replacing `interrogate` +# (which can't parse this codebase's PEP 695 generics, e.g. `def emit[T](...)`). Public = +# the module plus every non-underscore class/function/method. The FLOOR is set at the +# current level and only ever ratchets up: a change may not drop public-API documentation +# below it, but nobody is forced to backfill the existing gap in one go. Raising FLOOR as +# coverage climbs is a deliberate, reviewed edit here — the same model as a coverage gate. +FLOOR = 64.0 + +PACKAGE = Path(__file__).resolve().parent.parent / "aai_cli" + +_Def = (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef) + + +def _public_nodes(tree: ast.Module) -> list[ast.AST]: + nodes: list[ast.AST] = [tree] + nodes.extend(n for n in ast.walk(tree) if isinstance(n, _Def) and not n.name.startswith("_")) + return nodes + + +def _coverage() -> tuple[int, int, list[str]]: + total = documented = 0 + missing: list[str] = [] + for path in sorted(PACKAGE.rglob("*.py")): + if "templates" in path.parts or path.name == "_version.py": + continue + tree = ast.parse(path.read_text(encoding="utf-8")) + for node in _public_nodes(tree): + total += 1 + if ast.get_docstring(node): + documented += 1 + else: + name = getattr(node, "name", "") + missing.append(f"{path.relative_to(PACKAGE.parent)}:{name}") + return documented, total, missing + + +def main() -> int: + documented, total, missing = _coverage() + pct = 100.0 * documented / total if total else 100.0 + if pct + 1e-9 >= FLOOR: + sys.stdout.write(f"Public docstring coverage {pct:.1f}% (>= floor {FLOOR}%).\n") + return 0 + sys.stdout.write( + f"Public docstring coverage {pct:.1f}% fell below the {FLOOR}% floor " + f"({documented}/{total}). Add docstrings to public APIs you touched:\n" + ) + for item in missing[:20]: + sys.stdout.write(f" {item}\n") + return 1 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/scripts/unused_fixtures_gate.py b/scripts/unused_fixtures_gate.py new file mode 100644 index 00000000..6961f6f7 --- /dev/null +++ b/scripts/unused_fixtures_gate.py @@ -0,0 +1,74 @@ +from __future__ import annotations + +import sys +from pathlib import Path + +# Orphaned-test-artifact gate, modeled on the Deno toolchain's "every `.out` file must be +# referenced by a test" check (tools/lint.js). The unit suite runs under pytest-xdist +# (`-n auto`), which disables syrupy's own unused-snapshot reporting — each worker only +# sees a slice of the snapshots — so a renamed or deleted test can silently leave its +# whole snapshot file or a recorded API fixture behind to rot. This catches that +# statically and fast, with no extra test run. +# +# Two artifact kinds are checked: +# * tests/__snapshots__/.ambr — syrupy names a snapshot file after its test +# module, so each `.ambr` must have a matching tests/.py. +# * tests/fixtures/api/.json — replay fixtures are loaded by stem +# (replay_fixtures.load_object("")), so each must be referenced by name in +# some test module (the loader module itself doesn't count). + +REPO_ROOT = Path(__file__).resolve().parent.parent +TESTS_DIR = REPO_ROOT / "tests" +SNAPSHOT_DIR = TESTS_DIR / "__snapshots__" +API_FIXTURE_DIR = TESTS_DIR / "fixtures" / "api" +# The fixture loader names every stem in its own docstring/paths, so it can't count as a +# real reference — only an actual test that loads the fixture should keep it alive. +LOADER_MODULE = "replay_fixtures.py" + + +def _orphaned_snapshots() -> list[Path]: + """`.ambr` files whose owning test module no longer exists.""" + return [ + ambr.relative_to(REPO_ROOT) + for ambr in sorted(SNAPSHOT_DIR.glob("*.ambr")) + if not (TESTS_DIR / f"{ambr.stem}.py").exists() + ] + + +def _test_sources() -> list[str]: + """Bodies of every test module except the fixture loader.""" + return [ + path.read_text(encoding="utf-8") + for path in sorted(TESTS_DIR.rglob("*.py")) + if path.name != LOADER_MODULE + ] + + +def _unreferenced_fixtures() -> list[Path]: + """API fixtures whose stem is never named by a test module.""" + if not API_FIXTURE_DIR.exists(): + return [] + sources = _test_sources() + return [ + fixture.relative_to(REPO_ROOT) + for fixture in sorted(API_FIXTURE_DIR.glob("*.json")) + if not any(fixture.stem in source for source in sources) + ] + + +def main() -> int: + snapshot_orphans = _orphaned_snapshots() + fixture_orphans = _unreferenced_fixtures() + if not snapshot_orphans and not fixture_orphans: + sys.stdout.write("No orphaned snapshots or unreferenced fixtures.\n") + return 0 + for path in snapshot_orphans: + sys.stdout.write(f"Orphaned snapshot (no matching test module): {path}\n") + for path in fixture_orphans: + sys.stdout.write(f"Unreferenced API fixture (no test loads it): {path}\n") + sys.stdout.write("Delete the dead artifact, or wire it back into a test.\n") + return 1 + + +if __name__ == "__main__": + sys.exit(main())