diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 0b6796a7..346b400b 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -13,6 +13,20 @@ repos:
       - id: check-toml
       - id: check-merge-conflict
       - id: check-added-large-files
+      # Cross-OS filename collisions: this repo ships a macOS Homebrew bottle, so two
+      # paths differing only in case would break a case-insensitive checkout.
+      - id: check-case-conflict
+      # Defense-in-depth alongside gitleaks: never commit a literal private key.
+      - id: detect-private-key
+
+  # Spell-check code, comments, and docs (Kubernetes' verify-spelling, generalized).
+  # Config (skips + ignore-words) lives in [tool.codespell] in pyproject.toml; check.sh
+  # runs the same tool via `uvx codespell`.
+  - repo: https://github.com/codespell-project/codespell
+    rev: v2.4.2
+    hooks:
+      - id: codespell
+        additional_dependencies: [tomli]
 
   - repo: https://github.com/astral-sh/ruff-pre-commit
     rev: v0.15.16
diff --git a/AGENTS.md b/AGENTS.md
index c74f27b2..5224bffe 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -25,7 +25,7 @@ uv run assembly --help            # run the CLI from the locked environment
 
 Dev tooling is a PEP 735 `[dependency-groups]` group with `default-groups = ["dev"]`, not a `[project]` extra — `uv sync --extra dev` errors.
 
-`scripts/check.sh` is the authoritative gate; keep this list in sync with it. It runs, in order: `uv lock --check` → `ruff check` → `ruff format --check` → `mypy` → `pyright` (src strict) → `pyright` (tests) → `vulture` (dead code) → `deptry` (dependency hygiene) → `lint-imports` (import-linter architecture contracts) → max-file-length (500 lines) → `xenon` (cyclomatic complexity, max grade B / project avg A) → `swiftlint` + swift compile (macOS only, skipped elsewhere) → `markdownlint` → `prettier` (init template JS/CSS) → `shellcheck` → `actionlint` + `zizmor` (workflow lint/audit) → `gitleaks` (secret scan) → generated `--show-code` compile gate → init template contract gate → `pytest` (90% branch coverage) → `diff-cover` (100% patch coverage vs `origin/main`) → **mutation gate** (diff-scoped: mutates each changed line and reruns the tests that cover it — a surviving mutant fails the gate, so changed lines need assertions that would *fail* if the line broke, not just coverage; suppress a genuinely unassertable line with `# pragma: no mutate`) → a "no new escape hatches" gate (`# type: ignore` / `# noqa` / `pragma: no cover` / `Any` / `cast(` / test skip/xfail/sleep, all **count-gated against the merge-base** so moving an existing hatch in a refactor doesn't false-positive but a net-new one fails) → **CodeQL gate** (`scripts/codeql_gate.py`: the same security + quality suites the CodeQL workflow uploads to GitHub's code-scanning/quality tabs, run locally over python/actions/javascript so alerts fail before push instead of on the PR; needs the CodeQL bundle on PATH — self-skips otherwise, `codeql.yml` covers CI, and the web session-start hook provisions it) → `uv build` + `twine check --strict`. The `vulture`/`deptry`/`lint-imports`/`xenon`, patch-coverage, and mutation stages catch the failures that `ruff`+`mypy` alone won't — don't claim the gate is green until the script prints `All checks passed.`
+`scripts/check.sh` is the authoritative gate; keep this list in sync with it. It runs, in order: `uv lock --check` → `ruff check` → `ruff format --check` → `mypy` → `pyright` (src strict) → `pyright` (tests) → `vulture` (dead code) → `deptry` (dependency hygiene) → `lint-imports` (import-linter architecture contracts) → max-file-length (500 lines) → `xenon` (cyclomatic complexity, max grade B / project avg A) → `swiftlint` + swift compile (macOS only, skipped elsewhere) → `markdownlint` → `codespell` (spell-check code/comments/docs via `uvx`; config in `[tool.codespell]`) → json validity (all tracked + staged `*.json`) → `prettier` (init template JS/CSS) → `shellcheck` → `actionlint` + `zizmor` (workflow lint/audit) → `gitleaks` (secret scan) → generated `--show-code` compile gate → init template contract gate → unused snapshot/fixture gate (`scripts/unused_fixtures_gate.py`: orphaned `.ambr`/API fixtures, since xdist disables syrupy's own unused detection) → docs consistency gate (`scripts/docs_consistency_gate.py`: REFERENCE.md/README.md env vars, exit codes, and `assembly …` command refs stay in sync with the code) → docstring coverage gate (`scripts/docstring_coverage_gate.py`: public-API docstring ratchet, an `interrogate` stand-in that handles PEP 695 generics) → `brew audit --strict` (the shipped `Formula/assembly.rb`; self-skips without Homebrew) → `pytest` (90% branch coverage) → `diff-cover` (100% patch coverage vs `origin/main`) → **mutation gate** (diff-scoped: mutates each changed line and reruns the tests that cover it — a surviving mutant fails the gate, so changed lines need assertions that would *fail* if the line broke, not just coverage; suppress a genuinely unassertable line with `# pragma: no mutate`) → a "no new escape hatches" gate (`# type: ignore` / `# noqa` / `pragma: no cover` / `Any` / `cast(` / test skip/xfail/sleep, all **count-gated against the merge-base** so moving an existing hatch in a refactor doesn't false-positive but a net-new one fails) → **CodeQL gate** (`scripts/codeql_gate.py`: the same security + quality suites the CodeQL workflow uploads to GitHub's code-scanning/quality tabs, run locally over python/actions/javascript so alerts fail before push instead of on the PR; needs the CodeQL bundle on PATH — self-skips otherwise, `codeql.yml` covers CI, and the web session-start hook provisions it) → `uv build` + `twine check --strict`. The `vulture`/`deptry`/`lint-imports`/`xenon`, patch-coverage, and mutation stages catch the failures that `ruff`+`mypy` alone won't — don't claim the gate is green until the script prints `All checks passed.`
 
 **Commits are gated.** On success `check.sh` records a working-tree signature (`scripts/gate_marker.py record` → `.git/aai-gate-pass`), and a PreToolUse hook (`.claude/hooks/require-gate-before-commit.sh`) blocks `git commit` unless that signature still matches — so run the full gate to completion *before* committing (a single-file `pytest` does not satisfy it), and re-run it after any further edit. Iterate with the fast targeted commands above, gate once at the end. For a deliberate work-in-progress commit, prefix `AAI_ALLOW_COMMIT=1 git commit …`.
 
@@ -34,7 +34,7 @@ Individual tools (all via `uv run`):
 ```sh
 uv run ruff check .          # lint
 uv run ruff format .         # format (line-length 100)
-uv run mypy                  # files = ["aai_cli", "tests"] from pyproject; strict (disallow_untyped_defs on src)
+uv run mypy                  # files = ["aai_cli", "tests"] from pyproject; src is full --strict bar disallow_untyped_calls (jiwer ships no stubs); tests relax the untyped-body flags
 prettier --check "aai_cli/init/templates/**/*.{js,css}"  # JS/CSS template formatting
 uv run pytest -q             # default unit suite
 uv run pytest tests/test_transcribe.py -q              # a single file
diff --git a/aai_cli/AGENTS.md b/aai_cli/AGENTS.md
index 1a7191fc..9e3dccee 100644
--- a/aai_cli/AGENTS.md
+++ b/aai_cli/AGENTS.md
@@ -97,6 +97,7 @@ heavily-reworked commands with long bodies; small commands keep the inline
 - **`environments.py`** — a frozen `Environment` (api_base, streaming_host, llm_gateway_base, ams_base, stytch_*). `DEFAULT_ENV` is **`production`**; use `--sandbox` (or `--env sandbox000` / `AAI_ENV`) to target the sandbox. The active environment is a process-global set once at startup; precedence: `--env` → `AAI_ENV` → profile's stored env → default. A credential is only valid against the environment that minted it.
 - **`client.py`** — thin wrappers over the `assemblyai` SDK (`transcribe`, `list_transcripts`, `stream_audio`, etc.). It normalizes SDK exceptions: auth failures become a single clean `auth_failure()` `CLIError`; everything else becomes `APIError`. New SDK calls should follow this try/except shape.
 - **`errors.py`** — the `CLIError` hierarchy (each with `error_type` + `exit_code`). `output.py` emits errors to **stderr**; stdout stays clean for pipelines. `--json` switches to machine-readable output; it is never auto-enabled — `output.resolve_json()` deliberately keeps human text the default even when piped or agent-run.
+- **Raw `subprocess` and `os.environ`/`os.getenv` are fenced by ruff `banned-api` (TID251).** Only the modules allowlisted in `pyproject.toml`'s `per-file-ignores` may call them — process spawning is meant to go through `procs.py`, and environment reads through the config/env-resolution layer. A new module reaching for either trips the gate, so adding one is a deliberate, reviewable allowlist edit (the Deno toolchain's per-crate `clippy.toml` model). Tests and `scripts/` are exempt.
 - **`debuglog.py`** — the root `-v/--verbose` flag (count: `-v` request-level at INFO, `-vv` wire-level at DEBUG). The CLI normally configures no logging, and the realtime paths *silence* library loggers (`ws.py`, `streaming/diagnostics.py`); verbose mode installs one redacting stderr handler and those silencers stand down. Secrets are registered at their resolution choke points (`config.resolve_api_key`, `AppState.resolve_session`) and masked in every rendered record — websockets logs the raw Authorization header at DEBUG, so masking lives in the formatter, not at call sites. Stdlib-only on purpose: `config` (a Rich-free layer) imports it.
 
 ### Feature subsystems
diff --git a/pyproject.toml b/pyproject.toml
index 2e5bd6df..964a98f2 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -175,11 +175,37 @@ warn_unreachable = true
 disallow_any_generics = true
 no_implicit_reexport = true
 extra_checks = true
+# The remaining flags from mypy --strict, which aai_cli already satisfies. They close
+# gaps the above leave open: a function with *some* annotations but an unannotated
+# arg/return (disallow_incomplete_defs), the body of any still-untyped function going
+# unchecked (check_untyped_defs), an untyped decorator silently erasing a function's
+# type (disallow_untyped_decorators), subclassing an Any-typed base (disallow_subclassing_any),
+# and a config option that no longer matches any file (warn_unused_configs). The one
+# strict flag left off is disallow_untyped_calls: jiwer ships no stubs, so wer.py's
+# RemovePunctuation() call is unavoidably untyped, and turning it on would force a
+# net-new `# type: ignore` the escape-hatch gate rejects.
+disallow_incomplete_defs = true
+check_untyped_defs = true
+disallow_untyped_decorators = true
+disallow_subclassing_any = true
+warn_unused_configs = true
 
 [[tool.mypy.overrides]]
-# Tests are type-checked too, but pytest functions don't need return annotations.
+# Tests are type-checked too, but pytest functions don't need return annotations
+# (disallow_untyped_defs/disallow_incomplete_defs), and the bodies of those untyped
+# helpers — full of mock plumbing and ad-hoc fixtures — would drown the signal if
+# type-checked (check_untyped_defs) or block subclassing untyped test doubles
+# (disallow_subclassing_any) / wrapping them in untyped decorators
+# (disallow_untyped_decorators). The strict flags stay on for the shipped package.
+# With check_untyped_defs off, mypy emits an `annotation-unchecked` note per annotated
+# untyped helper; silence those notes so the test output stays signal.
 module = "tests.*"
 disallow_untyped_defs = false
+disallow_incomplete_defs = false
+check_untyped_defs = false
+disallow_subclassing_any = false
+disallow_untyped_decorators = false
+disable_error_code = ["annotation-unchecked"]
 
 [tool.pyright]
 # Second type checker alongside mypy: pyright catches a different class of
@@ -211,9 +237,25 @@ extend-exclude = ["aai_cli/_version.py"]
 # A/N/FBT/PL/T20/PT/PIE/PERF/TCH add maintainability pressure: naming/shadowing,
 # boolean traps, pylint-style design issues, centralized raw output, pytest style,
 # small simplifications, performance footguns, and type-only import hygiene.
+# ASYNC/LOG/G/DTZ/FLY/ICN/SLOT/ISC/TID/T10 add correctness pressure the above miss and the
+# codebase already satisfies (so they're forward-looking, zero-churn enforcement):
+#   ASYNC — blocking calls (time.sleep, open(), sync HTTP) inside the streaming/agent
+#           asyncio code, which would stall the event loop;
+#   LOG/G — logging anti-patterns (f-strings/`.format` in log calls, `exception()`
+#           outside handlers) in debuglog and friends;
+#   DTZ   — naive datetime construction (timezone bugs);
+#   FLY   — static `str.join` that should be an f-string (pairs with UP);
+#   ISC   — implicitly concatenated string literals across lines (the classic
+#           missing-comma-in-a-list bug); ISC001 is owned by the formatter (ignored);
+#   ICN/SLOT — import-convention and __slots__ hygiene;
+#   TID   — relative imports (banned outright below) so every import is absolute,
+#           reinforcing the import-linter architecture contracts;
+#   T10   — a forgotten breakpoint()/pdb/ipdb left in the shipped code (the debugger
+#           counterpart to the T20 print ban already selected).
 select = ["E", "F", "I", "UP", "B", "BLE", "C4", "SIM", "RET", "PTH", "ARG", "S", "RUF",
           "PGH", "ERA", "TRY", "TD", "FIX", "A", "N", "FBT", "PL", "C90", "T20", "PT",
-          "PIE", "PERF", "TCH"]
+          "PIE", "PERF", "TCH", "ASYNC", "LOG", "G", "DTZ", "FLY", "ICN", "SLOT", "ISC",
+          "TID", "T10"]
 # E501: line length is owned by the formatter.
 # B008: Typer uses function calls (typer.Option/Argument) as parameter defaults.
 # S603/S607: we intentionally shell out to `claude`/`npx` with controlled args.
@@ -223,13 +265,35 @@ select = ["E", "F", "I", "UP", "B", "BLE", "C4", "SIM", "RET", "PTH", "ARG", "S"
 # PLC0415: optional/heavy runtime deps are imported lazily to keep startup fast.
 # TC001-TC003: the project intentionally keeps readable top-level type imports; TC006
 #   still enforces quoted runtime casts.
+# ISC001: single-line implicit string concatenation is managed by the formatter, which
+#   would otherwise fight this lint (ruff's own recommendation when both are enabled).
 ignore = ["E501", "B008", "S603", "S607", "TRY003", "N818", "PLC0415",
-          "TC001", "TC002", "TC003"]
+          "TC001", "TC002", "TC003", "ISC001"]
 
 # Function-size pressure, tuned to keep functions small enough to read and edit in
 # one screen (the friction a coding agent hits most). These complement xenon's
 # cyclomatic-complexity gate in check.sh: mccabe (C901) and max-branches bound
 # branchiness; max-statements bounds raw length; max-args bounds signatures.
+[tool.ruff.lint.flake8-tidy-imports]
+# Every intra-package import is already absolute (`from aai_cli.x import y`); banning
+# relative imports outright keeps it that way, which makes modules movable and the
+# import-linter contracts unambiguous.
+ban-relative-imports = "all"
+
+# Disallowed-methods enforcement, modeled on the Deno toolchain's per-crate clippy.toml
+# bans (only designated crates may call the fenced std methods). Process spawning and
+# raw environment access stay confined to the modules that legitimately own them
+# (allowlisted via per-file-ignores below); any *new* module reaching for them trips
+# TID251, so adding one is a visible, reviewable edit rather than a silent spread.
+# The matcher is AST-based, so the os.environ snippets inside the code_gen --show-code
+# exemplars (string literals) don't trip it.
+[tool.ruff.lint.flake8-tidy-imports.banned-api]
+"subprocess".msg = "Spawn detached children via aai_cli.procs; if a module genuinely needs raw subprocess, add it to the TID251 allowlist in pyproject.toml."
+"os.environ".msg = "Resolve configuration through aai_cli.config / aai_cli.context (which centralize precedence and secret handling); env-owning modules are allowlisted for TID251 in pyproject.toml."
+"os.getenv".msg = "Use os.environ.get (the single project idiom) via an env-owning module; see the TID251 allowlist in pyproject.toml."
+"os.putenv".msg = "os.putenv/os.unsetenv bypass os.environ and desync the mapping; mutate os.environ instead."
+"os.unsetenv".msg = "os.putenv/os.unsetenv bypass os.environ and desync the mapping; mutate os.environ instead."
+
 [tool.ruff.lint.mccabe]
 max-complexity = 10  # matches xenon's grade-B ceiling (CC <= 10) so the two agree
 
@@ -244,9 +308,14 @@ max-statements = 40
 # TRY300: test helpers commonly `return` inside a try while asserting on the except path.
 # Tests also keep literal exit codes, local imports, composite assertions, and fake
 # call signatures where those make the intent clearer than production-style indirection.
+# TID251: tests drive the CLI as a subprocess and monkeypatch os.environ freely; the
+# banned-api ban targets the shipped aai_cli package, not the test harness or dev gates.
+# DTZ: tests build naive datetimes as deterministic fixtures (the suite pins TZ in
+# conftest and uses time-machine), so timezone-aware construction isn't required here.
 "tests/**" = ["S101", "S105", "S106", "S107", "S108", "ARG001", "ARG002", "ARG005",
               "PTH123", "SIM117", "TRY300", "FBT", "PLR2004", "PLC0415", "PLR0913",
-              "PLW1510", "N806", "N818", "PLW0108", "PT018", "TCH"]
+              "PLW1510", "N806", "N818", "PLW0108", "PT018", "TCH", "TID251", "DTZ"]
+"scripts/**" = ["TID251"]
 # Typer command functions naturally have many boolean options and broad signatures
 # (PLR0913/FBT). Their *bodies*, though, are held to the same length/branch limits as
 # the rest of the package: PLR0912/PLR0915/C901 are deliberately NOT ignored here.
@@ -257,18 +326,50 @@ max-statements = 40
 # command signatures do.
 "aai_cli/options.py" = ["FBT003"]
 # Raw stdout/stderr writes are centralized here; command modules call output helpers.
-"aai_cli/output.py" = ["T201"]
+# TID251: output owns the FORCE_COLOR/NO_COLOR env toggles and TTY/agent detection.
+"aai_cli/output.py" = ["T201", "TID251"]
 # The active environment is process-global startup state by design.
-"aai_cli/environments.py" = ["PLW0603"]
+# TID251: environments.py owns AAI_ENV resolution (an env-owning module).
+"aai_cli/environments.py" = ["PLW0603", "TID251"]
 # Verbosity is process-global startup state by design (mirrors environments.py).
 "aai_cli/debuglog.py" = ["PLW0603"]
 # BaseHTTPRequestHandler.log_message requires a parameter named `format`.
 "aai_cli/auth/loopback.py" = ["A002"]
 # Template constants include URL path names such as TOKEN_PATH, not credentials.
-"aai_cli/init/templates/**" = ["S105"]
+# TID251: the scaffolds are end-user example apps that read their own config straight
+# from os.environ — that's correct, idiomatic code to ship, not a CLI-internal env read.
+"aai_cli/init/templates/**" = ["S105", "TID251"]
+
+# TID251 banned-api allowlist (see [tool.ruff.lint.flake8-tidy-imports.banned-api]).
+# These are the only modules permitted raw `subprocess` (process spawning) or raw
+# `os.environ`/`os.getenv` (environment access). Splitting the ignore per file keeps the
+# blast radius explicit: a new module needing either must be added here in review.
+# Process-spawning modules (shell out to claude/npx/ffmpeg/yt-dlp/tunnels/etc.):
+"aai_cli/procs.py" = ["TID251"]
+"aai_cli/coding_agent.py" = ["TID251"]
+"aai_cli/mediafile.py" = ["TID251"]
+"aai_cli/setup_exec.py" = ["TID251"]
+"aai_cli/commands/deploy/_exec.py" = ["TID251"]
+"aai_cli/commands/update.py" = ["TID251"]
+"aai_cli/commands/webhooks/_listen.py" = ["TID251"]
+"aai_cli/init/runner.py" = ["TID251"]
+"aai_cli/init/tunnel.py" = ["TID251"]
+"aai_cli/streaming/macos.py" = ["TID251"]
+"aai_cli/streaming/sources.py" = ["TID251"]
+# Environment-owning modules (config/auth/env resolution; output & environments are
+# allowlisted above alongside their existing ignores):
+"aai_cli/config.py" = ["TID251"]
+"aai_cli/context.py" = ["TID251"]
+"aai_cli/update_check.py" = ["TID251"]
+"aai_cli/auth/endpoints.py" = ["TID251"]
+"aai_cli/init/keys.py" = ["TID251"]
+"aai_cli/commands/dev/_exec.py" = ["TID251"]
+"aai_cli/commands/share/_exec.py" = ["TID251"]
+"aai_cli/commands/evaluate/_hf_api.py" = ["TID251"]
 # ENV_CLIENT_TOKEN holds an env-var *name*; the shipped token constant is empty in
-# source (release builds inject the write-only client token).
-"aai_cli/telemetry.py" = ["S105"]
+# source (release builds inject the write-only client token). TID251: telemetry reads
+# its opt-out / intake-URL / CI-detection env vars (an env-owning module).
+"aai_cli/telemetry.py" = ["S105", "TID251"]
 
 [tool.vulture]
 paths = ["aai_cli", "tests"]
@@ -278,6 +379,16 @@ ignore_decorators = ["@app.command", "@app.callback"]
 ignore_names = ["app", "capture_output", "download", "healthy", "ist", "lpath", "memory_keyring",
                 "org", "preserve_logging_state", "refresh", "rpath"]
 
+[tool.codespell]
+# Spell-check code, comments, and docs (Kubernetes' verify-spelling, generalized). Run via
+# `uvx codespell` in check.sh and as a pre-commit hook, so it needs no entry in uv.lock.
+# Skip generated/binary/snapshot trees and the lockfile; recorded fixtures and snapshots
+# are byte-pinned and must not be "corrected".
+skip = "./.venv,./dist,./docs,./node_modules,./.git,uv.lock,*.ambr,./tests/fixtures,./aai_cli/_version.py"
+# Domain words codespell misreads: "unparseable" (accepted variant), "ist" (an identifier),
+# "expresso" (a deliberate mis-transcription used as an eval/WER example).
+ignore-words-list = "unparseable,ist,expresso,notin,ans"
+
 [tool.deptry]
 exclude = ["docs", "dist", ".venv", "aai_cli/init/templates"]
 
diff --git a/scripts/check.sh b/scripts/check.sh
index a89e1f14..e238a58d 100755
--- a/scripts/check.sh
+++ b/scripts/check.sh
@@ -103,6 +103,16 @@ fi
 echo "==> markdownlint (docs/ is generated, so excluded)"
 markdownlint "**/*.md" --ignore docs --ignore node_modules --ignore .pytest_cache
 
+echo "==> codespell (spell-check code, comments, docs)"
+# Kubernetes' verify-spelling, generalized. Config (skips + ignore-words) is in
+# [tool.codespell] in pyproject.toml. Run via uvx (like twine below) so it needs no
+# entry in uv.lock; pre-commit also runs it. Skipped only when uvx is not on PATH.
+if command -v uvx >/dev/null 2>&1; then
+  uvx codespell .
+else
+  echo "   uvx not found; skipping (pre-commit + CI run codespell)"
+fi
+
 echo "==> json validity (all tracked + staged *.json)"
 # Parse every JSON file so a malformed dashboard / vercel.json / fixture fails here
 # instead of silently downstream (a bad dashboard just won't import). Validity only —
@@ -170,6 +180,32 @@ trap - EXIT
 echo "==> init template contract/import gate"
 uv run python scripts/template_contract_gate.py
 
+echo "==> unused snapshot/fixture gate"
+# xdist disables syrupy's own unused-snapshot detection, so a renamed/deleted test can
+# leave an orphaned .ambr or recorded API fixture behind. This static check catches it.
+uv run python scripts/unused_fixtures_gate.py
+
+echo "==> docs consistency gate (env vars / exit codes / command refs)"
+# curl's "every option is documented" presubmit, generalized: REFERENCE.md/README.md must
+# not drift from the code — every env var and exit code is documented, every `assembly …`
+# example names a real command.
+uv run python scripts/docs_consistency_gate.py
+
+echo "==> docstring coverage gate (public API ratchet)"
+# interrogate can't parse this codebase's PEP 695 generics, so an ast-based ratchet stands
+# in: public-API docstring coverage may not drop below the floor in scripts/.
+uv run python scripts/docstring_coverage_gate.py
+
+echo "==> brew audit (Homebrew formula)"
+# Lint the formula we ship (Formula/assembly.rb) the way Homebrew's own CI does, so a
+# formula regression fails here instead of on the release PR. brew is macOS/Linuxbrew
+# only, so this self-skips where it isn't installed (CI's release path has it).
+if command -v brew >/dev/null 2>&1; then
+  brew audit --strict --formula Formula/assembly.rb
+else
+  echo "   brew not found; skipping (Homebrew CI / release runner has it)"
+fi
+
 echo "==> pytest (with branch-coverage gate)"
 # Exclude e2e: they drive the CLI as a subprocess (uncounted by coverage) and need
 # a live API key. Exclude install (real per-template dep install, slow + network).
diff --git a/scripts/docs_consistency_gate.py b/scripts/docs_consistency_gate.py
new file mode 100644
index 00000000..6d495e37
--- /dev/null
+++ b/scripts/docs_consistency_gate.py
@@ -0,0 +1,116 @@
+from __future__ import annotations
+
+import re
+import sys
+from pathlib import Path
+
+import typer
+
+from aai_cli.main import app
+
+# Docs-stay-in-sync gate, in the spirit of curl's "every option is documented" presubmit
+# and numpy's refguide-check: the reference doc and the code must not drift apart. Three
+# checks, all static and fast:
+#   1. Environment-variable parity — every AAI_*/ASSEMBLYAI_* var the code reads is either
+#      documented in REFERENCE.md or explicitly listed as internal here, and every such
+#      documented var is actually read (no stale rows).
+#   2. Exit-code parity — every numeric exit code the code returns is in REFERENCE.md's
+#      exit-code table.
+#   3. Command-reference validity — every `assembly <cmd> [<subcmd>]` example in the docs
+#      names a real command (catches a doc that outlives a rename).
+
+REPO_ROOT = Path(__file__).resolve().parent.parent
+REFERENCE = REPO_ROOT / "REFERENCE.md"
+DOC_SOURCES = (REPO_ROOT / "README.md", REFERENCE)
+PACKAGE = REPO_ROOT / "aai_cli"
+
+# Vars the code reads that are deliberately undocumented: telemetry plumbing overrides and
+# the scaffold's product-config vars (written into a generated app's .env, not CLI behavior).
+INTERNAL_VARS = {
+    "AAI_TELEMETRY_CLIENT_TOKEN",
+    "AAI_TELEMETRY_INTAKE_URL",
+    "AAI_MACOS_AUDIO_DEBUG",
+    "ASSEMBLYAI_BASE_URL",
+    "ASSEMBLYAI_LLM_GATEWAY_URL",
+    "ASSEMBLYAI_STREAMING_HOST",
+    "ASSEMBLYAI_AGENTS_HOST",
+}
+
+_VAR_RE = re.compile(r"\b((?:AAI|ASSEMBLYAI)_[A-Z0-9_]+)\b")
+_DOC_VAR_RE = re.compile(r"`((?:AAI|ASSEMBLYAI)_[A-Z0-9_]+)`")
+_EXIT_DOC_RE = re.compile(r"\|\s*`(\d+)`\s*\|")
+_EXIT_CODE_RE = re.compile(r"exit_code\s*[=:]\s*(\d+)|Exit\(code=(\d+)\)")
+_CMD_RE = re.compile(r"\bassembly\s+([a-z][\w-]*)(?:\s+([a-z][\w-]*))?")
+
+
+def _package_sources() -> str:
+    """Join the text of every package module, skipping templates and `_version.py`."""
+    modules = (m for m in PACKAGE.rglob("*.py") if "templates" not in m.parts)
+    # Second filter pass: _version.py is left out of the scan as well.
+    scanned = (m for m in modules if m.name != "_version.py")
+    return "\n".join(m.read_text(encoding="utf-8") for m in scanned)
+
+
+def _env_var_errors() -> list[str]:
+    """Report env vars read but undocumented, then ones documented but never read."""
+    in_code = set(_VAR_RE.findall(_package_sources()))
+    in_docs = set(_DOC_VAR_RE.findall(REFERENCE.read_text(encoding="utf-8")))
+    # INTERNAL_VARS are exempt in both directions.
+    missing = sorted(in_code - in_docs - INTERNAL_VARS)
+    stale = sorted(in_docs - in_code - INTERNAL_VARS)
+    return [
+        *(f"env var {var} is read in code but not documented in REFERENCE.md" for var in missing),
+        *(f"env var {var} is documented in REFERENCE.md but never read in code" for var in stale),
+    ]
+
+
+def _exit_code_errors() -> list[str]:
+    """Report each exit code found in the package that REFERENCE.md's table does not list."""
+    listed = {int(n) for n in _EXIT_DOC_RE.findall(REFERENCE.read_text(encoding="utf-8"))}
+    issues: list[str] = []
+    for module in sorted(m for m in PACKAGE.rglob("*.py") if "templates" not in m.parts):
+        for hit in _EXIT_CODE_RE.finditer(module.read_text(encoding="utf-8")):
+            # Only one alternative of the pattern matches, so one group holds the digits.
+            code = int(hit.group(1) or hit.group(2))
+            if code not in listed:
+                rel = module.relative_to(REPO_ROOT)
+                issues.append(f"exit code {code} used in {rel} is not in REFERENCE.md's table")
+    return issues
+
+
+def _command_tree() -> tuple[set[str], dict[str, set[str]]]:
+    """Return the CLI's top-level command names and, per group, its subcommand names."""
+    registered = getattr(typer.main.get_command(app), "commands", {})
+    subcommands = {
+        label: set(getattr(command, "commands", {}))
+        for label, command in registered.items()
+        if hasattr(command, "commands")
+    }
+    return set(registered), subcommands
+
+
+def _command_ref_errors() -> list[str]:
+    known, nested = _command_tree()
+    issues: list[str] = []
+    for doc in DOC_SOURCES:  # README.md and REFERENCE.md
+        for cmd, sub in _CMD_RE.findall(doc.read_text(encoding="utf-8")):
+            if cmd not in known:  # first word after `assembly` is not a top-level command
+                issues.append(f"{doc.name}: `assembly {cmd}` names an unknown command")
+            elif sub and sub not in nested.get(cmd, {sub}):  # non-groups accept any second word
+                issues.append(f"{doc.name}: `assembly {cmd} {sub}` names an unknown subcommand")
+    return issues
+
+
+def main() -> int:
+    """Run every check; print each finding and return 1, or report agreement and return 0."""
+    findings = [*_env_var_errors(), *_exit_code_errors(), *_command_ref_errors()]
+    if not findings:
+        sys.stdout.write("Docs and code agree (env vars, exit codes, command references).\n")
+        return 0
+    sys.stdout.writelines(f"{err}\n" for err in findings)
+    sys.stdout.write("Update REFERENCE.md/README.md (or the INTERNAL_VARS allowlist) to match.\n")
+    return 1
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/scripts/docstring_coverage_gate.py b/scripts/docstring_coverage_gate.py
new file mode 100644
index 00000000..c78bfde8
--- /dev/null
+++ b/scripts/docstring_coverage_gate.py
@@ -0,0 +1,59 @@
+from __future__ import annotations
+
+import ast
+import sys
+from pathlib import Path
+
+# Docstring-coverage ratchet for the shipped package's public API, replacing `interrogate`
+# (which can't parse this codebase's PEP 695 generics, e.g. `def emit[T](...)`). Public =
+# the module plus every non-underscore class/function/method. The FLOOR is set at the
+# current level and only ever ratchets up: a change may not drop public-API documentation
+# below it, but nobody is forced to backfill the existing gap in one go. Raising FLOOR as
+# coverage climbs is a deliberate, reviewed edit here — the same model as a coverage gate.
+FLOOR = 64.0
+
+PACKAGE = Path(__file__).resolve().parent.parent / "aai_cli"
+
+_Def = (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)
+
+
+def _public_nodes(tree: ast.Module) -> list[ast.AST]:
+    """Return the module node followed by each def/class whose name lacks a leading underscore."""
+    definitions = (node for node in ast.walk(tree) if isinstance(node, _Def))
+    return [tree, *(d for d in definitions if not d.name.startswith("_"))]
+
+
+def _coverage() -> tuple[int, int, list[str]]:
+    """Return (documented, total, missing) counted over every public node in the package."""
+    total = 0
+    missing: list[str] = []
+    for path in sorted(PACKAGE.rglob("*.py")):
+        if path.name == "_version.py" or "templates" in path.parts:
+            continue
+        for node in _public_nodes(ast.parse(path.read_text(encoding="utf-8"))):
+            total += 1
+            if not ast.get_docstring(node):
+                # A module node has no `name` attribute, so it is labelled "<module>".
+                name = getattr(node, "name", "<module>")
+                missing.append(f"{path.relative_to(PACKAGE.parent)}:{name}")
+    # Every node is either documented or recorded in `missing`.
+    return total - len(missing), total, missing
+
+
+def main() -> int:
+    """Return 0 if public docstring coverage meets FLOOR, else print up to 20 gaps and return 1."""
+    documented, total, missing = _coverage()
+    pct = 100.0 * documented / total if total else 100.0  # nothing to document counts as full
+    if pct + 1e-9 < FLOOR:  # small tolerance so float rounding cannot fail an exact match
+        sys.stdout.write(
+            f"Public docstring coverage {pct:.1f}% fell below the {FLOOR}% floor "
+            f"({documented}/{total}). Add docstrings to public APIs you touched:\n"
+        )
+        sys.stdout.writelines(f"  {item}\n" for item in missing[:20])
+        return 1
+    sys.stdout.write(f"Public docstring coverage {pct:.1f}% (>= floor {FLOOR}%).\n")
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/scripts/unused_fixtures_gate.py b/scripts/unused_fixtures_gate.py
new file mode 100644
index 00000000..6961f6f7
--- /dev/null
+++ b/scripts/unused_fixtures_gate.py
@@ -0,0 +1,74 @@
+from __future__ import annotations
+
+import sys
+from pathlib import Path
+
+# Orphaned-test-artifact gate, modeled on the Deno toolchain's "every `.out` file must be
+# referenced by a test" check (tools/lint.js). The unit suite runs under pytest-xdist
+# (`-n auto`), which disables syrupy's own unused-snapshot reporting — each worker only
+# sees a slice of the snapshots — so a renamed or deleted test can silently leave its
+# whole snapshot file or a recorded API fixture behind to rot. This catches that
+# statically and fast, with no extra test run.
+#
+# Two artifact kinds are checked:
+#   * tests/__snapshots__/<name>.ambr  — syrupy names a snapshot file after its test
+#     module, so each `.ambr` must have a matching tests/<name>.py.
+#   * tests/fixtures/api/<name>.json   — replay fixtures are loaded by stem
+#     (replay_fixtures.load_object("<name>")), so each must be referenced by name in
+#     some test module (the loader module itself doesn't count).
+
+REPO_ROOT = Path(__file__).resolve().parent.parent
+TESTS_DIR = REPO_ROOT / "tests"
+SNAPSHOT_DIR = TESTS_DIR / "__snapshots__"
+API_FIXTURE_DIR = TESTS_DIR / "fixtures" / "api"
+# The fixture loader names every stem in its own docstring/paths, so it can't count as a
+# real reference — only an actual test that loads the fixture should keep it alive.
+LOADER_MODULE = "replay_fixtures.py"
+
+
+def _orphaned_snapshots() -> list[Path]:
+    """Snapshot files left behind with no same-named test module in tests/."""
+    snapshots = sorted(SNAPSHOT_DIR.glob("*.ambr"))
+    # syrupy names a snapshot file after its test module, so <name>.ambr
+    # is owned by tests/<name>.py.
+    stale = (ambr for ambr in snapshots if not TESTS_DIR.joinpath(f"{ambr.stem}.py").exists())
+    return [ambr.relative_to(REPO_ROOT) for ambr in stale]
+
+
+def _test_sources() -> list[str]:
+    """Text of each Python file under tests/, leaving out the fixture loader module."""
+    modules = sorted(TESTS_DIR.rglob("*.py"))
+    # The loader names every fixture stem itself, so counting it would
+    # keep every fixture alive.
+    candidates = (module for module in modules if module.name != LOADER_MODULE)
+    return [module.read_text(encoding="utf-8") for module in candidates]
+
+
+def _unreferenced_fixtures() -> list[Path]:
+    """Recorded API fixtures whose file stem appears in no test module's text."""
+    if not API_FIXTURE_DIR.exists():
+        return []
+    corpus = _test_sources()
+    recorded = sorted(API_FIXTURE_DIR.glob("*.json"))
+    # A plain substring match on the stem: any mention in any test file keeps a fixture alive.
+    unused = (item for item in recorded if all(item.stem not in text for text in corpus))
+    # Paths are reported relative to the repo root.
+    return [item.relative_to(REPO_ROOT) for item in unused]
+
+
+def main() -> int:
+    """Return 0 after an all-clear message, or list every orphaned artifact and return 1."""
+    stale_snapshots, stale_fixtures = _orphaned_snapshots(), _unreferenced_fixtures()
+    if not (stale_snapshots or stale_fixtures):
+        sys.stdout.write("No orphaned snapshots or unreferenced fixtures.\n")
+        return 0
+    for path in stale_snapshots:
+        sys.stdout.write(f"Orphaned snapshot (no matching test module): {path}\n")
+    for path in stale_fixtures:
+        sys.stdout.write(f"Unreferenced API fixture (no test loads it): {path}\n")
+    sys.stdout.write("Delete the dead artifact, or wire it back into a test.\n")
+    return 1
+
+
+if __name__ == "__main__":
+    sys.exit(main())