diff --git a/.github/workflows/pr-overlap.yml b/.github/workflows/pr-overlap.yml
new file mode 100644
index 00000000..6c26cd73
--- /dev/null
+++ b/.github/workflows/pr-overlap.yml
@@ -0,0 +1,76 @@
+name: PR overlap
+
+# Advisory only: with many agent sessions working concurrently, two open PRs that
+# touch the same files are usually duplicated or conflicting work discovered too
+# late (post-merge). This surfaces the intersection as a warning annotation +
+# step summary while both PRs are still open; an overlap never fails the build
+# (a failed GitHub API call does, so a broken check is not mistaken for "clean").
+
+on:
+  pull_request:
+    branches: [main]
+    types: [opened, reopened, ready_for_review, synchronize]
+
+# Least privilege: listing open PRs and their files only needs read access.
+# No checkout, no third-party actions — the job is a single `gh api` script.
+permissions:
+  contents: read
+  pull-requests: read
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  overlap:
+    name: warn when another open PR touches the same files
+    runs-on: ubuntu-latest
+    timeout-minutes: 5
+    env:
+      GH_TOKEN: ${{ github.token }}
+      # Numeric / repo-slug values, passed through env (not inlined into the
+      # script) so no event-controlled text is interpolated into bash.
+      PR_NUMBER: ${{ github.event.pull_request.number }}
+      REPO: ${{ github.repository }}
+    steps:
+      - name: Compare changed files against every other open PR
+        run: |
+          set -euo pipefail
+          mine="$(mktemp)"
+          theirs="$(mktemp)"
+          # Sorted, de-duplicated paths this PR changes; `comm` below needs sorted input.
+          gh api "repos/${REPO}/pulls/${PR_NUMBER}/files" --paginate \
+            --jq '.[].filename' | sort -u > "$mine"
+          # List the open PRs before looping: a failure inside `< <(...)` is not
+          # seen by `set -e`, so the loop would run zero times and the summary
+          # would wrongly claim there is no overlap.
+          open_prs="$(gh api "repos/${REPO}/pulls?state=open&per_page=100" --paginate \
+            --jq '.[].number')"
+          found=0
+          while read -r number; do
+            # Skip the blank line a here-string yields for an empty list, and this PR itself.
+            if [ -z "$number" ] || [ "$number" = "$PR_NUMBER" ]; then
+              continue
+            fi
+            gh api "repos/${REPO}/pulls/${number}/files" --paginate \
+              --jq '.[].filename' | sort -u > "$theirs"
+            shared="$(comm -12 "$mine" "$theirs")"
+            if [ -n "$shared" ]; then
+              found=1
+              count="$(printf '%s\n' "$shared" | wc -l | tr -d '[:space:]')"
+              echo "::warning title=PR overlap::PR #${number} also changes ${count} of this PR's files - check for duplicated or conflicting work."
+              {
+                echo "### Overlap with PR #${number}"
+                echo ""
+                # Fenced block: outside one, markdown treats the underscores in a
+                # path like pkg/__init__.py as emphasis and mangles the name.
+                echo '```text'
+                printf '%s\n' "$shared"
+                echo '```'
+                echo ""
+              } >> "$GITHUB_STEP_SUMMARY"
+            fi
+          done <<< "$open_prs"
+          if [ "$found" = 0 ]; then
+            echo "No open PR shares files with this one." >> "$GITHUB_STEP_SUMMARY"
+          fi
diff --git a/.importlinter b/.importlinter
index 3f12f8d0..31c8c359 100644
--- a/.importlinter
+++ b/.importlinter
@@ -11,6 +11,7 @@ source_modules =
     aai_cli.argscan
     aai_cli.auth
     aai_cli.caption_exec
+    aai_cli.choices
     aai_cli.client
     aai_cli.clip_exec
     aai_cli.clip_select
@@ -23,10 +24,12 @@ source_modules =
     aai_cli.deploy_exec
     aai_cli.dev_exec
     aai_cli.dictate_exec
+    aai_cli.doctor_checks
     aai_cli.dub_exec
     aai_cli.environments
     aai_cli.errors
     aai_cli.eval_data
+    aai_cli.eval_hf_api
     aai_cli.evaluate_exec
     aai_cli.follow
     aai_cli.help_panels
@@ -34,26 +37,34 @@ source_modules =
     aai_cli.hotkey
     aai_cli.init
     aai_cli.init_exec
+    aai_cli.jsonshape
     aai_cli.llm
     aai_cli.llm_exec
     aai_cli.mediafile
     aai_cli.microphone
+    aai_cli.onboard
     aai_cli.options
     aai_cli.output
     aai_cli.procs
     aai_cli.remotefs
     aai_cli.render
+    aai_cli.setup_exec
     aai_cli.share_exec
     aai_cli.speak_exec
     aai_cli.stdio
+    aai_cli.steps
     aai_cli.stream_exec
     aai_cli.streaming
     aai_cli.sync_stt
     aai_cli.telemetry
     aai_cli.theme
+    aai_cli.timeparse
     aai_cli.transcribe_batch
     aai_cli.transcribe_exec
     aai_cli.transcribe_render
+    aai_cli.tts
+    aai_cli.typer_patches
+    aai_cli.update_check
     aai_cli.webhook_listen
     aai_cli.wer
     aai_cli.ws
@@ -64,30 +75,11 @@ forbidden_modules =
 [importlinter:contract:2]
 name = Command modules are independent
 type = independence
+; Wildcard so every module under aai_cli/commands/ is covered automatically —
+; the previous enumerated list had silently drifted (onboard and speak were
+; missing, so nothing forbade them from importing sibling commands).
 modules =
-    aai_cli.commands.account
-    aai_cli.commands.agent
-    aai_cli.commands.audit
-    aai_cli.commands.caption
-    aai_cli.commands.clip
-    aai_cli.commands.deploy
-    aai_cli.commands.dev
-    aai_cli.commands.dictate
-    aai_cli.commands.doctor
-    aai_cli.commands.dub
-    aai_cli.commands.evaluate
-    aai_cli.commands.init
-    aai_cli.commands.keys
-    aai_cli.commands.llm
-    aai_cli.commands.login
-    aai_cli.commands.sessions
-    aai_cli.commands.setup
-    aai_cli.commands.share
-    aai_cli.commands.stream
-    aai_cli.commands.telemetry
-    aai_cli.commands.transcribe
-    aai_cli.commands.transcripts
-    aai_cli.commands.webhooks
+    aai_cli.commands.*
 
 [importlinter:contract:3]
 name = Library layers do not depend on Rich rendering
diff --git a/AGENTS.md b/AGENTS.md
index f215dc8e..256435b7 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -4,6 +4,15 @@ This file provides guidance to coding agents (Claude Code, Codex, Cursor, and
 others) when working with code in this repository. `CLAUDE.md` is a symlink to
 this file, so Claude Code reads the same instructions.
 
+**Guidance is split per directory** so many agents can update it concurrently
+without conflicting in one file. This root file holds repo-wide invariants;
+read the `AGENTS.md` nearest the code you're changing:
+
+- `aai_cli/AGENTS.md` — architecture, the command-registration convention,
+  cross-cutting state, feature subsystems.
+- `tests/AGENTS.md` — test markers, snapshot goldens, hermeticity rules, and
+  the hard-won lessons for getting the patch-coverage and mutation gates green.
+
 ## Development commands
 
 This project uses [uv](https://docs.astral.sh/uv/). **Run every Python tool through `uv run`** so it uses the locked environment (`pyproject.toml` + `uv.lock`), not whatever is on `PATH`:
@@ -16,7 +25,7 @@ uv run assembly --help            # run the CLI from the locked environment
 
 Dev tooling is a PEP 735 `[dependency-groups]` group with `default-groups = ["dev"]`, not a `[project]` extra — `uv sync --extra dev` errors.
 
-`scripts/check.sh` is the authoritative gate; keep this list in sync with it. It runs, in order: `uv lock --check` → `ruff check` → `ruff format --check` → `mypy` → `pyright` (src strict) → `pyright` (tests) → `vulture` (dead code) → `deptry` (dependency hygiene) → `lint-imports` (import-linter architecture contracts) → max-file-length (500 lines) → `xenon` (cyclomatic complexity, max grade B / project avg A) → `swiftlint` + swift compile (macOS only, skipped elsewhere) → `markdownlint` → `prettier` (init template JS/CSS) → `shellcheck` → `actionlint` + `zizmor` (workflow lint/audit) → `gitleaks` (secret scan) → generated `--show-code` compile gate → init template contract gate → `pytest` (90% branch coverage) → `diff-cover` (100% patch coverage vs `origin/main`) → **mutation gate** (diff-scoped: mutates each changed line and reruns the tests that cover it — a surviving mutant fails the gate, so changed lines need assertions that would *fail* if the line broke, not just coverage; suppress a genuinely unassertable line with `# pragma: no mutate`) → a "no new escape hatches" diff gate (`# type: ignore` / `# noqa` / `pragma: no cover` / net-new `Any` / `cast(`) → **CodeQL gate** (`scripts/codeql_gate.py`: the same security + quality suites the CodeQL workflow uploads to GitHub's code-scanning/quality tabs, run locally over python/actions/javascript so alerts fail before push instead of on the PR; needs the CodeQL bundle on PATH — self-skips otherwise, `codeql.yml` covers CI, and the web session-start hook provisions it) → `uv build` + `twine check --strict`. The `vulture`/`deptry`/`lint-imports`/`xenon`, patch-coverage, and mutation stages catch the failures that `ruff`+`mypy` alone won't — don't claim the gate is green until the script prints `All checks passed.`
+`scripts/check.sh` is the authoritative gate; keep this list in sync with it. It runs, in order: `uv lock --check` → `ruff check` → `ruff format --check` → `mypy` → `pyright` (src strict) → `pyright` (tests) → `vulture` (dead code) → `deptry` (dependency hygiene) → `lint-imports` (import-linter architecture contracts) → max-file-length (500 lines) → `xenon` (cyclomatic complexity, max grade B / project avg A) → `swiftlint` + swift compile (macOS only, skipped elsewhere) → `markdownlint` → `prettier` (init template JS/CSS) → `shellcheck` → `actionlint` + `zizmor` (workflow lint/audit) → `gitleaks` (secret scan) → generated `--show-code` compile gate → init template contract gate → `pytest` (90% branch coverage) → `diff-cover` (100% patch coverage vs `origin/main`) → **mutation gate** (diff-scoped: mutates each changed line and reruns the tests that cover it — a surviving mutant fails the gate, so changed lines need assertions that would *fail* if the line broke, not just coverage; suppress a genuinely unassertable line with `# pragma: no mutate`) → a "no new escape hatches" gate (`# type: ignore` / `# noqa` / `pragma: no cover` / `Any` / `cast(` / test skip/xfail/sleep, all **count-gated against the merge-base** so moving an existing hatch in a refactor doesn't false-positive but a net-new one fails) → **CodeQL gate** (`scripts/codeql_gate.py`: the same security + quality suites the CodeQL workflow uploads to GitHub's code-scanning/quality tabs, run locally over python/actions/javascript so alerts fail before push instead of on the PR; needs the CodeQL bundle on PATH — self-skips otherwise, `codeql.yml` covers CI, and the web session-start hook provisions it) → `uv build` + `twine check --strict`. The `vulture`/`deptry`/`lint-imports`/`xenon`, patch-coverage, and mutation stages catch the failures that `ruff`+`mypy` alone won't — don't claim the gate is green until the script prints `All checks passed.`
 
 **Commits are gated.** On success `check.sh` records a working-tree signature (`scripts/gate_marker.py record` → `.git/aai-gate-pass`), and a PreToolUse hook (`.claude/hooks/require-gate-before-commit.sh`) blocks `git commit` unless that signature still matches — so run the full gate to completion *before* committing (a single-file `pytest` does not satisfy it), and re-run it after any further edit. Iterate with the fast targeted commands above, gate once at the end. For a deliberate work-in-progress commit, prefix `AAI_ALLOW_COMMIT=1 git commit …`.
 
@@ -32,83 +41,46 @@ uv run pytest tests/test_transcribe.py -q              # a single file
 uv run pytest tests/test_transcribe.py::test_name -q   # a single test
 ```
 
-The two diff-scoped tail gates are the slowest failures to discover via the full
-script; after a gate run (or any pytest run with the coverage flags below) they can
-be re-run alone:
-
-```sh
-uv run pytest -q -n auto --cov=aai_cli --cov-branch --cov-context=test --cov-report=xml  # refresh coverage data
-uv run diff-cover coverage.xml --compare-branch=origin/main --fail-under=100             # patch-coverage gate
-uv run python scripts/mutation_gate.py origin/main                                       # mutation gate
-```
-
-The gate is diff-scoped, so code predating it is never mutation-tested. To audit
-existing code (or a whole module) against the same bar, `scripts/mutation_sweep.py`
-reuses the gate's engine over *every* line of the files you name (or the whole
-package). Refresh coverage first, and pass `--timeout` to that pytest step — the
-default suite has no per-test timeout (it's opt-in; see `pyproject.toml`), so a
-deadlocked test would wedge the run instead of failing fast:
-
-```sh
-uv run pytest -q -n auto --timeout=60 --cov=aai_cli --cov-branch --cov-context=test --cov-report=
-uv run python scripts/mutation_sweep.py aai_cli/config.py   # or omit paths for the whole package
-```
-
-### Test markers
-
-The default suite **excludes** two slow/credentialed marker sets — `pyproject.toml`'s `addopts` carries `-m "not e2e and not install"`, so a bare `pytest` matches what `check.sh` gates. An explicit command-line `-m` overrides it for the opt-in runs:
-
-```sh
-uv run pytest -m e2e             # real-API end-to-end; needs ASSEMBLYAI_API_KEY, else skips
-uv run pytest -m install         # installs each init template's requirements for real; needs network + uv
-```
+The post-edit hook (`.claude/settings.json`) runs `ruff check --fix --unfixable F401` + `ruff format` on every edited `*.py`. `--unfixable F401` means a just-added import is **not** auto-deleted while it's momentarily unused — so adding an import in one edit and its usage in the next is safe. The flip side: a genuinely unused import survives the hook and only fails at `ruff check` in the gate, so still prefer making the import and its first usage land in the same edit.
 
-`check.sh` runs the default suite with a **90% branch-coverage gate** (`--cov-fail-under=90`). New code generally needs tests to clear that gate.
+## Working alongside other agents
+
+Dozens of sessions may be working on this repo concurrently; the codebase is
+structured so independent changes stay in disjoint files. Keep it that way:
+
+- **Check for in-flight duplicates before starting a fix.** Before implementing
+  a bug fix or small feature, scan open PRs and the last few `origin/main`
+  commits touching the same files (two sessions once shipped the identical fix;
+  the slower PR was closed as redundant). The `pr-overlap` workflow also warns
+  when a PR's changed files intersect another open PR's — treat that warning as
+  a prompt to reconcile, not noise.
+- **A new command edits no shared file.** Registration, help ordering, and the
+  snapshot partition are all derived from the command module's own `SPEC`
+  declaration (see `aai_cli/AGENTS.md`). If you find yourself editing a shared
+  list to add a command, you're fighting the convention.
+- **Dependency changes are not part of feature PRs.** `uv.lock` is the one file
+  two branches can never merge cleanly; add or bump dependencies in a
+  dedicated, single-purpose PR so feature branches don't collide in the
+  lockfile.
+- **Land through the merge queue.** The diff-scoped gates compare against
+  `origin/main`, which moves constantly; two individually-green PRs can be
+  jointly red. PRs should merge via GitHub's merge queue (a repository setting)
+  so the gate re-runs against the combined state before landing — don't bypass
+  it with direct pushes to `main`.
+- **Update the `AGENTS.md` nearest your change** when you learn something
+  durable; don't grow this root file.
 
-CLI output is pinned by **syrupy snapshot tests** (`tests/__snapshots__/*.ambr`). Changing help text, tables, or rendered output will fail those tests until you regenerate them with `uv run pytest --snapshot-update` and commit the updated `.ambr` files. The auto-format hook only touches `*.py`, and pre-commit's whitespace fixers deliberately skip `tests/__snapshots__/` (syrupy's indentation must stay byte-for-byte), so never hand-edit a snapshot — always regenerate. The `--help` goldens are split per command group (`tests/test_snapshots_help_<group>.py`) so concurrent branches touching different commands regenerate *different* `.ambr` files; a new top-level command must be added to `HELP_GROUPS` in `tests/_snapshot_surface.py` (the partition guard in `tests/test_snapshots_help_groups.py` fails until it is).
+## Naming & packaging gotchas
 
-The post-edit hook (`.claude/settings.json`) runs `ruff check --fix --unfixable F401` + `ruff format` on every edited `*.py`. `--unfixable F401` means a just-added import is **not** auto-deleted while it's momentarily unused — so adding an import in one edit and its usage in the next is safe. The flip side: a genuinely unused import survives the hook and only fails at `ruff check` in the gate, so still prefer making the import and its first usage land in the same edit.
+- The **package/module** is `aai_cli`; the **distribution** name is `aai-cli`; the **console command** is `assembly` (`[project.scripts] assembly = "aai_cli.main:run"`).
+- `assembly init` templates live in `aai_cli/init/templates/` and are **committed**, including renamed dotfiles (`gitignore` → `.gitignore`, `env.example`). The wheel force-includes them via `[tool.hatch.build.targets.wheel] artifacts`, excluding `__pycache__/*.pyc`. Editing templates needs care — see the parametrized contract tests (`tests/test_init_template_*.py`).
+- `audioop` left the stdlib in 3.13; `audioop-lts` backfills it (conditional dependency). Supported Pythons: 3.12–3.13.
+- **Releasing is tag-triggered.** The version is **derived from the git tag** by hatch-vcs and written to a gitignored `aai_cli/_version.py` at build time — there is no version string to keep in sync across `pyproject.toml` or `aai_cli/__init__.py`, and `bump_patch.sh` no longer exists. To cut a release, run `scripts/cut_release.sh` from a clean `main` in sync with `origin/main`: no argument → next patch above the latest `vX.Y.Z` tag; `cut_release.sh X.Y.Z` → explicit version. It tags + pushes, which fires `.github/workflows/release.yml` — that builds the prebuilt arm64 Homebrew bottle (`Formula/assembly.rb`), cuts the GitHub Release, and opens the formula PR. Bottling matters because the deps include Rust-backed sdists (`pydantic-core`, `jiter`, `cryptography`) that would otherwise compile from source on `brew install`. The Homebrew formula builds from a git-less GitHub source tarball, so `Formula/assembly.rb`'s `def install` sets the generic `SETUPTOOLS_SCM_PRETEND_VERSION` env var (installing resources first under a clean env, then setting the var for our package only) to feed the tag version to the build. **`cut_release.sh` only runs from a clean `main` in sync with `origin/main`** (it hard-errors on a feature branch / dirty tree), so cut releases from `main`, not your working branch. The "update available" notice users see is `aai_cli/update_check.py`.
 
-The suite is hermetic by construction, enforced three ways (`tests/conftest.py` + `pyproject.toml` `[tool.pytest.ini_options]`): **pytest-randomly** shuffles order, an autouse `pin_timezone` fixture pins `TZ` to a fixed non-UTC zone (UTC-normalized rendering must be unaffected; use **time-machine** to freeze `now`), and **pytest-socket** (`--disable-socket`) blocks real network so an unmocked SDK/HTTP call fails loudly instead of hitting the API. A test that only binds a loopback server opts back in with the tight `@pytest.mark.allow_hosts(["127.0.0.1"])` (still blocks external hosts). The `e2e`/`install` marker suites legitimately reach the real network in-process (PyPI reachability probes, real-API runs), so a `pytest_collection_modifyitems` hook in `conftest.py` auto-grants them full sockets — adding a network marker is all that's needed, no per-test `enable_socket`.
-
-### Writing tests that pass the diff gates
-
-Lessons that cost iterations getting the patch-coverage and mutation tail gates green:
-
-- **A boolean literal/default survives the mutation gate unless a test asserts the
-  difference between its two values**, not just that the line ran. `json_mode=False` passed
-  to `output.emit`, or `quiet=False` on `output.status`, get mutated to `True` — kill them by
-  asserting the *behavioral* split: the human branch prints bare text
-  (`result.output.strip() == "…"`, not a JSON object), or the spinner is actually entered
-  (monkeypatch `error_console.status` and assert it ran). A changed message / `prompter.note`
-  string is mutated whole, so one substring assert on the actionable keyword kills it.
-- **Help text and docstrings are pinned by the syrupy snapshots, not unit asserts** — a
-  mutated help string is killed by the regenerated `.ambr`, so `--snapshot-update` and commit
-  rather than adding redundant `--help` substring asserts.
-- **Typer's `CliRunner` merges stderr into `result.output`, and not in call order**, so don't
-  assume `splitlines()[-1]` is the command payload. In `--json` mode the env-mismatch warning
-  is its own `{"warning": …}` line, so filter parsed lines by a key the payload carries
-  (`next(o for o in objs if "env" in o)`). A monkeypatched fake must also mirror the real
-  signature — when a helper gains a kwarg (e.g. `output.status(…, quiet=…)`), doubles that
-  patch it must accept it or the call `TypeError`s.
-- **`--json` / `-j` is a per-command flag, not a root flag**: `assembly --json transcribe …` fails
-  with "No such option"; it's `assembly transcribe … --json`. (The root callback still sniffs the
-  whole token list via `argscan.requests_json`, so a callback-level failure like a bad
-  `--env` keeps the JSON error shape — but the flag itself lives on the subcommand.)
-- **Tests that touch global logging state must snapshot/restore it** — root handlers/level
-  and per-logger levels are process-global, so a leak only fails on some pytest-randomly
-  seeds (green locally, red in CI). Opt in to the shared `preserve_logging_state` conftest
-  fixture (it also resets the websockets wire loggers a silencer test may have clamped)
-  instead of hand-rolling the snapshot per module.
-
-### Manual QA / running the CLI in sandboxed sessions
+## Manual QA / running the CLI in sandboxed sessions
 
 Lessons that cost time in agent sessions — read before exercising `uv run assembly` by hand:
 
-- **Check for in-flight duplicates before starting a fix.** Sessions run concurrently:
-  before implementing a bug fix or small feature, scan open PRs and the last few
-  `origin/main` commits touching the same files (two sessions once shipped the identical
-  fix; the slower PR was closed as redundant). Seconds of checking beats a discarded PR.
 - **Web/remote containers are fully provisioned at session start**
   (`.claude/hooks/session-start.sh`): system deps, `markdownlint`/`prettier`, and the Go
   gate binaries (`actionlint`, `gitleaks`) are installed at CI's pinned versions, so
@@ -132,73 +104,9 @@ Lessons that cost time in agent sessions — read before exercising `uv run asse
   blocking path can't wedge the session. For pytest, `--timeout N` (pytest-timeout, in the
   dev group) does the same per-test.
 
-### Replay fixtures (offline end-to-end coverage)
-
-`tests/test_replay_e2e.py` drives whole commands (`transcribe`/`transcripts`/`llm`/
-`balance`/`usage`/`limits`) against **real** API responses recorded once and replayed
-offline — the command's own parsing/rendering runs, but pytest-socket stays armed, so
-these live in the default suite. Three moving parts:
-
-- **`tests/fixtures/api/*.json`** — scrubbed snapshots (API key/JWT redacted, `email` and
-  `account_id` faked, private `cdn.assemblyai.com/upload/…` URLs redacted). Committed and
-  gitleaks-clean; treat them like syrupy snapshots (regenerate, don't hand-edit).
-- **`scripts/record_fixtures.py`** — the recorder. It is **deliberately outside the gate**
-  (it hits the network) and is *not* mypy/pyright-checked (only ruff covers `scripts/`).
-  Refresh after an API shape change: `ASSEMBLYAI_API_KEY=… uv run python scripts/record_fixtures.py`.
-  The key comes from the env; the AMS session JWT + `account_id` from the keyring/`config.toml`
-  of whoever ran `assembly login` (profile `default`) — neither is ever written to a fixture.
-- **`tests/replay_fixtures.py`** — rebuilds the boundary objects from JSON. A transcript is a
-  real `aai.Transcript` via `Transcript.from_response`; an LLM response is rebuilt with
-  `ChatCompletion.model_construct` (**not** `model_validate`) because the gateway returns
-  Anthropic-flavored fields — `finish_reason="end_turn"`, token counts under
-  `input_tokens`/`output_tokens` — that strict validation rejects but the OpenAI SDK itself
-  parses leniently.
-
-The replay tests patch the same boundary the unit tests do
-(`commands.<cmd>.client.<fn>` / `.ams.<fn>` / `.gateway.complete`); the only difference is
-the return value comes from a recorded payload instead of a hand-built mock.
-
-## Naming & packaging gotchas
-
-- The **package/module** is `aai_cli`; the **distribution** name is `aai-cli`; the **console command** is `assembly` (`[project.scripts] assembly = "aai_cli.main:run"`).
-- `assembly init` templates live in `aai_cli/init/templates/` and are **committed**, including renamed dotfiles (`gitignore` → `.gitignore`, `env.example`). The wheel force-includes them via `[tool.hatch.build.targets.wheel] artifacts`, excluding `__pycache__/*.pyc`. Editing templates needs care — see the parametrized contract tests (`tests/test_init_template_*.py`).
-- `audioop` left the stdlib in 3.13; `audioop-lts` backfills it (conditional dependency). Supported Pythons: 3.12–3.13.
-- **Releasing is tag-triggered.** The version is **derived from the git tag** by hatch-vcs and written to a gitignored `aai_cli/_version.py` at build time — there is no version string to keep in sync across `pyproject.toml` or `aai_cli/__init__.py`, and `bump_patch.sh` no longer exists. To cut a release, run `scripts/cut_release.sh` from a clean `main` in sync with `origin/main`: no argument → next patch above the latest `vX.Y.Z` tag; `cut_release.sh X.Y.Z` → explicit version. It tags + pushes, which fires `.github/workflows/release.yml` — that builds the prebuilt arm64 Homebrew bottle (`Formula/assembly.rb`), cuts the GitHub Release, and opens the formula PR. Bottling matters because the deps include Rust-backed sdists (`pydantic-core`, `jiter`, `cryptography`) that would otherwise compile from source on `brew install`. The Homebrew formula builds from a git-less GitHub source tarball, so `Formula/assembly.rb`'s `def install` sets the generic `SETUPTOOLS_SCM_PRETEND_VERSION` env var (installing resources first under a clean env, then setting the var for our package only) to feed the tag version to the build. **`cut_release.sh` only runs from a clean `main` in sync with `origin/main`** (it hard-errors on a feature branch / dirty tree), so cut releases from `main`, not your working branch. The "update available" notice users see is `aai_cli/update_check.py`.
-
-## Architecture
-
-A Typer CLI. `aai_cli/main.py` builds the `app`, registers each command sub-app, and controls `assembly --help` ordering via `_COMMAND_ORDER` + a custom `_OrderedGroup`. `run()` is the entry point and swallows `BrokenPipeError` (closed downstream pipe → exit 0).
-
-### Command layer
-
-Each file in `aai_cli/commands/` is a Typer sub-app (`transcribe`, `stream`, `dictate`, `agent`, `speak`, `llm`, `clip`, `transcripts`, `login` (login/logout/whoami), `doctor`, `init`, `dev`, `share`, `deploy`, `setup`, `onboard`, `account` (balance/usage/limits), `keys`, `sessions`, `audit`, `telemetry` (status/enable/disable), `webhooks` (listen)). Command bodies run through `context.run_command(ctx, fn, json=...)`, which maps any `CLIError` to clean stderr output + the error's exit code. Commands never print tracebacks for expected failures.
-
-**Options/run split for flag-heavy commands** (gh-CLI style): the Typer function only parses argv into a frozen `<Cmd>Options` dataclass and hands it to a module-level `run_<cmd>(opts, state, *, json_mode)` through a thin lambda adapter in `run_command(ctx, ..., json=...)`. The seven run commands follow it — `aai_cli/stream_exec.py` (the reference implementation), `transcribe_exec.py`, `agent_exec.py`, `speak_exec.py`, `llm_exec.py`, `clip_exec.py`, `dictate_exec.py`. Because the run path is a plain function of data, tests construct options directly (`dataclasses.replace` off a defaults instance, see `tests/test_stream_exec.py` and `tests/test_command_options_seam.py`) instead of round-tripping argv through `CliRunner` — which is also the cheap way to kill mutation-gate mutants on orchestration lines. Follow this for new or heavily-reworked commands with long bodies; small commands keep the inline `body()` closure — the dataclass is pure ceremony there.
-
-### Cross-cutting state (resolution order matters)
-
-- **`context.py`** — `AppState` (profile, env) is attached to the Typer context in the root `@app.callback()`. `run_command` is the standard command wrapper.
-- **`config.py`** — profiles persisted in `config.toml` (via `platformdirs`); the **API key lives only in the OS keyring** (`KEYRING_SERVICE = "assemblyai-cli"`), never in a dotfile. Key resolution order: `--api-key` flag (validation paths only) → `ASSEMBLYAI_API_KEY` env → keyring. **Run commands deliberately expose no `--api-key` flag** so keys can't leak into `ps`/shell history.
-- **`environments.py`** — a frozen `Environment` (api_base, streaming_host, llm_gateway_base, ams_base, stytch_*). `DEFAULT_ENV` is **`production`**; use `--sandbox` (or `--env sandbox000` / `AAI_ENV`) to target the sandbox. The active environment is a process-global set once at startup; precedence: `--env` → `AAI_ENV` → profile's stored env → default. A credential is only valid against the environment that minted it.
-- **`client.py`** — thin wrappers over the `assemblyai` SDK (`transcribe`, `list_transcripts`, `stream_audio`, etc.). It normalizes SDK exceptions: auth failures become a single clean `auth_failure()` `CLIError`; everything else becomes `APIError`. New SDK calls should follow this try/except shape.
-- **`errors.py`** — the `CLIError` hierarchy (each with `error_type` + `exit_code`). `output.py` emits errors to **stderr**; stdout stays clean for pipelines. `--json` switches to machine-readable output; it is never auto-enabled — `output.resolve_json()` deliberately keeps human text the default even when piped or agent-run.
-- **`debuglog.py`** — the root `-v/--verbose` flag (count: `-v` request-level at INFO, `-vv` wire-level at DEBUG). The CLI normally configures no logging, and the realtime paths *silence* library loggers (`ws.py`, `streaming/diagnostics.py`); verbose mode installs one redacting stderr handler and those silencers stand down. Secrets are registered at their resolution choke points (`config.resolve_api_key`, `AppState.resolve_session`) and masked in every rendered record — websockets logs the raw Authorization header at DEBUG, so masking lives in the formatter, not at call sites. Stdlib-only on purpose: `config` (a Rich-free layer) imports it.
-
-### Feature subsystems
-
-- **`streaming/`** + `client.stream_audio` — v3 realtime API. Event callbacks run on the SDK reader thread and guard against `BrokenPipeError` (`stdio.silence_stdout()`) so a closed pipe never dumps a thread traceback.
-- **`sync_stt.py`** + **`hotkey.py`** + `commands/dictate.py` — `assembly dictate`: push-to-talk dictation over the **Sync STT API** (`Environment.sync_base`, one POST `/transcribe` per utterance with the required `X-AAI-Model: u3-sync-pro` header; 80 ms–120 s of PCM/WAV). `hotkey.TerminalKeys` scopes stdin into cbreak (Ctrl-C still signals) and reads single keypresses; `dictate_exec._record` polls it with a zero timeout between ~100 ms mic chunks. All three boundaries (keys, mic, HTTP) are injectable, so the suite never needs a real terminal — `tests/test_hotkey.py` drives a pty pair for the termios behavior.
-- **`agent/`** — full-duplex voice agent (mic in, TTS out via `voices.py`).
-- **`tts/`** + `commands/speak.py` — `assembly speak` synthesizes text to speech over the sandbox streaming-TTS WebSocket (`streaming-tts.sandbox000.…`). **Sandbox-only:** `session.is_available()` is false in production (empty `Environment.streaming_tts_host`), so the command exits 2 with a `--sandbox` hint. `session.synthesize` drives a Begin→Generate→Flush→Audio→Terminate protocol with an injectable `connect` for hermetic tests (mirrors `agent/session.py`); `audio.py` plays the PCM (default) or writes a WAV (`--out`).
-- **`code_gen/`** — backs `--show-code` on `transcribe`/`stream`/`agent`: builds a ready-to-run Python SDK script from exactly the flags passed (no API key needed; generated code reads `ASSEMBLYAI_API_KEY`).
-- **`auth/`** — browser-assisted `assembly login` via AMS + **Stytch B2B OAuth discovery** (`discovery.py`, `flow.py`, `loopback.py`, `ams.py`). Not Stytch Connected Apps.
-- **`init/`** — scaffolds a self-contained FastAPI + HTML starter (`audio-transcription`/`live-captions`/`voice-agent` templates), optionally installs deps and opens the browser; writes the key to a git-ignored `.env`.
-- **`telemetry.py`** — anonymous, opt-out usage telemetry (Supabase-CLI model): `context.run_command` wraps each command body in `telemetry.track(ctx.command_path)`, which dispatches one allow-listed event (command path, outcome/exit code, duration, version/OS, and on failure the error message capped at 500 chars — never args or account data) to the Datadog logs intake via a **detached flusher subprocess** (the hidden `assembly telemetry flush`), so commands never wait on telemetry. `SHIPPED_CLIENT_TOKEN` is a committed write-only Datadog *client* token (`pub…`, embeddable by design — never an API key; `AAI_TELEMETRY_CLIENT_TOKEN` overrides). The test suite blanks it via an autouse conftest fixture so no test ever spawns a real flusher. Opt-out: `AAI_TELEMETRY_DISABLED=1` / `DO_NOT_TRACK=1` / `assembly telemetry disable` (persisted as `telemetry_enabled` in config.toml, alongside the random `device_id`). Send-side failures are swallowed (`OSError`/`CLIError`) — telemetry must never break a command.
-- **`commands/setup.py`** — `assembly setup install/status/remove` wires a coding agent up to AssemblyAI by installing three artifacts: the `assemblyai-docs` docs MCP (via `claude mcp add`), the AssemblyAI skill (via `npx skills add`), and the bundled `aai-cli` skill (copied out of the wheel, no network). Missing `claude`/`npx` is reported and skipped, not an error. The presence probes (docs MCP registered, skills on disk) live in `aai_cli/coding_agent.py` so `assembly doctor`'s coding-agent check can share them — command modules are import-linter-independent, so neither command may import the other.
-
 ## Conventions
 
 - `from __future__ import annotations` at the top of every module; modern typing (`X | None`).
-- Ruff lint set: `E,F,I,UP,B,BLE,C4,SIM,RET,PTH,ARG,S,RUF`. `S603/S607` are ignored project-wide because the CLI intentionally shells out to `claude`/`npx` with controlled args. `B008` is ignored (Typer uses `typer.Option/Argument` calls as defaults).
+- Ruff lint set: see `[tool.ruff.lint]` in `pyproject.toml`. `S603/S607` are ignored project-wide because the CLI intentionally shells out to `claude`/`npx` with controlled args. `B008` is ignored (Typer uses `typer.Option/Argument` calls as defaults).
 - mypy is strict on `aai_cli` (`disallow_untyped_defs`); tests are type-checked but exempt from return annotations.
 - Errors → stderr, data → stdout. Preserve this split; it's what makes the CLI pipeline-safe.
diff --git a/aai_cli/AGENTS.md b/aai_cli/AGENTS.md
new file mode 100644
index 00000000..f740bceb
--- /dev/null
+++ b/aai_cli/AGENTS.md
@@ -0,0 +1,93 @@
+# aai_cli/ — architecture guide
+
+Scoped guidance for the package source. Repo-wide invariants (gate, commit
+hooks, conventions) live in the root `AGENTS.md`; test-suite guidance lives in
+`tests/AGENTS.md`.
+
+## Architecture
+
+A Typer CLI. `aai_cli/main.py` builds the `app` and registers every command
+module discovered by `aai_cli/command_registry.py`. Typer/Click/Rich overrides
+(help palette, column clipping, pipe-safe consoles, Click error formatting)
+live in `aai_cli/typer_patches.py` — one file to fix when a dependency upgrade
+breaks a patch; each patch documents the upstream behavior it overrides.
+`run()` is the entry point and swallows `BrokenPipeError` (closed downstream
+pipe → exit 0).
+
+### Command layer & the registration convention
+
+Each file in `aai_cli/commands/` is a Typer sub-app (`transcribe`, `stream`,
+`dictate`, `agent`, `speak`, `llm`, `clip`, `dub`, `caption`, `eval`,
+`transcripts`, `login` (login/logout/whoami), `doctor`, `init`, `dev`, `share`,
+`deploy`, `setup`, `onboard`, `account` (balance/usage/limits), `keys`,
+`sessions`, `audit`, `telemetry` (status/enable/disable), `webhooks` (listen)).
+
+**Adding a command is purely additive — no shared file edits.** Every command
+module declares a module-level
+`SPEC = command_registry.CommandModuleSpec(panel=…, order=…, commands=…)`:
+
+- `panel`: one of `help_panels.PANEL_ORDER` — which `assembly --help` panel its
+  commands render under. This declaration also derives the help-snapshot
+  partition (`HELP_GROUPS` in `tests/_snapshot_surface.py`), so a new command
+  is automatically required to have a `--help` golden in the right group.
+- `order`: a sparse rank within the panel (10, 20, 30, …) so a new command
+  slots between neighbors without renumbering them. Mark the line
+  `# pragma: no mutate` — a ±1 shift is order-equivalent, so no test can kill
+  that mutant.
+- `commands`: the top-level command names the module contributes, in display
+  order (multi-command merged modules like `login` list all three).
+- `group_name`: set for named sub-groups (`assembly keys list` style);
+  `main.py` then passes it to `add_typer(name=…, rich_help_panel=…)`. Merged
+  (nameless) modules instead set `rich_help_panel` on each `@app.command()`.
+
+`command_registry.discover()` imports every module under `aai_cli/commands/`
+(skipping `_`-prefixed names), validates the convention (a module missing
+`SPEC` or `app` fails loudly at import), and orders them; `main.py` registers
+the result. The help ordering, the root `--help` golden, and the snapshot
+partition are all derived from the same `SPEC`s.
+
+Command bodies run through `context.run_command(ctx, fn, json=...)`, which maps
+any `CLIError` to clean stderr output + the error's exit code. Commands never
+print tracebacks for expected failures.
+
+**Command modules are import-linter-independent** (`.importlinter` contract 2,
+wildcarded over `aai_cli.commands.*` so new modules are covered automatically).
+Logic shared between commands lives in the core layer: `doctor_checks.py`
+(diagnostics shared by `doctor` and onboarding) and `setup_exec.py` (installer
+steps shared by `setup` and onboarding) are the precedent — never import one
+command module from another.
+
+**Options/run split for flag-heavy commands** (gh-CLI style): the Typer
+function only parses argv into a frozen `<Cmd>Options` dataclass and hands it
+to a module-level `run_<cmd>(opts, state, *, json_mode)` through a thin lambda
+adapter in `run_command(ctx, ..., json=...)`. The run commands follow it —
+`aai_cli/stream_exec.py` (the reference implementation), `transcribe_exec.py`,
+`agent_exec.py`, `speak_exec.py`, `llm_exec.py`, `clip_exec.py`,
+`dictate_exec.py`. Because the run path is a plain function of data, tests
+construct options directly (`dataclasses.replace` off a defaults instance, see
+`tests/test_stream_exec.py` and `tests/test_command_options_seam.py`) instead
+of round-tripping argv through `CliRunner` — which is also the cheap way to
+kill mutation-gate mutants on orchestration lines. Follow this for new or
+heavily-reworked commands with long bodies; small commands keep the inline
+`body()` closure — the dataclass is pure ceremony there.
+
+### Cross-cutting state (resolution order matters)
+
+- **`context.py`** — `AppState` (profile, env) is attached to the Typer context in the root `@app.callback()`. `run_command` is the standard command wrapper.
+- **`config.py`** — profiles persisted in `config.toml` (via `platformdirs`); the **API key lives only in the OS keyring** (`KEYRING_SERVICE = "assemblyai-cli"`), never in a dotfile. Key resolution order: `--api-key` flag (validation paths only) → `ASSEMBLYAI_API_KEY` env → keyring. **Run commands deliberately expose no `--api-key` flag** so keys can't leak into `ps`/shell history.
+- **`environments.py`** — a frozen `Environment` (api_base, streaming_host, llm_gateway_base, ams_base, stytch_*). `DEFAULT_ENV` is **`production`**; use `--sandbox` (or `--env sandbox000` / `AAI_ENV`) to target the sandbox. The active environment is a process-global set once at startup; precedence: `--env` → `AAI_ENV` → profile's stored env → default. A credential is only valid against the environment that minted it.
+- **`client.py`** — thin wrappers over the `assemblyai` SDK (`transcribe`, `list_transcripts`, `stream_audio`, etc.). It normalizes SDK exceptions: auth failures become a single clean `auth_failure()` `CLIError`; everything else becomes `APIError`. New SDK calls should follow this try/except shape.
+- **`errors.py`** — the `CLIError` hierarchy (each with `error_type` + `exit_code`). `output.py` emits errors to **stderr**; stdout stays clean for pipelines. `--json` switches to machine-readable output; it is never auto-enabled — `output.resolve_json()` deliberately keeps human text the default even when piped or agent-run.
+- **`debuglog.py`** — the root `-v/--verbose` flag (count: `-v` request-level at INFO, `-vv` wire-level at DEBUG). The CLI normally configures no logging, and the realtime paths *silence* library loggers (`ws.py`, `streaming/diagnostics.py`); verbose mode installs one redacting stderr handler and those silencers stand down. Secrets are registered at their resolution choke points (`config.resolve_api_key`, `AppState.resolve_session`) and masked in every rendered record — websockets logs the raw Authorization header at DEBUG, so masking lives in the formatter, not at call sites. Stdlib-only on purpose: `config` (a Rich-free layer) imports it.
+
+### Feature subsystems
+
+- **`streaming/`** + `client.stream_audio` — v3 realtime API. Event callbacks run on the SDK reader thread and guard against `BrokenPipeError` (`stdio.silence_stdout()`) so a closed pipe never dumps a thread traceback.
+- **`sync_stt.py`** + **`hotkey.py`** + `commands/dictate.py` — `assembly dictate`: push-to-talk dictation over the **Sync STT API** (`Environment.sync_base`, one POST `/transcribe` per utterance with the required `X-AAI-Model: u3-sync-pro` header; 80 ms–120 s of PCM/WAV). `hotkey.TerminalKeys` scopes stdin into cbreak (Ctrl-C still signals) and reads single keypresses; `dictate_exec._record` polls it with a zero timeout between ~100 ms mic chunks. All three boundaries (keys, mic, HTTP) are injectable, so the suite never needs a real terminal — `tests/test_hotkey.py` drives a pty pair for the termios behavior.
+- **`agent/`** — full-duplex voice agent (mic in, TTS out via `voices.py`).
+- **`tts/`** + `commands/speak.py` — `assembly speak` synthesizes text to speech over the sandbox streaming-TTS WebSocket (`streaming-tts.sandbox000.…`). **Sandbox-only:** `session.is_available()` is false in production (empty `Environment.streaming_tts_host`), so the command exits 2 with a `--sandbox` hint. `session.synthesize` drives a Begin→Generate→Flush→Audio→Terminate protocol with an injectable `connect` for hermetic tests (mirrors `agent/session.py`); `audio.py` plays the PCM (default) or writes a WAV (`--out`).
+- **`code_gen/`** — backs `--show-code` on `transcribe`/`stream`/`agent`: builds a ready-to-run Python SDK script from exactly the flags passed (no API key needed; generated code reads `ASSEMBLYAI_API_KEY`).
+- **`auth/`** — browser-assisted `assembly login` via AMS + **Stytch B2B OAuth discovery** (`discovery.py`, `flow.py`, `loopback.py`, `ams.py`). Not Stytch Connected Apps.
+- **`init/`** — scaffolds a self-contained FastAPI + HTML starter (`audio-transcription`/`live-captions`/`voice-agent` templates), optionally installs deps and opens the browser; writes the key to a git-ignored `.env`.
+- **`telemetry.py`** — anonymous, opt-out usage telemetry (Supabase-CLI model): `context.run_command` wraps each command body in `telemetry.track(ctx.command_path)`, which dispatches one allow-listed event (command path, outcome/exit code, duration, version/OS, and on failure the error message capped at 500 chars — never args or account data) to the Datadog logs intake via a **detached flusher subprocess** (the hidden `assembly telemetry flush`), so commands never wait on telemetry. `SHIPPED_CLIENT_TOKEN` is a committed write-only Datadog *client* token (`pub…`, embeddable by design — never an API key; `AAI_TELEMETRY_CLIENT_TOKEN` overrides). The test suite blanks it via an autouse conftest fixture so no test ever spawns a real flusher. Opt-out: `AAI_TELEMETRY_DISABLED=1` / `DO_NOT_TRACK=1` / `assembly telemetry disable` (persisted as `telemetry_enabled` in config.toml, alongside the random `device_id`). Send-side failures are swallowed (`OSError`/`CLIError`) — telemetry must never break a command.
+- **`commands/setup.py`** + **`setup_exec.py`** — `assembly setup install/status/remove` wires a coding agent up to AssemblyAI by installing three artifacts: the `assemblyai-docs` docs MCP (via `claude mcp add`), the AssemblyAI skill (via `npx skills add`), and the bundled `aai-cli` skill (copied out of the wheel, no network). Missing `claude`/`npx` is reported and skipped, not an error. The step implementations live in `aai_cli/setup_exec.py` and the presence probes (docs MCP registered, skills on disk) in `aai_cli/coding_agent.py`, so `assembly doctor` (via `doctor_checks.py`) and the onboarding wizard share them without command modules importing each other.
diff --git a/aai_cli/CLAUDE.md b/aai_cli/CLAUDE.md
new file mode 120000
index 00000000..47dc3e3d
--- /dev/null
+++ b/aai_cli/CLAUDE.md
@@ -0,0 +1 @@
+AGENTS.md
\ No newline at end of file
diff --git a/aai_cli/argscan.py b/aai_cli/argscan.py
index 5e7aad11..a55c8f40 100644
--- a/aai_cli/argscan.py
+++ b/aai_cli/argscan.py
@@ -9,11 +9,17 @@
 
 from __future__ import annotations
 
-# The standalone "give me JSON" flag spellings. Shared with main's misplaced-flag
+# The standalone "give me JSON" flag spellings. Shared with the misplaced-flag
 # hint (which recognizes a `--json`/`-j` passed at the root level), so the two
 # can't drift on which forms count.
 JSON_FLAGS = ("--json", "-j")
 
+# Where the root group stashes the raw token list on the Click context before
+# parsing (see `_OrderedGroup.parse_args` in main.py). The root callback and the
+# Click error formatter (typer_patches.py) both read it to honor a JSON opt-in
+# for failures raised before the subcommand parses its own --json.
+RAW_ARGS_META_KEY = "aai_raw_args"
+
 
 def requests_json(raw_args: list[str]) -> bool:
     """Whether the token list opts into JSON output: ``--json``, ``-j``,
diff --git a/aai_cli/command_registry.py b/aai_cli/command_registry.py
new file mode 100644
index 00000000..761e6b64
--- /dev/null
+++ b/aai_cli/command_registry.py
@@ -0,0 +1,93 @@
+"""Convention-based registration for command modules.
+
+Every module under ``aai_cli.commands`` declares a module-level
+``SPEC = CommandModuleSpec(...)`` describing how it plugs into the root app
+(which help panel it renders under, its rank within that panel, the top-level
+command names it contributes, and — for named sub-groups like ``transcripts`` —
+the ``add_typer`` name). ``main.py`` discovers and registers every module via
+:func:`discover`, so adding a command is purely additive: drop a new module in
+``aai_cli/commands/`` with a ``SPEC`` and it is imported, registered, ordered,
+and covered by the help-snapshot partition without editing any shared file.
+
+A module that forgets (or misdeclares) its ``SPEC`` fails loudly at import time
+rather than silently dropping out of the CLI.
+"""
+
+from __future__ import annotations
+
+import importlib
+import pkgutil
+from dataclasses import dataclass
+
+import typer
+
+from aai_cli import help_panels
+
+
+@dataclass(frozen=True)
+class CommandModuleSpec:
+    """How a module under ``aai_cli.commands`` plugs into the root app."""
+
+    # One of the help_panels.PANEL_ORDER headings its commands render under.
+    panel: str
+    # Rank within the panel. Sparse by convention (10, 20, 30, …) so a new command
+    # slots between two existing ones without renumbering its neighbors.
+    order: int
+    # Top-level command names this module contributes, in display order. Most
+    # modules contribute one; merged multi-command modules (e.g. login/logout/whoami)
+    # list each name so `assembly --help` ordering stays fully derived.
+    commands: tuple[str, ...]
+    # ``app.add_typer(name=...)`` for named sub-groups (``assembly keys list`` style);
+    # None for merged modules whose commands sit directly on the root app.
+    group_name: str | None = None
+
+
+@dataclass(frozen=True)
+class RegisteredModule:
+    """A discovered command module: its declared spec plus its Typer sub-app."""
+
+    spec: CommandModuleSpec
+    app: typer.Typer
+
+
+def _load(module_name: str) -> RegisteredModule:
+    """Import one command module and validate its registration convention."""
+    module = importlib.import_module(module_name)
+    spec = getattr(module, "SPEC", None)
+    if not isinstance(spec, CommandModuleSpec):
+        raise TypeError(
+            f"{module_name} must declare a module-level SPEC = CommandModuleSpec(...) "
+            "so it can be registered (see aai_cli/command_registry.py)."
+        )
+    if spec.panel not in help_panels.PANEL_ORDER:
+        raise RuntimeError(
+            f"{module_name} declares unknown help panel {spec.panel!r}; "
+            "use one of help_panels.PANEL_ORDER."
+        )
+    sub_app = getattr(module, "app", None)
+    if not isinstance(sub_app, typer.Typer):
+        raise TypeError(f"{module_name} must expose a module-level `app = typer.Typer(...)`.")
+    return RegisteredModule(spec=spec, app=sub_app)
+
+
+def discover() -> tuple[RegisteredModule, ...]:
+    """Every non-``_``-prefixed command module under ``aai_cli.commands``, in display order.
+
+    Display order is (panel rank, module's order, command names): panels render in
+    ``help_panels.PANEL_ORDER`` and stay contiguous by construction.
+    """
+    from aai_cli import commands as commands_pkg
+
+    panel_rank = {panel: rank for rank, panel in enumerate(help_panels.PANEL_ORDER)}
+    registered = [
+        _load(f"{commands_pkg.__name__}.{info.name}")
+        for info in pkgutil.iter_modules(commands_pkg.__path__)
+        if not info.name.startswith("_")
+    ]
+    registered.sort(key=lambda reg: (panel_rank[reg.spec.panel], reg.spec.order, reg.spec.commands))
+    return tuple(registered)
+
+
+def command_order(registered: tuple[RegisteredModule, ...]) -> tuple[str, ...]:
+    """Top-level command names in display order (drives `assembly --help`)."""
+    return tuple(name for module in registered for name in module.spec.commands)
diff --git a/aai_cli/commands/account.py b/aai_cli/commands/account.py
index 0d8cac7a..42028b82 100644
--- a/aai_cli/commands/account.py
+++ b/aai_cli/commands/account.py
@@ -8,7 +8,7 @@
 from rich.markup import escape
 from rich.text import Text
 
-from aai_cli import help_panels, jsonshape, options, output, timeparse
+from aai_cli import command_registry, help_panels, jsonshape, options, output, timeparse
 from aai_cli.auth import ams
 from aai_cli.context import AppState, run_command
 from aai_cli.errors import UsageError
@@ -127,6 +127,12 @@ class _Usage(BaseModel):
 
 app = typer.Typer(help="Account billing, usage, and limits.")
 
+SPEC = command_registry.CommandModuleSpec(
+    panel=help_panels.ACCOUNT,
+    order=20,  # pragma: no mutate -- sparse rank; a +-1 shift is order-equivalent
+    commands=("balance", "usage", "limits"),
+)
+
 
 @app.command(
     rich_help_panel=help_panels.ACCOUNT,
diff --git a/aai_cli/commands/agent.py b/aai_cli/commands/agent.py
index da9dbe8e..7e2edbd8 100644
--- a/aai_cli/commands/agent.py
+++ b/aai_cli/commands/agent.py
@@ -4,7 +4,7 @@
 
 import typer
 
-from aai_cli import agent_exec, choices, help_panels, options, output
+from aai_cli import agent_exec, choices, command_registry, help_panels, options, output
 from aai_cli.agent.session import DEFAULT_GREETING, DEFAULT_PROMPT
 from aai_cli.agent.voices import (
     DEFAULT_VOICE,
@@ -17,6 +17,12 @@
 
 app = typer.Typer()
 
+SPEC = command_registry.CommandModuleSpec(
+    panel=help_panels.TRANSCRIPTION,
+    order=40,  # pragma: no mutate -- sparse rank; a +-1 shift is order-equivalent
+    commands=("agent",),
+)
+
 
 def _emit_voice_list(_state: AppState, json_mode: bool) -> None:
     """--list-voices body, routed through run_command so --json yields a
diff --git a/aai_cli/commands/audit.py b/aai_cli/commands/audit.py
index 0a5ed656..2981506d 100644
--- a/aai_cli/commands/audit.py
+++ b/aai_cli/commands/audit.py
@@ -5,13 +5,19 @@
 import typer
 from rich.markup import escape
 
-from aai_cli import help_panels, jsonshape, options, output, timeparse
+from aai_cli import command_registry, help_panels, jsonshape, options, output, timeparse
 from aai_cli.auth import ams
 from aai_cli.context import AppState, run_command
 from aai_cli.help_text import examples_epilog
 
 app = typer.Typer(help="View your account's audit log.")
 
+SPEC = command_registry.CommandModuleSpec(
+    panel=help_panels.ACCOUNT,
+    order=40,  # pragma: no mutate -- sparse rank; a +-1 shift is order-equivalent
+    commands=("audit",),
+)
+
 # `__`-separated variants are handled by _normalize_action (which maps `__` -> `.`),
 # so only the dotted forms need entries here.
 _LOGIN_ACTIONS = {"login", "login.succeeded"}
diff --git a/aai_cli/commands/caption.py b/aai_cli/commands/caption.py
index 1287e9a3..a6d3e694 100644
--- a/aai_cli/commands/caption.py
+++ b/aai_cli/commands/caption.py
@@ -4,12 +4,18 @@
 
 import typer
 
-from aai_cli import caption_exec, help_panels, options
+from aai_cli import caption_exec, command_registry, help_panels, options
 from aai_cli.context import run_command
 from aai_cli.help_text import examples_epilog
 
 app = typer.Typer()
 
+SPEC = command_registry.CommandModuleSpec(
+    panel=help_panels.TRANSCRIPTION,
+    order=90,  # pragma: no mutate -- sparse rank; a +-1 shift is order-equivalent
+    commands=("caption",),
+)
+
 
 @app.command(
     rich_help_panel=help_panels.TRANSCRIPTION,
diff --git a/aai_cli/commands/clip.py b/aai_cli/commands/clip.py
index 6c6aa487..cd4d5ad6 100644
--- a/aai_cli/commands/clip.py
+++ b/aai_cli/commands/clip.py
@@ -4,12 +4,18 @@
 
 import typer
 
-from aai_cli import clip_exec, help_panels, llm, options
+from aai_cli import clip_exec, command_registry, help_panels, llm, options
 from aai_cli.context import run_command
 from aai_cli.help_text import examples_epilog
 
 app = typer.Typer()
 
+SPEC = command_registry.CommandModuleSpec(
+    panel=help_panels.TRANSCRIPTION,
+    order=70,  # pragma: no mutate -- sparse rank; a +-1 shift is order-equivalent
+    commands=("clip",),
+)
+
 
 @app.command(
     rich_help_panel=help_panels.TRANSCRIPTION,
diff --git a/aai_cli/commands/deploy.py b/aai_cli/commands/deploy.py
index a3a0bf9d..6f3745b9 100644
--- a/aai_cli/commands/deploy.py
+++ b/aai_cli/commands/deploy.py
@@ -3,13 +3,19 @@
 
 import typer
 
-from aai_cli import deploy_exec, help_panels, options
+from aai_cli import command_registry, deploy_exec, help_panels, options
 from aai_cli.context import run_command
 from aai_cli.help_text import examples_epilog
 
 # Flattened single-command sub-typer (same pattern as `assembly dev`).
 app = typer.Typer()
 
+SPEC = command_registry.CommandModuleSpec(
+    panel=help_panels.BUILD,
+    order=40,  # pragma: no mutate -- sparse rank; a +-1 shift is order-equivalent
+    commands=("deploy",),
+)
+
 
 @app.command(
     rich_help_panel=help_panels.BUILD,
diff --git a/aai_cli/commands/dev.py b/aai_cli/commands/dev.py
index eb9be5e4..5d19eb7e 100644
--- a/aai_cli/commands/dev.py
+++ b/aai_cli/commands/dev.py
@@ -3,7 +3,7 @@
 
 import typer
 
-from aai_cli import dev_exec, help_panels, options
+from aai_cli import command_registry, dev_exec, help_panels, options
 from aai_cli.context import run_command
 from aai_cli.help_text import examples_epilog
 from aai_cli.init import devserver
@@ -12,6 +12,12 @@
 # @app.command() registered via app.add_typer(dev.app) with no name.
 app = typer.Typer()
 
+SPEC = command_registry.CommandModuleSpec(
+    panel=help_panels.BUILD,
+    order=20,  # pragma: no mutate -- sparse rank; a +-1 shift is order-equivalent
+    commands=("dev",),
+)
+
 
 @app.command(
     rich_help_panel=help_panels.BUILD,
diff --git a/aai_cli/commands/dictate.py b/aai_cli/commands/dictate.py
index 34261a20..32e166e9 100644
--- a/aai_cli/commands/dictate.py
+++ b/aai_cli/commands/dictate.py
@@ -2,13 +2,19 @@
 
 import typer
 
-from aai_cli import dictate_exec, help_panels, options
+from aai_cli import command_registry, dictate_exec, help_panels, options
 from aai_cli.context import run_command
 from aai_cli.help_text import examples_epilog
 from aai_cli.sync_stt import MAX_AUDIO_SECONDS
 
 app = typer.Typer()
 
+SPEC = command_registry.CommandModuleSpec(
+    panel=help_panels.TRANSCRIPTION,
+    order=30,  # pragma: no mutate -- sparse rank; a +-1 shift is order-equivalent
+    commands=("dictate",),
+)
+
 
 @app.command(
     rich_help_panel=help_panels.TRANSCRIPTION,
diff --git a/aai_cli/commands/doctor.py b/aai_cli/commands/doctor.py
index 4bd7faa0..ebd334d8 100644
--- a/aai_cli/commands/doctor.py
+++ b/aai_cli/commands/doctor.py
@@ -1,274 +1,18 @@
 from __future__ import annotations
 
-import shutil
-import sys
-from abc import abstractmethod
-from collections.abc import Mapping, Sequence
-from typing import NotRequired, Protocol, TypedDict
-
 import typer
-from rich.markup import escape
 
-from aai_cli import client, coding_agent, config, environments, help_panels, options, output, theme
+from aai_cli import command_registry, doctor_checks, environments, help_panels, options, output
 from aai_cli.context import AppState, run_command
-from aai_cli.errors import CLIError, NotAuthenticated
 from aai_cli.help_text import examples_epilog
 
 app = typer.Typer()
 
-
-class Check(TypedDict):
-    """One diagnostic: a named check, its status, what it affects, and how to fix it."""
-
-    name: str
-    status: str  # "ok" | "warn" | "fail" — only "fail" makes `doctor` exit non-zero
-    affects: list[str]
-    detail: str
-    fix: str | None
-
-
-class DoctorResult(TypedDict):
-    ok: bool
-    # Which profile/environment the checks ran against. `assembly doctor` always fills
-    # these in; the onboarding wizard reuses `render` for a partial check without
-    # them, so they stay optional.
-    profile: NotRequired[str]
-    environment: NotRequired[str]
-    checks: list[Check]
-
-
-class _SoundDeviceModule(Protocol):
-    @abstractmethod
-    def query_devices(self) -> Sequence[Mapping[str, object]]:
-        """List the audio devices sounddevice can see."""
-
-
-# Status -> (affordance symbol, render style). "fail" is a blocker; "warn" is
-# degraded-but-usable. Drives the per-check glyph in `render`.
-_SYMBOL = {
-    "ok": (theme.SYMBOL_SUCCESS, "aai.success"),
-    "warn": (theme.SYMBOL_WARN, "aai.warn"),
-    "fail": (theme.SYMBOL_ERROR, "aai.error"),
-}
-
-
-def _check(
-    name: str,
-    status: str,
-    detail: str,
-    *,
-    fix: str | None = None,
-    affects: list[str] | None = None,
-) -> Check:
-    """Assemble a Check. ``affects`` defaults to empty — an 'ok' check blocks nothing."""
-    return {"name": name, "status": status, "affects": affects or [], "detail": detail, "fix": fix}
-
-
-def check_python() -> Check:
-    v = sys.version_info
-    version = f"{v.major}.{v.minor}.{v.micro}"
-    if v >= (3, 12):
-        return _check("python", "ok", version)
-    return _check(
-        "python",
-        "fail",
-        f"Python {version} is too old; the CLI needs 3.12+",
-        fix="Install Python 3.12 or newer, then reinstall the CLI.",
-        affects=["everything"],
-    )
-
-
-# Named _check_credentials (not *api_key*): the report dict carries only status text,
-# but CodeQL's name heuristic would treat the call's return value as a secret and flag
-# the doctor payload emit (py/clear-text-logging-sensitive-data).
-def _check_credentials(profile: str) -> Check:
-    try:
-        key = config.resolve_api_key(profile=profile)
-    except NotAuthenticated:
-        if not config.keyring_usable():
-            # On a box with no keyring, `assembly login` can't persist a key either, so
-            # point at the env var that actually works here instead of a dead end.
-            return _check(
-                "api-key",
-                "fail",
-                "No API key found, and this machine has no usable OS keyring.",
-                fix="Set ASSEMBLYAI_API_KEY (browser login can't store a key without a keyring).",
-                affects=["everything"],
-            )
-        return _check(
-            "api-key",
-            "fail",
-            "No API key found.",
-            fix="Run 'assembly login' (or set ASSEMBLYAI_API_KEY).",
-            affects=["everything"],
-        )
-    # validate_key doubles as the connectivity probe: it makes one cheap authed call,
-    # so a pass means the key is valid AND the active environment's API is reachable.
-    api_host = environments.active().api_base.removeprefix("https://")
-    try:
-        valid = client.validate_key(key)
-    except CLIError as exc:
-        return _check(
-            "api-key",
-            "fail",
-            f"Could not reach AssemblyAI: {exc.message}",
-            fix=f"Check your network/proxy and that {api_host} is reachable.",
-            affects=["everything"],
-        )
-    if valid:
-        return _check("api-key", "ok", "API key is valid and AssemblyAI is reachable.")
-    # validate_key collapses every auth-shaped failure (401, 403, proxy "forbidden")
-    # to False, so don't claim a specific status code we never saw.
-    return _check(
-        "api-key",
-        "fail",
-        "API key was rejected by the server.",
-        fix="Run 'assembly login' with a valid key.",
-        affects=["everything"],
-    )
-
-
-def check_ffmpeg() -> Check:
-    # ffmpeg is ONLY used to stream non-WAV files or URLs (stream/agent), where it
-    # decodes them to 16 kHz mono PCM on the fly. Plain `transcribe` (including
-    # YouTube URLs) uploads the file to AssemblyAI and never invokes ffmpeg, so it is
-    # not required for transcription.
-    if shutil.which("ffmpeg"):
-        return _check("ffmpeg", "ok", "found")
-    return _check(
-        "ffmpeg",
-        "warn",
-        (
-            "ffmpeg not found. Only needed to stream non-WAV files or URLs; "
-            "transcription (including YouTube) works without it, as does streaming a "
-            "16 kHz mono WAV."
-        ),
-        fix=(
-            "Install ffmpeg (macOS: brew install ffmpeg; Debian/Ubuntu: apt-get install "
-            "ffmpeg; Fedora: dnf install ffmpeg; Windows: winget install ffmpeg)."
-        ),
-        affects=["stream/agent (non-WAV file or URL input)"],
-    )
-
-
-def _probe_input_devices() -> int:
-    """Number of available microphone (input) devices. Raises if audio is unavailable."""
-    sd = _sounddevice()
-    devices = sd.query_devices()
-    return sum(1 for device in devices if _input_channels(device) > 0)
-
-
-def _sounddevice() -> _SoundDeviceModule:
-    import sounddevice as module
-
-    sd: _SoundDeviceModule = module
-    return sd
-
-
-def _input_channels(device: Mapping[str, object]) -> int:
-    channels = device.get("max_input_channels")
-    return channels if isinstance(channels, int) else 0
-
-
-def check_audio() -> Check:
-    affects = ["stream (microphone)", "agent"]
-    try:
-        inputs = _probe_input_devices()
-    except ImportError:
-        return _check(
-            "audio",
-            "warn",
-            "sounddevice is not importable; the microphone can't be used.",
-            fix="pip install --force-reinstall sounddevice",
-            affects=affects,
-        )
-    except Exception as exc:  # noqa: BLE001 - any PortAudio/device failure is a soft warning
-        return _check(
-            "audio",
-            "warn",
-            f"audio system unavailable: {exc}",
-            fix=(
-                "Install PortAudio (Debian/Ubuntu: sudo apt-get install libportaudio2; "
-                "Fedora: sudo dnf install portaudio; macOS: brew install portaudio)."
-            ),
-            affects=affects,
-        )
-    if inputs == 0:
-        return _check(
-            "audio",
-            "warn",
-            "No microphone (input device) found.",
-            fix="Connect a microphone; live mic input is needed for stream/agent.",
-            affects=affects,
-        )
-    return _check("audio", "ok", f"{inputs} microphone input device(s) available.")
-
-
-def _check_coding_agent() -> Check:
-    missing = [tool for tool in ("claude", "npx") if shutil.which(tool) is None]
-    if not missing:
-        # Tools are present, so report what `assembly setup install` actually
-        # installed rather than always suggesting it.
-        not_installed = coding_agent.missing_components()
-        if not not_installed:
-            return _check(
-                "coding-agent", "ok", "claude and npx found; docs MCP + skills installed."
-            )
-        return _check(
-            "coding-agent",
-            "ok",
-            "claude and npx found; run 'assembly setup install' to add: "
-            f"{', '.join(not_installed)}.",
-        )
-    return _check(
-        "coding-agent",
-        "warn",
-        f"not found: {', '.join(missing)}.",
-        fix=(
-            "Install Claude Code (https://claude.com/claude-code) and Node.js, "
-            "then run 'assembly setup install'."
-        ),
-        affects=["assembly setup install"],
-    )
-
-
-def render_check_lines(checks: list[Check]) -> list[str]:
-    """The per-check report lines (glyph, name — detail, indented fix hint).
-
-    Shared with the onboarding wizard's environment section (which renders the same
-    checks with its own summary line), so the two renders can't drift."""
-    lines: list[str] = []
-    for c in checks:
-        symbol, style = _SYMBOL.get(c["status"], (theme.SYMBOL_HINT, "aai.muted"))
-        lines.append(
-            f"  [{style}]{escape(symbol)}[/{style}] {escape(c['name'])} — {escape(c['detail'])}"
-        )
-        if c["fix"]:
-            lines.append("      " + output.hint(f"fix: {escape(c['fix'])}"))
-    return lines
-
-
-def render(data: DoctorResult) -> str:
-    checks = data["checks"]
-    lines = [output.heading("Environment check")]
-    profile, environment = data.get("profile"), data.get("environment")
-    if profile is not None and environment is not None:
-        lines.append(
-            "  " + output.hint(f"profile: {escape(profile)} · environment: {escape(environment)}")
-        )
-    lines.extend(render_check_lines(checks))
-    if data["ok"]:
-        lines.append("  " + output.success("Everything looks good."))
-        # Only the real `assembly doctor` carries profile context; the onboarding wizard
-        # reuses render() for a partial check and has its own next-steps, so don't
-        # tack a "try transcribe" hint onto that one.
-        if data.get("profile") is not None:
-            lines.append("  " + output.hint("Try it: assembly transcribe --sample"))
-    else:
-        failed = sum(1 for c in checks if c["status"] == "fail")
-        noun = "problem" if failed == 1 else "problems"
-        lines.append("  " + output.fail(f"{failed} {noun} found — see fixes above."))
-    return "\n".join(lines)
+SPEC = command_registry.CommandModuleSpec(
+    panel=help_panels.SETUP,
+    order=10,  # pragma: no mutate -- sparse rank; a +-1 shift is order-equivalent
+    commands=("doctor",),
+)
 
 
 @app.command(
@@ -288,21 +32,23 @@ def doctor(
 
     def body(state: AppState, json_mode: bool) -> None:
         profile = state.resolve_profile()
+        # Called through the module (not name-imported) so a monkeypatched check in
+        # aai_cli.doctor_checks is seen here and by the onboarding wizard alike.
         checks = [
-            check_python(),
-            _check_credentials(profile),
-            check_ffmpeg(),
-            check_audio(),
-            _check_coding_agent(),
+            doctor_checks.check_python(),
+            doctor_checks.check_credentials(profile),
+            doctor_checks.check_ffmpeg(),
+            doctor_checks.check_audio(),
+            doctor_checks.check_coding_agent(),
         ]
         ok = not any(c["status"] == "fail" for c in checks)
-        payload: DoctorResult = {
+        payload: doctor_checks.DoctorResult = {
             "ok": ok,
             "profile": profile,
             "environment": environments.active().name,
             "checks": checks,
         }
-        output.emit(payload, render, json_mode=json_mode)
+        output.emit(payload, doctor_checks.render, json_mode=json_mode)
         if not ok:
             raise typer.Exit(code=1)
 
diff --git a/aai_cli/commands/dub.py b/aai_cli/commands/dub.py
index ef96f89a..aab668d2 100644
--- a/aai_cli/commands/dub.py
+++ b/aai_cli/commands/dub.py
@@ -4,12 +4,18 @@
 
 import typer
 
-from aai_cli import dub_exec, help_panels, llm, options
+from aai_cli import command_registry, dub_exec, help_panels, llm, options
 from aai_cli.context import run_command
 from aai_cli.help_text import examples_epilog
 
 app = typer.Typer()
 
+SPEC = command_registry.CommandModuleSpec(
+    panel=help_panels.TRANSCRIPTION,
+    order=80,  # pragma: no mutate -- sparse rank; a +-1 shift is order-equivalent
+    commands=("dub",),
+)
+
 
 @app.command(
     rich_help_panel=help_panels.TRANSCRIPTION,
diff --git a/aai_cli/commands/evaluate.py b/aai_cli/commands/evaluate.py
index 1ac7381a..69830d4c 100644
--- a/aai_cli/commands/evaluate.py
+++ b/aai_cli/commands/evaluate.py
@@ -9,13 +9,19 @@
 
 import typer
 
-from aai_cli import evaluate_exec, help_panels, options
+from aai_cli import command_registry, evaluate_exec, help_panels, options
 from aai_cli.context import run_command
 from aai_cli.evaluate_exec import EvalSpeechModel
 from aai_cli.help_text import examples_epilog
 
 app = typer.Typer()
 
+SPEC = command_registry.CommandModuleSpec(
+    panel=help_panels.TRANSCRIPTION,
+    order=100,  # pragma: no mutate -- sparse rank; a +-1 shift is order-equivalent
+    commands=("eval",),
+)
+
 
 @app.command(
     name="eval",
diff --git a/aai_cli/commands/init.py b/aai_cli/commands/init.py
index f23c8d63..2cbb3795 100644
--- a/aai_cli/commands/init.py
+++ b/aai_cli/commands/init.py
@@ -3,7 +3,7 @@
 
 import typer
 
-from aai_cli import help_panels, init_exec, options
+from aai_cli import command_registry, help_panels, init_exec, options
 from aai_cli.context import AppState, run_command
 from aai_cli.help_text import examples_epilog
 from aai_cli.init import templates
@@ -13,6 +13,12 @@
 # no name. Bare `assembly init` runs the command with template=None -> the interactive picker.
 app = typer.Typer()
 
+SPEC = command_registry.CommandModuleSpec(
+    panel=help_panels.BUILD,
+    order=10,  # pragma: no mutate -- sparse rank; a +-1 shift is order-equivalent
+    commands=("init",),
+)
+
 
 @app.command(
     rich_help_panel=help_panels.BUILD,
diff --git a/aai_cli/commands/keys.py b/aai_cli/commands/keys.py
index b6dc50b2..25d6ac39 100644
--- a/aai_cli/commands/keys.py
+++ b/aai_cli/commands/keys.py
@@ -3,7 +3,7 @@
 import typer
 from rich.markup import escape
 
-from aai_cli import jsonshape, options, output
+from aai_cli import command_registry, help_panels, jsonshape, options, output
 from aai_cli.auth import ams
 from aai_cli.context import AppState, run_command
 from aai_cli.errors import APIError, UsageError
@@ -11,6 +11,13 @@
 
 app = typer.Typer(help="List, create, and rename your AssemblyAI API keys.", no_args_is_help=True)
 
+SPEC = command_registry.CommandModuleSpec(
+    panel=help_panels.ACCOUNT,
+    order=30,  # pragma: no mutate -- sparse rank; a +-1 shift is order-equivalent
+    commands=("keys",),
+    group_name="keys",
+)
+
 
 def _project_id(project: dict[str, object]) -> int | None:
     value = project.get("id")
diff --git a/aai_cli/commands/llm.py b/aai_cli/commands/llm.py
index 3bcc71e8..762ec897 100644
--- a/aai_cli/commands/llm.py
+++ b/aai_cli/commands/llm.py
@@ -2,7 +2,7 @@
 
 import typer
 
-from aai_cli import choices, help_panels, llm_exec, options, output
+from aai_cli import choices, command_registry, help_panels, llm_exec, options, output
 from aai_cli import llm as gateway
 from aai_cli.context import run_command
 from aai_cli.errors import UsageError
@@ -10,6 +10,12 @@
 
 app = typer.Typer()
 
+SPEC = command_registry.CommandModuleSpec(
+    panel=help_panels.TRANSCRIPTION,
+    order=60,  # pragma: no mutate -- sparse rank; a +-1 shift is order-equivalent
+    commands=("llm",),
+)
+
 
 def _list_models(output_field: choices.TextOrJson | None, json_mode: bool) -> None:
     """The --list-models body, routed through run_command so --json yields a
diff --git a/aai_cli/commands/login.py b/aai_cli/commands/login.py
index 38571368..93f0e1b6 100644
--- a/aai_cli/commands/login.py
+++ b/aai_cli/commands/login.py
@@ -4,13 +4,19 @@
 from rich.markup import escape
 from rich.table import Table
 
-from aai_cli import client, config, environments, help_panels, options, output
+from aai_cli import client, command_registry, config, environments, help_panels, options, output
 from aai_cli.context import AppState, persist_browser_login, run_command
 from aai_cli.errors import APIError, CLIError, UsageError
 from aai_cli.help_text import examples_epilog
 
 app = typer.Typer()
 
+SPEC = command_registry.CommandModuleSpec(
+    panel=help_panels.ACCOUNT,
+    order=10,  # pragma: no mutate -- sparse rank; a +-1 shift is order-equivalent
+    commands=("login", "logout", "whoami"),
+)
+
 
 @app.command(
     rich_help_panel=help_panels.ACCOUNT,
diff --git a/aai_cli/commands/onboard.py b/aai_cli/commands/onboard.py
index ab32c38d..84c3e39f 100644
--- a/aai_cli/commands/onboard.py
+++ b/aai_cli/commands/onboard.py
@@ -2,7 +2,7 @@
 
 import typer
 
-from aai_cli import help_panels, options, output, stdio
+from aai_cli import command_registry, help_panels, options, output, stdio
 from aai_cli.context import AppState, run_command
 from aai_cli.errors import CLIError
 from aai_cli.help_text import examples_epilog
@@ -12,6 +12,12 @@
 
 app = typer.Typer()
 
+SPEC = command_registry.CommandModuleSpec(
+    panel=help_panels.QUICK_START,
+    order=10,  # pragma: no mutate -- sparse rank; a +-1 shift is order-equivalent
+    commands=("onboard",),
+)
+
 
 def build_prompter(*, non_interactive: bool = False) -> Prompter:
     """A real prompter only when the caller hasn't opted out and both ends are a TTY;
diff --git a/aai_cli/commands/sessions.py b/aai_cli/commands/sessions.py
index f1ad5e5e..9ec79b0a 100644
--- a/aai_cli/commands/sessions.py
+++ b/aai_cli/commands/sessions.py
@@ -6,13 +6,20 @@
 from rich.markup import escape
 from rich.table import Table
 
-from aai_cli import jsonshape, options, output, theme, timeparse
+from aai_cli import command_registry, help_panels, jsonshape, options, output, theme, timeparse
 from aai_cli.auth import ams
 from aai_cli.context import AppState, run_command
 from aai_cli.help_text import examples_epilog
 
 app = typer.Typer(help="Browse your past streaming (real-time) sessions.", no_args_is_help=True)
 
+SPEC = command_registry.CommandModuleSpec(
+    panel=help_panels.HISTORY,
+    order=20,  # pragma: no mutate -- sparse rank; a +-1 shift is order-equivalent
+    commands=("sessions",),
+    group_name="sessions",
+)
+
 # Fields shown by `sessions get`, in display order.
 _DETAIL_FIELDS = (
     "session_id",
diff --git a/aai_cli/commands/setup.py b/aai_cli/commands/setup.py
index 986f3f75..3400f1c5 100644
--- a/aai_cli/commands/setup.py
+++ b/aai_cli/commands/setup.py
@@ -1,248 +1,22 @@
 from __future__ import annotations
 
-import shutil
-import subprocess
-from pathlib import Path
-from typing import TYPE_CHECKING
-
 import typer
 
-from aai_cli import choices, coding_agent, options, output
+from aai_cli import choices, command_registry, help_panels, options, output, setup_exec
 from aai_cli.context import AppState, run_command
 from aai_cli.help_text import examples_epilog
-from aai_cli.steps import Step, render_steps
-
-if TYPE_CHECKING:
-    # Annotation only (PEP 563 string), so no runtime import. Import from
-    # importlib.abc — that is the protocol `resources.files()` is typed to return.
-    from importlib.abc import Traversable
 
 app = typer.Typer(
     help="Set up your coding agent for AssemblyAI (docs MCP + skills).",
     no_args_is_help=True,
 )
 
-MCP_URL = "https://mcp.assemblyai.com/docs"
-SKILL_REPO = "AssemblyAI/assemblyai-skill"
-_STEPS_HEADING = "AssemblyAI coding-agent setup:"
-
-# The subprocess wrapper, artifact names, and presence probes are shared with
-# `assembly doctor` (command modules are independent), so they live in
-# aai_cli.coding_agent; the names below keep this module's call sites stable.
-MCP_NAME = coding_agent.MCP_NAME
-_run = coding_agent.run
-_mcp_present = coding_agent.mcp_present
-_skill_dir = coding_agent.skill_dir
-_skill_installed = coding_agent.skill_installed
-_cli_skill_dir = coding_agent.cli_skill_dir
-_cli_skill_installed = coding_agent.cli_skill_installed
-
-
-def _proc_detail(proc: subprocess.CompletedProcess[str]) -> str:
-    """The error text from a finished process: stderr if present, else stdout."""
-    return (proc.stderr or proc.stdout).strip()
-
-
-# --- docs MCP (registered via the `claude` CLI) ------------------------------
-
-
-def install_mcp(scope: str, force: bool) -> Step:
-    if shutil.which("claude") is None:
-        return {
-            "name": "mcp",
-            "status": "skipped",
-            "detail": (
-                "Claude Code not found. Install it (https://claude.com/claude-code), "
-                f"then run: claude mcp add --transport http --scope {scope} "
-                f"{MCP_NAME} {MCP_URL}"
-            ),
-        }
-    if _mcp_present():
-        if not force:
-            return {"name": "mcp", "status": "already", "detail": f"{MCP_NAME} already registered"}
-        removed = _run(["claude", "mcp", "remove", MCP_NAME])
-        if removed.returncode != 0:
-            return {
-                "name": "mcp",
-                "status": "failed",
-                "detail": f"could not remove existing {MCP_NAME}: " + _proc_detail(removed),
-            }
-    proc = _run(
-        ["claude", "mcp", "add", "--transport", "http", "--scope", scope, MCP_NAME, MCP_URL]
-    )
-    if proc.returncode != 0:
-        return {"name": "mcp", "status": "failed", "detail": _proc_detail(proc)}
-    return {"name": "mcp", "status": "installed", "detail": f"{MCP_NAME} @ {scope} scope"}
-
-
-def _mcp_status() -> Step:
-    if shutil.which("claude") is None:
-        return {"name": "mcp", "status": "unknown", "detail": "Claude Code not found"}
-    present = _mcp_present()
-    return {
-        "name": "mcp",
-        "status": "installed" if present else "not_installed",
-        "detail": MCP_NAME,
-    }
-
-
-def _remove_mcp(scope: str | None) -> Step:
-    if shutil.which("claude") is None:
-        return {"name": "mcp", "status": "skipped", "detail": "Claude Code not found"}
-    if not _mcp_present():
-        return {"name": "mcp", "status": "not_installed", "detail": MCP_NAME}
-    cmd = ["claude", "mcp", "remove", MCP_NAME]
-    if scope is not None:
-        cmd += ["--scope", scope]
-    proc = _run(cmd)
-    if proc.returncode != 0:
-        return {"name": "mcp", "status": "failed", "detail": _proc_detail(proc)}
-    return {"name": "mcp", "status": "removed", "detail": MCP_NAME}
-
-
-# --- assemblyai skill (downloaded from its own repo via the `skills` CLI) -----
-
-_SKILL_ADD = ["npx", "-y", "skills", "add", SKILL_REPO, "--global", "--yes"]
-_SKILL_REMOVE = ["npx", "-y", "skills", "remove", "assemblyai", "--global"]
-_SKILL_ADD_HINT = f"npx skills add {SKILL_REPO} --global"
-
-
-def install_skill(force: bool) -> Step:
-    if shutil.which("npx") is None:
-        return {
-            "name": "skill",
-            "status": "skipped",
-            "detail": f"Node.js/npx not found. Install Node.js, then run: {_SKILL_ADD_HINT}",
-        }
-    # Idempotent like the MCP step: if the skill is already on disk and the user
-    # didn't ask to --force, report `already` instead of silently re-downloading
-    # it and always claiming `installed`.
-    if _skill_installed() and not force:
-        return {
-            "name": "skill",
-            "status": "already",
-            "detail": f"assemblyai skill at {_skill_dir()}",
-        }
-    # --global: install at user scope (not project scope, which `skills` auto-selects
-    # when run inside a project) so the skill lands in ~/.claude/skills where `status`
-    # looks. npx -y skips its install prompt; the longer timeout covers the download.
-    proc = _run(_SKILL_ADD, timeout=300)
-    if proc.returncode != 0:
-        return {"name": "skill", "status": "failed", "detail": _proc_detail(proc)}
-    # Trust the filesystem, not the exit code: confirm the skill actually landed
-    # where `status` looks, so the two commands can never disagree.
-    if not _skill_installed():
-        return {
-            "name": "skill",
-            "status": "failed",
-            "detail": (
-                f"'{' '.join(_SKILL_ADD[3:])}' reported success but no skill was found at "
-                f"{_skill_dir()}. Install it manually: {_SKILL_ADD_HINT}"
-            ),
-        }
-    return {"name": "skill", "status": "installed", "detail": str(_skill_dir())}
-
-
-def _skill_status() -> Step:
-    return {
-        "name": "skill",
-        "status": "installed" if _skill_installed() else "not_installed",
-        "detail": str(_skill_dir()),
-    }
-
-
-def _remove_skill() -> Step:
-    if not _skill_installed():
-        return {"name": "skill", "status": "not_installed", "detail": str(_skill_dir())}
-    if shutil.which("npx") is None:
-        return {
-            "name": "skill",
-            "status": "skipped",
-            "detail": "Node.js/npx not found. Remove manually: npx skills remove assemblyai --global",
-        }
-    # `skills` symlinks the skill into ~/.claude/skills from its own store, so let it
-    # do the removal (a plain rmtree would choke on the symlink and orphan the store).
-    proc = _run(_SKILL_REMOVE, timeout=120)
-    if proc.returncode != 0 or _skill_installed():
-        detail = _proc_detail(proc) or "skill still present after removal"
-        return {"name": "skill", "status": "failed", "detail": detail}
-    return {"name": "skill", "status": "removed", "detail": str(_skill_dir())}
-
-
-# --- aai-cli skill (bundled in this package, copied into the agent) -----------
-
-
-def _bundled_cli_skill() -> Traversable:
-    # Ships inside the wheel (force-included via [tool.hatch.build.targets.wheel]
-    # artifacts). skills/ has no __init__.py, so navigate from the aai_cli package.
-    from importlib import resources
-
-    return resources.files("aai_cli") / "skills" / coding_agent.CLI_SKILL_NAME
-
-
-def _copy_tree(node: Traversable, dest: Path) -> None:
-    dest.mkdir(parents=True, exist_ok=True)
-    for child in node.iterdir():
-        if child.name == "__pycache__" or child.name.endswith(".pyc"):
-            continue
-        out = dest / child.name
-        if child.is_dir():
-            _copy_tree(child, out)
-        else:
-            out.write_bytes(child.read_bytes())
-
-
-def install_cli_skill(force: bool) -> Step:
-    # Bundled in the package, so no network/npx — just copy it into the agent's
-    # skills dir. Idempotent: skip the copy when already present and not --force.
-    dest = _cli_skill_dir()
-    if _cli_skill_installed() and not force:
-        return {"name": "aai-cli skill", "status": "already", "detail": f"aai-cli skill at {dest}"}
-    src = _bundled_cli_skill()
-    if not src.is_dir():
-        return {
-            "name": "aai-cli skill",
-            "status": "failed",
-            "detail": f"bundled aai-cli skill missing at {src} — this is a packaging bug.",
-        }
-    if dest.exists():
-        shutil.rmtree(dest)
-    _copy_tree(src, dest)
-    if not _cli_skill_installed():
-        return {
-            "name": "aai-cli skill",
-            "status": "failed",
-            "detail": f"copied the bundled skill but {dest / 'SKILL.md'} is missing.",
-        }
-    return {"name": "aai-cli skill", "status": "installed", "detail": str(dest)}
-
-
-def _cli_skill_status() -> Step:
-    return {
-        "name": "aai-cli skill",
-        "status": "installed" if _cli_skill_installed() else "not_installed",
-        "detail": str(_cli_skill_dir()),
-    }
-
-
-def _remove_cli_skill() -> Step:
-    # We copied a real directory in (not a symlink into a store), so removal is a
-    # plain rmtree of the destination.
-    dest = _cli_skill_dir()
-    if not _cli_skill_installed():
-        return {"name": "aai-cli skill", "status": "not_installed", "detail": str(dest)}
-    shutil.rmtree(dest, ignore_errors=True)
-    if _cli_skill_installed():
-        return {
-            "name": "aai-cli skill",
-            "status": "failed",
-            "detail": "skill still present after removal",
-        }
-    return {"name": "aai-cli skill", "status": "removed", "detail": str(dest)}
-
-
-def render(data: dict[str, list[Step]]) -> str:
-    return render_steps(data["steps"], heading=_STEPS_HEADING)
+SPEC = command_registry.CommandModuleSpec(
+    panel=help_panels.SETUP,
+    order=20,  # pragma: no mutate -- sparse rank; a +-1 shift is order-equivalent
+    commands=("setup",),
+    group_name="setup",
+)
 
 
 @app.command(
@@ -273,8 +47,12 @@ def install(
     """
 
     def body(_state: AppState, json_mode: bool) -> None:
-        steps = [install_mcp(scope, force), install_skill(force), install_cli_skill(force)]
-        output.emit({"steps": steps}, render, json_mode=json_mode)
+        steps = [
+            setup_exec.install_mcp(scope, force=force),
+            setup_exec.install_skill(force=force),
+            setup_exec.install_cli_skill(force=force),
+        ]
+        output.emit({"steps": steps}, setup_exec.render, json_mode=json_mode)
         if any(s["status"] == "failed" for s in steps):
             raise typer.Exit(code=1)
 
@@ -296,8 +74,8 @@ def status(
     """Show whether the AssemblyAI MCP server and skills are set up in your coding agent."""
 
     def body(_state: AppState, json_mode: bool) -> None:
-        steps = [_mcp_status(), _skill_status(), _cli_skill_status()]
-        output.emit({"steps": steps}, render, json_mode=json_mode)
+        steps = [setup_exec.mcp_status(), setup_exec.skill_status(), setup_exec.cli_skill_status()]
+        output.emit({"steps": steps}, setup_exec.render, json_mode=json_mode)
 
     run_command(ctx, body, json=json_out)
 
@@ -325,8 +103,12 @@ def remove(
     """Remove the AssemblyAI MCP server and skills from your coding agent."""
 
     def body(_state: AppState, json_mode: bool) -> None:
-        steps = [_remove_mcp(scope), _remove_skill(), _remove_cli_skill()]
-        output.emit({"steps": steps}, render, json_mode=json_mode)
+        steps = [
+            setup_exec.remove_mcp(scope),
+            setup_exec.remove_skill(),
+            setup_exec.remove_cli_skill(),
+        ]
+        output.emit({"steps": steps}, setup_exec.render, json_mode=json_mode)
         if any(s["status"] == "failed" for s in steps):
             raise typer.Exit(code=1)
 
diff --git a/aai_cli/commands/share.py b/aai_cli/commands/share.py
index 6e564a49..d8bba6bb 100644
--- a/aai_cli/commands/share.py
+++ b/aai_cli/commands/share.py
@@ -3,13 +3,19 @@
 
 import typer
 
-from aai_cli import help_panels, options, share_exec
+from aai_cli import command_registry, help_panels, options, share_exec
 from aai_cli.context import run_command
 from aai_cli.help_text import examples_epilog
 
 # Flattened single-command sub-typer (same pattern as `assembly dev`).
 app = typer.Typer()
 
+SPEC = command_registry.CommandModuleSpec(
+    panel=help_panels.BUILD,
+    order=30,  # pragma: no mutate -- sparse rank; a +-1 shift is order-equivalent
+    commands=("share",),
+)
+
 
 @app.command(
     rich_help_panel=help_panels.BUILD,
diff --git a/aai_cli/commands/speak.py b/aai_cli/commands/speak.py
index 578a1927..6b1f4881 100644
--- a/aai_cli/commands/speak.py
+++ b/aai_cli/commands/speak.py
@@ -4,13 +4,19 @@
 
 import typer
 
-from aai_cli import help_panels, options, speak_exec
+from aai_cli import command_registry, help_panels, options, speak_exec
 from aai_cli.context import run_command
 from aai_cli.help_text import examples_epilog
 from aai_cli.speak_exec import DEFAULT_LANGUAGE
 
 app = typer.Typer()
 
+SPEC = command_registry.CommandModuleSpec(
+    panel=help_panels.TRANSCRIPTION,
+    order=50,  # pragma: no mutate -- sparse rank; a +-1 shift is order-equivalent
+    commands=("speak",),
+)
+
 
 @app.command(
     rich_help_panel=help_panels.TRANSCRIPTION,
diff --git a/aai_cli/commands/stream.py b/aai_cli/commands/stream.py
index 4eb9bc40..b90bbc6b 100644
--- a/aai_cli/commands/stream.py
+++ b/aai_cli/commands/stream.py
@@ -5,12 +5,18 @@
 import typer
 from assemblyai.streaming.v3 import Encoding, NoiseSuppressionModel, SpeechModel
 
-from aai_cli import choices, help_panels, llm, options, stream_exec
+from aai_cli import choices, command_registry, help_panels, llm, options, stream_exec
 from aai_cli.context import run_command
 from aai_cli.help_text import examples_epilog
 
 app = typer.Typer()
 
+SPEC = command_registry.CommandModuleSpec(
+    panel=help_panels.TRANSCRIPTION,
+    order=20,  # pragma: no mutate -- sparse rank; a +-1 shift is order-equivalent
+    commands=("stream",),
+)
+
 DEFAULT_SPEECH_MODEL = SpeechModel.u3_rt_pro
 
 
diff --git a/aai_cli/commands/telemetry.py b/aai_cli/commands/telemetry.py
index c8077201..a376b037 100644
--- a/aai_cli/commands/telemetry.py
+++ b/aai_cli/commands/telemetry.py
@@ -10,7 +10,7 @@
 
 import typer
 
-from aai_cli import config, options, output, telemetry
+from aai_cli import command_registry, config, help_panels, options, output, telemetry
 from aai_cli.context import AppState, run_command
 from aai_cli.help_text import examples_epilog
 
@@ -19,6 +19,13 @@
     no_args_is_help=True,
 )
 
+SPEC = command_registry.CommandModuleSpec(
+    panel=help_panels.SETUP,
+    order=30,  # pragma: no mutate -- sparse rank; a +-1 shift is order-equivalent
+    commands=("telemetry",),
+    group_name="telemetry",
+)
+
 
 def _consent_label() -> str:
     return "granted" if telemetry.consent_granted() else "denied"
diff --git a/aai_cli/commands/transcribe.py b/aai_cli/commands/transcribe.py
index 3cbf7df7..bb9c871a 100644
--- a/aai_cli/commands/transcribe.py
+++ b/aai_cli/commands/transcribe.py
@@ -5,12 +5,18 @@
 import assemblyai as aai
 import typer
 
-from aai_cli import choices, help_panels, llm, options, transcribe_exec
+from aai_cli import choices, command_registry, help_panels, llm, options, transcribe_exec
 from aai_cli.context import run_command
 from aai_cli.help_text import examples_epilog
 
 app = typer.Typer()
 
+SPEC = command_registry.CommandModuleSpec(
+    panel=help_panels.TRANSCRIPTION,
+    order=10,  # pragma: no mutate -- sparse rank; a +-1 shift is order-equivalent
+    commands=("transcribe",),
+)
+
 
 @app.command(
     rich_help_panel=help_panels.TRANSCRIPTION,
diff --git a/aai_cli/commands/transcripts.py b/aai_cli/commands/transcripts.py
index 19b2da38..53d54fb3 100644
--- a/aai_cli/commands/transcripts.py
+++ b/aai_cli/commands/transcripts.py
@@ -3,13 +3,29 @@
 import typer
 from rich.markup import escape
 
-from aai_cli import choices, client, options, output, theme, timeparse
+from aai_cli import (
+    choices,
+    client,
+    command_registry,
+    help_panels,
+    options,
+    output,
+    theme,
+    timeparse,
+)
 from aai_cli.context import AppState, run_command
 from aai_cli.errors import APIError
 from aai_cli.help_text import examples_epilog
 
 app = typer.Typer(help="Browse and fetch past transcripts.", no_args_is_help=True)
 
+SPEC = command_registry.CommandModuleSpec(
+    panel=help_panels.HISTORY,
+    order=10,  # pragma: no mutate -- sparse rank; a +-1 shift is order-equivalent
+    commands=("transcripts",),
+    group_name="transcripts",
+)
+
 
 # `list` is registered before `get` so the subcommand help lists them in that
 # order, matching `assembly sessions --help`.
diff --git a/aai_cli/commands/webhooks.py b/aai_cli/commands/webhooks.py
index 4aa9af1e..a3d69c43 100644
--- a/aai_cli/commands/webhooks.py
+++ b/aai_cli/commands/webhooks.py
@@ -3,12 +3,19 @@
 
 import typer
 
-from aai_cli import options, webhook_listen
+from aai_cli import command_registry, help_panels, options, webhook_listen
 from aai_cli.context import AppState, run_command
 from aai_cli.help_text import examples_epilog
 
 app = typer.Typer(help="Receive webhook deliveries on a public dev URL.", no_args_is_help=True)
 
+SPEC = command_registry.CommandModuleSpec(
+    panel=help_panels.TRANSCRIPTION,
+    order=110,  # pragma: no mutate -- sparse rank; a +-1 shift is order-equivalent
+    commands=("webhooks",),
+    group_name="webhooks",
+)
+
 
 @app.command(
     epilog=examples_epilog(
diff --git a/aai_cli/doctor_checks.py b/aai_cli/doctor_checks.py
new file mode 100644
index 00000000..494901da
--- /dev/null
+++ b/aai_cli/doctor_checks.py
@@ -0,0 +1,274 @@
+"""Environment diagnostics shared by `assembly doctor` and the onboarding wizard.
+
+Command modules must stay independent of one another (an import-linter
+contract), so the checks, their result shapes, and the report renderer live
+here in the core layer; ``commands/doctor.py`` and ``onboard/sections.py``
+both import them from this one place so the two renders can't drift.
+"""
+
+from __future__ import annotations
+
+import shutil
+import sys
+from abc import abstractmethod
+from collections.abc import Mapping, Sequence
+from typing import NotRequired, Protocol, TypedDict
+
+from rich.markup import escape
+
+from aai_cli import client, coding_agent, config, environments, output, theme
+from aai_cli.errors import CLIError, NotAuthenticated
+
+
+class Check(TypedDict):
+    """One diagnostic: a named check, its status, what it affects, and how to fix it."""
+
+    name: str  # short identifier for the check, e.g. "python", "api-key", "ffmpeg"
+    status: str  # "ok" | "warn" | "fail" — only "fail" makes `doctor` exit non-zero
+    affects: list[str]  # what the result impacts, e.g. ["everything"]; _check defaults it to []
+    detail: str  # human-readable description of the result
+    fix: str | None  # remediation hint; None when there is nothing to suggest
+
+
+class DoctorResult(TypedDict):
+    ok: bool  # `assembly doctor` sets this to False when any check has status "fail"
+    # Which profile/environment the checks ran against. `assembly doctor` always fills
+    # these in; the onboarding wizard reuses `render` for a partial check without
+    # them, so they stay optional.
+    profile: NotRequired[str]
+    environment: NotRequired[str]
+    checks: list[Check]  # the individual diagnostics that make up the report
+
+
+class _SoundDeviceModule(Protocol):  # structural type for the imported `sounddevice` module
+    @abstractmethod
+    def query_devices(self) -> Sequence[Mapping[str, object]]:
+        """List the audio devices sounddevice can see."""
+
+
+# Status -> (affordance symbol, render style). "fail" is a blocker; "warn" is
+# degraded-but-usable. Drives the per-check glyph in `render`.
+_SYMBOL = {
+    "ok": (theme.SYMBOL_SUCCESS, "aai.success"),
+    "warn": (theme.SYMBOL_WARN, "aai.warn"),
+    "fail": (theme.SYMBOL_ERROR, "aai.error"),
+}
+
+
+def _check(
+    name: str,
+    status: str,  # "ok" | "warn" | "fail"
+    detail: str,
+    *,
+    fix: str | None = None,  # remediation hint; leave as None when no action is needed
+    affects: list[str] | None = None,  # None (or an empty list) is stored as []
+) -> Check:
+    """Assemble a Check. ``affects`` defaults to empty — an 'ok' check blocks nothing."""
+    return {"name": name, "status": status, "affects": affects or [], "detail": detail, "fix": fix}
+
+
+def check_python() -> Check:
+    v = sys.version_info
+    version = f"{v.major}.{v.minor}.{v.micro}"
+    if v >= (3, 12):  # tuple-wise comparison: true for 3.12.0 and anything newer
+        return _check("python", "ok", version)
+    return _check(
+        "python",
+        "fail",  # a blocker: "fail" is the only status that makes `doctor` exit non-zero
+        f"Python {version} is too old; the CLI needs 3.12+",
+        fix="Install Python 3.12 or newer, then reinstall the CLI.",
+        affects=["everything"],
+    )
+
+
+# Named check_credentials (not *api_key*): the report dict carries only status text,
+# but CodeQL's name heuristic would treat the call's return value as a secret and flag
+# the doctor payload emit (py/clear-text-logging-sensitive-data).
+def check_credentials(profile: str) -> Check:
+    try:
+        key = config.resolve_api_key(profile=profile)
+    except NotAuthenticated:  # no key could be resolved for this profile
+        if not config.keyring_usable():
+            # On a box with no keyring, `assembly login` can't persist a key either, so
+            # point at the env var that actually works here instead of a dead end.
+            return _check(
+                "api-key",
+                "fail",
+                "No API key found, and this machine has no usable OS keyring.",
+                fix="Set ASSEMBLYAI_API_KEY (browser login can't store a key without a keyring).",
+                affects=["everything"],
+            )
+        return _check(
+            "api-key",
+            "fail",
+            "No API key found.",
+            fix="Run 'assembly login' (or set ASSEMBLYAI_API_KEY).",
+            affects=["everything"],
+        )
+    # validate_key doubles as the connectivity probe: it makes one cheap authed call,
+    # so a pass means the key is valid AND the active environment's API is reachable.
+    api_host = environments.active().api_base.removeprefix("https://")  # used in the fix hint
+    try:
+        valid = client.validate_key(key)
+    except CLIError as exc:  # validate_key raised: reported as a connectivity failure
+        return _check(
+            "api-key",
+            "fail",
+            f"Could not reach AssemblyAI: {exc.message}",
+            fix=f"Check your network/proxy and that {api_host} is reachable.",
+            affects=["everything"],
+        )
+    if valid:
+        return _check("api-key", "ok", "API key is valid and AssemblyAI is reachable.")
+    # validate_key collapses every auth-shaped failure (401, 403, proxy "forbidden")
+    # to False, so don't claim a specific status code we never saw.
+    return _check(
+        "api-key",
+        "fail",
+        "API key was rejected by the server.",
+        fix="Run 'assembly login' with a valid key.",
+        affects=["everything"],
+    )
+
+
+def check_ffmpeg() -> Check:
+    # ffmpeg is ONLY used to stream non-WAV files or URLs (stream/agent), where it
+    # decodes them to 16 kHz mono PCM on the fly. Plain `transcribe` (including
+    # YouTube URLs) uploads the file to AssemblyAI and never invokes ffmpeg, so it is
+    # not required for transcription.
+    if shutil.which("ffmpeg"):  # truthy when an `ffmpeg` executable is found on PATH
+        return _check("ffmpeg", "ok", "found")
+    return _check(
+        "ffmpeg",
+        "warn",  # degraded-but-usable, so a missing ffmpeg never fails `doctor`
+        (
+            "ffmpeg not found. Only needed to stream non-WAV files or URLs; "
+            "transcription (including YouTube) works without it, as does streaming a "
+            "16 kHz mono WAV."
+        ),
+        fix=(
+            "Install ffmpeg (macOS: brew install ffmpeg; Debian/Ubuntu: apt-get install "
+            "ffmpeg; Fedora: dnf install ffmpeg; Windows: winget install ffmpeg)."
+        ),
+        affects=["stream/agent (non-WAV file or URL input)"],
+    )
+
+
+def _probe_input_devices() -> int:
+    """Number of available microphone (input) devices. Raises if audio is unavailable."""
+    sd = _sounddevice()  # imports sounddevice on demand; failures propagate to the caller
+    devices = sd.query_devices()
+    return sum(1 for device in devices if _input_channels(device) > 0)
+
+
+def _sounddevice() -> _SoundDeviceModule:
+    import sounddevice as module  # function-scope import, so it runs only when this is called
+
+    sd: _SoundDeviceModule = module  # typed view of the module via the Protocol
+    return sd
+
+
+def _input_channels(device: Mapping[str, object]) -> int:
+    reported = device.get("max_input_channels", 0)  # a missing key counts as zero inputs
+    return reported if isinstance(reported, int) else 0  # non-int values also become 0
+
+
+def check_audio() -> Check:
+    """Probe the microphone stack for stream/agent.
+
+    Every failure mode here is reported as a warning, never a fail."""
+    impacted = ["stream (microphone)", "agent"]
+
+    def soft(detail: str, fix: str) -> Check:
+        # All three warning outcomes share the name, status, and affected commands.
+        return _check("audio", "warn", detail, fix=fix, affects=impacted)
+
+    try:
+        mic_count = _probe_input_devices()
+    except ImportError:
+        # Caught ahead of the catch-all below so an import failure gets its own fix hint.
+        return soft(
+            "sounddevice is not importable; the microphone can't be used.",
+            "pip install --force-reinstall sounddevice",
+        )
+    except Exception as exc:  # noqa: BLE001 - any PortAudio/device failure is a soft warning
+        return soft(
+            f"audio system unavailable: {exc}",
+            "Install PortAudio (Debian/Ubuntu: sudo apt-get install libportaudio2; "
+            "Fedora: sudo dnf install portaudio; macOS: brew install portaudio).",
+        )
+    # The probe returned a count; finding zero input devices is still only a
+    # warning, not a failure.
+    if mic_count != 0:
+        return _check("audio", "ok", f"{mic_count} microphone input device(s) available.")
+    return soft(
+        "No microphone (input device) found.",
+        "Connect a microphone; live mic input is needed for stream/agent.",
+    )
+
+
+def check_coding_agent() -> Check:
+    absent = [tool for tool in ("claude", "npx") if shutil.which(tool) is None]
+    if absent:
+        return _check(
+            "coding-agent",
+            "warn",
+            f"not found: {', '.join(absent)}.",
+            fix=(
+                "Install Claude Code (https://claude.com/claude-code) and Node.js, "
+                "then run 'assembly setup install'."
+            ),
+            affects=["assembly setup install"],
+        )
+    # Both tools exist, so describe what `assembly setup install` has actually put
+    # in place instead of recommending it unconditionally.
+    pending = coding_agent.missing_components()
+    if pending:
+        return _check(
+            "coding-agent",
+            "ok",
+            "claude and npx found; run 'assembly setup install' to add: "
+            f"{', '.join(pending)}.",
+        )
+    return _check(
+        "coding-agent", "ok", "claude and npx found; docs MCP + skills installed."
+    )
+
+
+def render_check_lines(checks: list[Check]) -> list[str]:
+    """One report line per check (glyph, name — detail), plus an indented fix hint.
+
+    The onboarding wizard's environment section calls this too (adding its own
+    summary line), which keeps the two renders from drifting apart."""
+    rendered: list[str] = []
+    for check in checks:
+        glyph, tag = _SYMBOL.get(check["status"], (theme.SYMBOL_HINT, "aai.muted"))
+        # escape() is applied to every interpolated value before it enters the markup.
+        mark, name, detail = escape(glyph), escape(check["name"]), escape(check["detail"])
+        rendered.append(f"  [{tag}]{mark}[/{tag}] {name} — {detail}")
+        if check["fix"]:
+            rendered.append("      " + output.hint(f"fix: {escape(check['fix'])}"))
+    return rendered
+
+
+def render(data: DoctorResult) -> str:
+    """Render the doctor report: heading, optional profile line, checks, summary."""
+    checks = data["checks"]
+    report = [output.heading("Environment check")]
+    profile, environment = data.get("profile"), data.get("environment")
+    if profile is not None and environment is not None:
+        context = f"profile: {escape(profile)} · environment: {escape(environment)}"
+        report.append("  " + output.hint(context))
+    report.extend(render_check_lines(checks))
+    if not data["ok"]:
+        failures = [c for c in checks if c["status"] == "fail"]
+        noun = "problem" if len(failures) == 1 else "problems"
+        report.append("  " + output.fail(f"{len(failures)} {noun} found — see fixes above."))
+        return "\n".join(report)
+    report.append("  " + output.success("Everything looks good."))
+    # Only a result carrying profile context (the real `assembly doctor`) gets the
+    # "try transcribe" hint; a render without a profile is a partial check whose
+    # caller supplies its own next steps.
+    if profile is not None:
+        report.append("  " + output.hint("Try it: assembly transcribe --sample"))
+    return "\n".join(report)
diff --git a/aai_cli/help_panels.py b/aai_cli/help_panels.py
index 6eee4499..218448f7 100644
--- a/aai_cli/help_panels.py
+++ b/aai_cli/help_panels.py
@@ -3,8 +3,9 @@
 Rich groups top-level commands under these headings (via each command's
 ``rich_help_panel``), so the root help reads as a journey rather than a flat
 list — the same approach the Vercel and Supabase CLIs take. Panels render in
-the order their first command appears (see ``_COMMAND_ORDER`` in ``main.py``);
-most-used commands first, account/setup last.
+``PANEL_ORDER``; within a panel, each command module's ``SPEC.order`` rank
+decides (see ``aai_cli.command_registry``); most-used commands first,
+account/setup last.
 
 Centralized here so the heading strings have one source of truth — a typo in a
 decorator would otherwise silently spawn a duplicate panel.
@@ -19,6 +20,12 @@
 ACCOUNT = "Account"  # auth, billing, keys: login/logout/whoami, balance/usage/limits, keys, audit
 SETUP = "Setup & Tools"  # get set up & maintain: doctor, setup
 
+# The order panels render under `assembly --help`. Each command module declares the
+# panel it belongs to (`SPEC` in aai_cli/commands/*.py — see aai_cli.command_registry),
+# and ordering within a panel comes from that module's sparse `order` rank, so adding
+# a command never edits a shared ordering list; only a brand-new panel touches this.
+PANEL_ORDER = (QUICK_START, BUILD, TRANSCRIPTION, SETUP, HISTORY, ACCOUNT)
+
 # Option panels group a single command's flags within its own ``--help``. The
 # `transcribe` command exposes 40+ options; without panels they render as one
 # flat wall. Each ``typer.Option(rich_help_panel=...)`` files the flag under one
diff --git a/aai_cli/main.py b/aai_cli/main.py
index b3a233c8..c87f9880 100644
--- a/aai_cli/main.py
+++ b/aai_cli/main.py
@@ -2,16 +2,9 @@
 
 import logging
 import sys
-from types import ModuleType
 from typing import TYPE_CHECKING
 
 import typer
-from rich.console import RenderableType
-from rich.style import StyleType
-from rich.table import Table
-from typer import completion, rich_utils
-from typer._click.exceptions import ClickException, NoSuchOption
-from typer._click.exceptions import UsageError as ClickUsageError
 from typer._click.utils import PacifyFlushWrapper
 from typer.core import TyperGroup
 
@@ -20,81 +13,32 @@
     # context type, not the upstream click.Context. Imported for typing only.
     from typer._click.core import Context as ClickContext
 
-from aai_cli import __version__, argscan, debuglog, environments, help_panels, output, stdio, theme
-from aai_cli.commands import (
-    account,
-    agent,
-    audit,
-    caption,
-    clip,
-    deploy,
-    dev,
-    dictate,
-    doctor,
-    dub,
-    evaluate,
-    init,
-    keys,
-    llm,
-    login,
-    onboard,
-    sessions,
-    setup,
-    share,
-    speak,
-    stream,
-    telemetry,
-    transcribe,
-    transcripts,
-    webhooks,
+from aai_cli import (
+    __version__,
+    argscan,
+    command_registry,
+    debuglog,
+    environments,
+    output,
+    stdio,
+    typer_patches,
 )
+from aai_cli.commands import onboard
 from aai_cli.context import AppState
-from aai_cli.errors import CLIError, NotAuthenticated, UsageError
+from aai_cli.errors import CLIError, NotAuthenticated
 from aai_cli.help_text import examples_epilog
 from aai_cli.onboard import wizard
 from aai_cli.onboard.sections import WizardContext
 
-# The order commands appear under `assembly --help`. Commands are grouped into named
-# Rich panels (see `help_panels.py`); panels render in the order their first
-# command appears here, so keep each panel's commands contiguous and ordered
-# most-common-first. Names not listed fall to the end, sorted alphabetically.
-_COMMAND_ORDER = (
-    # Quick Start — zero-to-running onboarding
-    "onboard",
-    # Build an App — scaffold a new project
-    "init",
-    "dev",
-    "share",
-    "deploy",
-    # Run AssemblyAI — use AssemblyAI directly from the terminal
-    "transcribe",
-    "stream",
-    "dictate",
-    "agent",
-    "speak",
-    "llm",
-    "clip",
-    "dub",
-    "caption",
-    "eval",
-    "webhooks",
-    # Setup & Tools — get set up & maintain
-    "doctor",
-    "setup",
-    "telemetry",
-    # History — browse past work
-    "transcripts",
-    "sessions",
-    # Account — auth, then billing, then keys
-    "login",
-    "logout",
-    "whoami",
-    "balance",
-    "usage",
-    "limits",
-    "keys",
-    "audit",
-)
+# Every module under aai_cli/commands/ declares its own panel, rank, and command
+# names (`SPEC`, see aai_cli/command_registry.py); discovery imports and orders them
+# all, so registering a new command edits no shared list in this file.
+_REGISTERED_COMMAND_MODULES = command_registry.discover()
+
+# The order commands appear under `assembly --help`, derived from each module's SPEC:
+# panels render in help_panels.PANEL_ORDER and stay contiguous by construction.
+# Names not listed (the hidden _update-check) fall to the end, sorted alphabetically.
+_COMMAND_ORDER = command_registry.command_order(_REGISTERED_COMMAND_MODULES)
 
 
 class _OrderedGroup(TyperGroup):
@@ -115,149 +59,15 @@ def parse_args(self, ctx: ClickContext, args: list[str]) -> list[str]:
         # tell whether the (not-yet-parsed) subcommand opted into JSON — see
         # `argscan.requests_json`. Recorded here because Click clears the pending
         # args off the context before the group callback runs.
-        ctx.meta[_RAW_ARGS_META_KEY] = list(args)
+        ctx.meta[argscan.RAW_ARGS_META_KEY] = list(args)
         return super().parse_args(ctx, args)
 
 
-# Typer's default help palette is a rainbow: option flags/command names in "bold cyan",
-# the short switch (e.g. -p) in "bold green", and the type metavar (e.g. TEXT) in "bold
-# yellow". Retint the whole panel into the Cobolt brand family so help reads as one
-# monochrome hierarchy: flags and command names in the bold primary accent, their short
-# aliases matching, and the type metavar in the lighter secondary Cobolt so it recedes.
-# Set before the app renders any help.
-rich_utils.STYLE_OPTION = f"bold {theme.BRAND}"
-rich_utils.STYLE_COMMANDS_TABLE_FIRST_COLUMN = f"bold {theme.BRAND}"
-rich_utils.STYLE_SWITCH = f"bold {theme.BRAND}"
-rich_utils.STYLE_METAVAR = theme.ACCENT
-# The usage line ("Usage: assembly [OPTIONS] COMMAND [ARGS]...") defaults to yellow. Keep the
-# program name in the bold brand accent so it matches command names elsewhere, but drop
-# the "Usage:" label and arg spec to muted warm gray — it's boilerplate that should recede.
-rich_utils.STYLE_USAGE = theme.MUTED
-rich_utils.STYLE_USAGE_COMMAND = f"bold {theme.BRAND}"
-
-
-# Help tables put flag/command names in the leading columns and wrapping prose
-# (metavar, help text) in the trailing two. Rich's width collapse only spares no_wrap
-# columns, so on a narrow terminal it happily clips a flag name to "--end-of-turn-c…" —
-# unlearnable from the help screen itself. Pin every column except the last two so the
-# prose columns absorb the squeeze instead.
-class _NoClipTable(Table):
-    def add_row(
-        self,
-        *renderables: RenderableType | None,
-        style: StyleType | None = None,
-        end_section: bool = False,
-    ) -> None:
-        super().add_row(*renderables, style=style, end_section=end_section)
-        for column in self.columns[:-2]:
-            column.no_wrap = True
-
-
-def _patch_module(module: ModuleType, **attrs: object) -> None:
-    """Replace module attributes that are imports (not definitions) in their module —
-    strict mypy's no-implicit-reexport rejects plain attribute assignment for those."""
-    for name, value in attrs.items():
-        setattr(module, name, value)
-
-
-# Typer's own help/error consoles must also honor the closed-pipe contract: with
-# Rich's default Console, `assembly --help | head -2` exits 1 via Console.on_broken_pipe.
-_patch_module(rich_utils, Table=_NoClipTable, Console=theme.PipeSafeConsole)
-
-_format_click_error = rich_utils.rich_format_error
-
-# Flags users habitually pass at the wrong level: `--json` belongs on the subcommand
-# (`assembly transcribe --json`), while the root callback's flags belong before it
-# (`assembly --sandbox transcribe`). A bare "No such option" — or worse, a similarity
-# guess like "(Possible options: --version)" — is unlearnable, so the Click error
-# formatter appends the correct placement instead.
-
-
-def _root_only_flags(ctx: ClickContext) -> frozenset[str]:
-    """Every flag the root callback declares (--quiet, --sandbox, --env, …), read off
-    the declarations themselves so a new global flag gets the placement hint without
-    a hand-maintained parallel list."""
-    return frozenset(opt for param in ctx.find_root().command.params for opt in param.opts)
-
-
-def _misplaced_flag_hint(err: NoSuchOption) -> str | None:
-    """A placement hint when a known flag landed at the wrong level, else None."""
-    ctx = err.ctx
-    if ctx is None:
-        return None
-    if ctx.parent is None:
-        if err.option_name in argscan.JSON_FLAGS:
-            return "Pass --json after the subcommand: assembly <command> --json"
-        return None
-    if err.option_name in _root_only_flags(ctx):
-        command = ctx.command_path.removeprefix("assembly ")
-        return (
-            "This is a global flag; pass it before the subcommand: "
-            f"assembly {err.option_name} {command} …"
-        )
-    return None
-
-
-def _rewrite_version_command_error(err: ClickException) -> None:
-    # There is no `version` subcommand (the reflex is `assembly --version`), and the
-    # closest-match engine would suggest an unrelated command ("Did you mean
-    # 'sessions'?"). Point at the real spelling instead.
-    if err.message.startswith("No such command 'version'"):
-        err.message = "No such command 'version'. Did you mean 'assembly --version'?"
-
-
-def _click_error_requests_json(err: ClickException) -> bool:
-    """Whether the invocation that failed to parse had opted into JSON output.
-
-    A parse error fires before any command's own ``--json`` is read, so sniff the raw
-    token list the root group stashed on the context (see ``_OrderedGroup.parse_args``).
-    A ClickException raised without a context falls back to the process argv.
-    """
-    ctx = err.ctx if isinstance(err, ClickUsageError) else None
-    if ctx is not None and _RAW_ARGS_META_KEY in ctx.meta:
-        raw_args: list[str] = ctx.meta[_RAW_ARGS_META_KEY]
-    else:
-        raw_args = sys.argv[1:]
-    return argscan.requests_json(raw_args)
-
-
-def _format_click_error_fixed(self: ClickException) -> None:
-    # Typer's vendored Click renders flag suggestions as a stringified 1-tuple:
-    # "No such option: --jsno ('(Possible options: --json)',)". Fold the suggestion
-    # into the message ourselves so the user sees "(Possible options: --json)" — or,
-    # for a known flag passed at the wrong level, the placement hint instead of a
-    # misleading similarity guess.
-    if isinstance(self, NoSuchOption):
-        hint = _misplaced_flag_hint(self)
-        if hint is not None:
-            self.message = f"{self.message}. {hint}"
-        elif self.possibilities:
-            self.message = (
-                f"{self.message} (Possible options: {', '.join(sorted(self.possibilities))})"
-            )
-        self.possibilities = None
-    _rewrite_version_command_error(self)
-    if _click_error_requests_json(self):
-        # An invocation that opted into JSON gets the uniform {"error": …} envelope for
-        # parse errors too, mirroring the root-callback failure path; the exit code (2)
-        # is Click's and unchanged. NoArgsIsHelpError never reaches this branch: its
-        # message is the help screen and a bare invocation carries no JSON flag.
-        output.emit_error(UsageError(self.format_message()), json_mode=True)
-        return
-    _format_click_error(self)
-
-
-rich_utils.rich_format_error = _format_click_error_fixed
-
-# Typer's built-in `--show-completion` help is long enough to wrap several lines in
-# the options panel. Trim it so it fits on fewer rows. The OptionInfo objects live on
-# the completion placeholder's parameter defaults; reach the (underscore-prefixed)
-# placeholder through the module dict so it isn't flagged as private-attribute use.
-_completion_placeholder = vars(completion)["_install_completion_placeholder_function"]
-for _opt in _completion_placeholder.__defaults__ or ():
-    if isinstance(_opt.help, str) and _opt.help.startswith("Show completion"):
-        _opt.help = "Show completion for the current shell."
-
+# Brand-retint Typer's help palette, pin help-table columns against clipping, make
+# Typer's consoles pipe-safe, fix Click's error formatting, and trim the completion
+# help — every Typer/Click/Rich override lives in typer_patches so a dependency
+# upgrade that breaks one is fixed in one file. Must run before any help renders.
+typer_patches.apply()
 
 app = typer.Typer(
     name="assembly",
@@ -287,14 +97,6 @@ def _profile_has_key(state: AppState) -> bool:
     return True
 
 
-# The root callback runs before the subcommand parses its own ``--json``, so a failure
-# raised there (e.g. a bad ``--env``) would otherwise always render human text — leaving a
-# ``… --json`` pipeline without the uniform ``{"error": …}`` shape it relies on. The group
-# stashes the raw token list in ``ctx.meta`` (see ``_OrderedGroup.parse_args``) before the
-# callback runs, so sniffing it with ``argscan.requests_json`` lets every failure class
-# honor the request.
-_RAW_ARGS_META_KEY = "aai_raw_args"
-
 _LOG = logging.getLogger("aai_cli")
 
 
@@ -379,7 +181,7 @@ def main(
     # Enabled before anything else runs so even environment/profile resolution
     # failures can be diagnosed with -v.
     debuglog.enable(verbose)
-    raw_args: list[str] = ctx.meta.get(_RAW_ARGS_META_KEY, [])
+    raw_args: list[str] = ctx.meta.get(argscan.RAW_ARGS_META_KEY, [])
     json_mode = output.resolve_json(explicit=argscan.requests_json(raw_args))
     conflict_warning = _sandbox_conflict_warning(sandbox, env)
     if sandbox and env is None:
@@ -402,35 +204,20 @@ def main(
         _offer_or_help(ctx, state)
 
 
-# Help-panel grouping: named sub-typers carry their panel on `add_typer`; merged
-# (nameless) sub-typers don't propagate it, so those commands set `rich_help_panel`
-# on their own `@app.command()` (see each command module). Final ordering within a
-# panel is controlled by `_COMMAND_ORDER` via `_OrderedGroup`, not registration order.
-app.add_typer(transcribe.app)
-app.add_typer(stream.app)
-app.add_typer(dictate.app)
-app.add_typer(transcripts.app, name="transcripts", rich_help_panel=help_panels.HISTORY)
-app.add_typer(sessions.app, name="sessions", rich_help_panel=help_panels.HISTORY)
-app.add_typer(audit.app)  # audit
-app.add_typer(agent.app)
-app.add_typer(speak.app)
-app.add_typer(llm.app)
-app.add_typer(clip.app)
-app.add_typer(dub.app)
-app.add_typer(caption.app)
-app.add_typer(evaluate.app)  # eval
-app.add_typer(account.app)  # balance, usage, limits
-app.add_typer(login.app)  # login, logout, whoami
-app.add_typer(doctor.app)
-app.add_typer(init.app)
-app.add_typer(dev.app)
-app.add_typer(share.app)
-app.add_typer(deploy.app)
-app.add_typer(onboard.app)
-app.add_typer(setup.app, name="setup", rich_help_panel=help_panels.SETUP)
-app.add_typer(telemetry.app, name="telemetry", rich_help_panel=help_panels.SETUP)
-app.add_typer(keys.app, name="keys", rich_help_panel=help_panels.ACCOUNT)
-app.add_typer(webhooks.app, name="webhooks", rich_help_panel=help_panels.TRANSCRIPTION)
+# Help-panel grouping: named sub-typers (SPEC.group_name set) carry their panel on
+# `add_typer`; merged (nameless) sub-typers don't propagate it, so those commands set
+# `rich_help_panel` on their own `@app.command()` (see each command module). Final
+# ordering within a panel comes from each module's SPEC via `_OrderedGroup`, not
+# registration order.
+for _registered in _REGISTERED_COMMAND_MODULES:
+    if _registered.spec.group_name is None:
+        app.add_typer(_registered.app)
+    else:
+        app.add_typer(
+            _registered.app,
+            name=_registered.spec.group_name,
+            rich_help_panel=_registered.spec.panel,
+        )
 
 
 @app.command(
diff --git a/aai_cli/onboard/sections.py b/aai_cli/onboard/sections.py
index 80695c45..60f4d493 100644
--- a/aai_cli/onboard/sections.py
+++ b/aai_cli/onboard/sections.py
@@ -7,9 +7,16 @@
 import assemblyai as aai
 import typer
 
-from aai_cli import config, environments, init_exec, output, transcribe_exec, transcribe_render
-from aai_cli.commands import doctor as doctor_cmd
-from aai_cli.commands import setup as setup_cmd
+from aai_cli import (
+    config,
+    doctor_checks,
+    environments,
+    init_exec,
+    output,
+    setup_exec,
+    transcribe_exec,
+    transcribe_render,
+)
 from aai_cli.context import AppState, persist_browser_login
 from aai_cli.errors import CLIError
 from aai_cli.init import runner
@@ -98,7 +105,7 @@ def first_request(prompter: Prompter, ctx: WizardContext) -> SectionResult:
 ]
 
 
-def _environment_summary(checks: list[doctor_cmd.Check]) -> str:
+def _environment_summary(checks: list[doctor_checks.Check]) -> str:
     """The closing line, computed from the actual statuses: doctor.render's
     all-or-nothing `ok` flag can't say "warnings only", which previously put
     "Everything looks good." right under a warning."""
@@ -113,19 +120,19 @@ def _environment_summary(checks: list[doctor_cmd.Check]) -> str:
     return output.success("Everything looks good.")
 
 
-def _render_environment(checks: list[doctor_cmd.Check]) -> str:
+def _render_environment(checks: list[doctor_checks.Check]) -> str:
     """The wizard's render of the doctor checks: doctor's own per-check lines, with
     the summary derived from what the checks actually reported."""
-    lines = [output.heading("Environment check"), *doctor_cmd.render_check_lines(checks)]
+    lines = [output.heading("Environment check"), *doctor_checks.render_check_lines(checks)]
     lines.append("  " + _environment_summary(checks))
     return "\n".join(lines)
 
 
 def environment(prompter: Prompter, ctx: WizardContext) -> SectionResult:
     checks = [
-        doctor_cmd.check_python(),
-        doctor_cmd.check_ffmpeg(),
-        doctor_cmd.check_audio(),
+        doctor_checks.check_python(),
+        doctor_checks.check_ffmpeg(),
+        doctor_checks.check_audio(),
     ]
     if not ctx.json_mode:  # --json owns stdout (the final summary); skip the human render
         # `_render_environment` prints its own "Environment check" heading, so we don't
@@ -175,11 +182,11 @@ def claude_code(prompter: Prompter, _ctx: WizardContext) -> SectionResult:
     if not prompter.confirm("Wire up Claude Code (docs MCP + skills)?", default=False):
         return SectionResult.SKIPPED
     steps = [
-        setup_cmd.install_mcp("user", force=False),
-        setup_cmd.install_skill(force=False),
-        setup_cmd.install_cli_skill(force=False),
+        setup_exec.install_mcp("user", force=False),
+        setup_exec.install_skill(force=False),
+        setup_exec.install_cli_skill(force=False),
     ]
-    output.console.print(setup_cmd.render({"steps": steps}))
+    output.console.print(setup_exec.render({"steps": steps}))
     if any(s["status"] == "failed" for s in steps):
         return SectionResult.FAILED
     return SectionResult.DONE
diff --git a/aai_cli/setup_exec.py b/aai_cli/setup_exec.py
new file mode 100644
index 00000000..e09832ff
--- /dev/null
+++ b/aai_cli/setup_exec.py
@@ -0,0 +1,243 @@
+"""Install/status/remove steps for `assembly setup`, shared with onboarding.
+
+Command modules are import-linter-independent, so the step implementations live
+here in the core layer; ``commands/setup.py`` drives them from the CLI and the
+onboarding wizard (``onboard/sections.py``) reuses the install steps directly.
+"""
+
+from __future__ import annotations
+
+import shutil
+import subprocess
+from pathlib import Path
+from typing import TYPE_CHECKING
+
+from aai_cli import coding_agent
+from aai_cli.steps import Step, render_steps
+
+if TYPE_CHECKING:
+    # Annotation only (PEP 563 string), so no runtime import. Import from
+    # importlib.abc — that is the protocol `resources.files()` is typed to return.
+    from importlib.abc import Traversable
+
+MCP_URL = "https://mcp.assemblyai.com/docs"
+SKILL_REPO = "AssemblyAI/assemblyai-skill"
+_STEPS_HEADING = "AssemblyAI coding-agent setup:"
+
+# The subprocess wrapper, artifact names, and presence probes are shared with
+# `assembly doctor`, so they live in aai_cli.coding_agent; the names below keep
+# this module's call sites stable.
+MCP_NAME = coding_agent.MCP_NAME
+_run = coding_agent.run
+_mcp_present = coding_agent.mcp_present
+_skill_dir = coding_agent.skill_dir
+_skill_installed = coding_agent.skill_installed
+_cli_skill_dir = coding_agent.cli_skill_dir
+_cli_skill_installed = coding_agent.cli_skill_installed
+
+
+def _proc_detail(proc: subprocess.CompletedProcess[str]) -> str:
+    """Error text from a finished process: stderr unless blank or None, else stdout ("" if neither)."""
+    return (proc.stderr or "").strip() or (proc.stdout or "").strip()
+
+
+# --- docs MCP (registered via the `claude` CLI) ------------------------------
+
+
+def install_mcp(scope: str, *, force: bool) -> Step:
+    """Register the docs MCP with Claude Code via the `claude` CLI.
+
+    Reports `skipped` without the CLI, `already` when registered and not forced,
+    `failed` when a `claude mcp` call exits non-zero, else `installed`."""
+    if shutil.which("claude") is None:
+        # Nothing can be registered without the CLI, so hand back the manual command.
+        manual = (
+            "Claude Code not found. Install it (https://claude.com/claude-code), "
+            f"then run: claude mcp add --transport http --scope {scope} "
+            f"{MCP_NAME} {MCP_URL}"
+        )
+        return {"name": "mcp", "status": "skipped", "detail": manual}
+    if _mcp_present():
+        if not force:
+            return {"name": "mcp", "status": "already", "detail": f"{MCP_NAME} already registered"}
+        # Forced reinstall: the existing registration has to go before it is re-added.
+        dropped = _run(["claude", "mcp", "remove", MCP_NAME])
+        if dropped.returncode != 0:
+            reason = f"could not remove existing {MCP_NAME}: " + _proc_detail(dropped)
+            return {"name": "mcp", "status": "failed", "detail": reason}
+    # Reached on a fresh install and after a successful forced removal.
+    add_cmd = ["claude", "mcp", "add", "--transport", "http", "--scope", scope, MCP_NAME, MCP_URL]
+    added = _run(add_cmd)
+    if added.returncode == 0:
+        return {"name": "mcp", "status": "installed", "detail": f"{MCP_NAME} @ {scope} scope"}
+    return {"name": "mcp", "status": "failed", "detail": _proc_detail(added)}
+
+
+def mcp_status() -> Step:
+    """Whether the docs MCP is registered; `unknown` when `claude` is not on PATH."""
+    if shutil.which("claude") is None:
+        return {"name": "mcp", "status": "unknown", "detail": "Claude Code not found"}
+    # The presence probe is only consulted once the `claude` binary has been
+    # found; the detail is the MCP name whichever way it answers.
+    if _mcp_present():
+        return {"name": "mcp", "status": "installed", "detail": MCP_NAME}
+    return {"name": "mcp", "status": "not_installed", "detail": MCP_NAME}
+
+
+def remove_mcp(scope: str | None) -> Step:
+    """Unregister the docs MCP via `claude mcp remove`, limited to `scope` when given."""
+    if shutil.which("claude") is None:
+        return {"name": "mcp", "status": "skipped", "detail": "Claude Code not found"}
+    if not _mcp_present():
+        return {"name": "mcp", "status": "not_installed", "detail": MCP_NAME}
+    # With no scope given, the --scope flag is left off the command entirely.
+    scope_args: list[str] = [] if scope is None else ["--scope", scope]
+    outcome = _run(["claude", "mcp", "remove", MCP_NAME, *scope_args])
+    if outcome.returncode == 0:
+        return {"name": "mcp", "status": "removed", "detail": MCP_NAME}
+    return {"name": "mcp", "status": "failed", "detail": _proc_detail(outcome)}
+
+
+# --- assemblyai skill (downloaded from its own repo via the `skills` CLI) -----
+
+_SKILL_ADD = ["npx", "-y", "skills", "add", SKILL_REPO, "--global", "--yes"]
+_SKILL_REMOVE = ["npx", "-y", "skills", "remove", "assemblyai", "--global"]
+_SKILL_ADD_HINT = f"npx skills add {SKILL_REPO} --global"
+
+
+def install_skill(*, force: bool) -> Step:
+    """Install the assemblyai skill through `npx skills add`, then verify it is on disk."""
+    if shutil.which("npx") is None:
+        return {
+            "name": "skill",
+            "status": "skipped",
+            "detail": f"Node.js/npx not found. Install Node.js, then run: {_SKILL_ADD_HINT}",
+        }
+    # Idempotent like the MCP step: a skill already on disk is reported as `already`
+    # unless --force was passed, rather than re-downloaded and claimed `installed`.
+    if _skill_installed() and not force:
+        return {
+            "name": "skill",
+            "status": "already",
+            "detail": f"assemblyai skill at {_skill_dir()}",
+        }
+    # --global: install at user scope (not project scope, which `skills` auto-selects
+    # when run inside a project) so the skill lands in ~/.claude/skills where `status`
+    # looks. npx -y skips its install prompt; the longer timeout covers the download.
+    proc = _run(_SKILL_ADD, timeout=300)
+    if proc.returncode != 0:
+        return {"name": "skill", "status": "failed", "detail": _proc_detail(proc)}
+    # Trust the filesystem, not the exit code: confirm the skill actually landed
+    # where `status` looks, so the two commands can never disagree.
+    if not _skill_installed():
+        return {
+            "name": "skill",
+            "status": "failed",
+            "detail": (
+                f"'{' '.join(_SKILL_ADD[3:])}' reported success but no skill was found at "
+                f"{_skill_dir()}. Install it manually: {_SKILL_ADD_HINT}"
+            ),
+        }
+    return {"name": "skill", "status": "installed", "detail": str(_skill_dir())}
+
+
+def skill_status() -> Step:
+    """Report whether the assemblyai skill is on disk.
+
+    The detail is always the skill directory, installed or not."""
+    state = "installed" if _skill_installed() else "not_installed"
+    return {"name": "skill", "status": state, "detail": str(_skill_dir())}
+
+
+def remove_skill() -> Step:
+    """Remove the assemblyai skill by delegating to the `skills` CLI through npx."""
+    if not _skill_installed():
+        return {"name": "skill", "status": "not_installed", "detail": str(_skill_dir())}
+    if shutil.which("npx") is None:
+        manual = "Node.js/npx not found. Remove manually: npx skills remove assemblyai --global"
+        return {"name": "skill", "status": "skipped", "detail": manual}
+    # The `skills` CLI owns the removal: it symlinked the skill into ~/.claude/skills
+    # from its own store, so a plain rmtree would choke on the symlink and orphan it.
+    result = _run(_SKILL_REMOVE, timeout=120)
+    # Success needs both a clean exit and the skill actually gone from disk.
+    removed = result.returncode == 0 and not _skill_installed()
+    if removed:
+        return {"name": "skill", "status": "removed", "detail": str(_skill_dir())}
+    reason = _proc_detail(result) or "skill still present after removal"
+    return {"name": "skill", "status": "failed", "detail": reason}
+
+
+# --- aai-cli skill (bundled in this package, copied into the agent) -----------
+
+
+def _bundled_cli_skill() -> Traversable:
+    # Shipped inside the wheel (force-included via [tool.hatch.build.targets.wheel]
+    # artifacts); skills/ has no __init__.py, so walk down from the aai_cli package.
+    from importlib.resources import files
+    skills_root = files("aai_cli") / "skills"
+    return skills_root / coding_agent.CLI_SKILL_NAME
+
+
+def _copy_tree(node: Traversable, dest: Path) -> None:
+    """Recursively copy `node` into `dest`, leaving out __pycache__ and .pyc entries."""
+    dest.mkdir(parents=True, exist_ok=True)
+    for entry in node.iterdir():
+        if entry.name != "__pycache__" and not entry.name.endswith(".pyc"):
+            target = dest / entry.name
+            if entry.is_dir():
+                _copy_tree(entry, target)
+            else:
+                target.write_bytes(entry.read_bytes())
+
+
+def install_cli_skill(*, force: bool) -> Step:
+    """Copy the bundled aai-cli skill into the agent's skills directory.
+
+    No network or npx is involved because the skill ships inside this package.
+    Idempotent: an existing install is reported as `already` unless `force` is set."""
+    name = "aai-cli skill"
+    target = _cli_skill_dir()
+    if _cli_skill_installed() and not force:
+        return {"name": name, "status": "already", "detail": f"aai-cli skill at {target}"}
+    bundled = _bundled_cli_skill()
+    # No bundled directory means there is nothing to copy from.
+    if not bundled.is_dir():
+        problem = f"bundled aai-cli skill missing at {bundled} — this is a packaging bug."
+        return {"name": name, "status": "failed", "detail": problem}
+    # Replace rather than merge: whatever is at the destination is removed first.
+    if target.exists():
+        shutil.rmtree(target)
+    _copy_tree(bundled, target)
+    # The copy only counts once the installed-probe agrees the skill is there.
+    if _cli_skill_installed():
+        return {"name": name, "status": "installed", "detail": str(target)}
+    problem = f"copied the bundled skill but {target / 'SKILL.md'} is missing."
+    return {"name": name, "status": "failed", "detail": problem}
+
+
+def cli_skill_status() -> Step:
+    """Report whether the aai-cli skill is installed.
+
+    The detail is always the skill's destination directory, installed or not."""
+    state = "installed" if _cli_skill_installed() else "not_installed"
+    return {"name": "aai-cli skill", "status": state, "detail": str(_cli_skill_dir())}
+
+
+def remove_cli_skill() -> Step:
+    """Delete the copied aai-cli skill directory.
+
+    Reports `failed` when the skill is still detected after the delete."""
+    target = _cli_skill_dir()
+    if not _cli_skill_installed():
+        return {"name": "aai-cli skill", "status": "not_installed", "detail": str(target)}
+    # Install copied a real directory (no symlink into a store), so a plain rmtree
+    # of the destination is the whole removal; errors surface via the re-check below.
+    shutil.rmtree(target, ignore_errors=True)
+    if not _cli_skill_installed():
+        return {"name": "aai-cli skill", "status": "removed", "detail": str(target)}
+    still_there = "skill still present after removal"
+    return {"name": "aai-cli skill", "status": "failed", "detail": still_there}
+
+
+def render(data: dict[str, list[Step]]) -> str:
+    return render_steps(data["steps"], heading=_STEPS_HEADING)  # only the "steps" entry is read
diff --git a/aai_cli/typer_patches.py b/aai_cli/typer_patches.py
new file mode 100644
index 00000000..160868e0
--- /dev/null
+++ b/aai_cli/typer_patches.py
@@ -0,0 +1,180 @@
+"""Every patch the CLI applies to Typer's vendored Click and Rich rendering.
+
+Typer's defaults break four contracts this CLI keeps: a rainbow help palette
+(retinted to the brand family), flag-name columns clipped to "--end-of-turn-c…"
+on narrow terminals (pinned via ``_NoClipTable``), `assembly --help | head` exiting 1
+on the closed pipe (``theme.PipeSafeConsole``), and unknown-flag errors that
+leak a tuple repr or suggest the wrong placement (the error-formatter patch).
+
+Isolated here — not inline in ``main.py`` — so a Typer/Click/Rich upgrade that
+breaks a patch is fixed in one file. Written against Typer >= 0.13 (the
+vendored-click era); each patch notes the upstream behavior it overrides.
+``main.py`` calls :func:`apply` once at import time, before any help renders.
+"""
+
+from __future__ import annotations
+
+import sys
+from types import ModuleType
+from typing import TYPE_CHECKING
+
+from rich.console import RenderableType
+from rich.style import StyleType
+from rich.table import Table
+from typer import completion, rich_utils
+from typer._click.exceptions import ClickException, NoSuchOption
+from typer._click.exceptions import UsageError as ClickUsageError
+
+from aai_cli import argscan, output, theme
+from aai_cli.errors import UsageError
+
+if TYPE_CHECKING:
+    # Typer (>=0.13) vendors its own click; these patches receive its context
+    # type, not the upstream click.Context. Imported for typing only.
+    from typer._click.core import Context as ClickContext
+
+
+# Help tables put flag/command names in the leading columns and wrapping prose
+# (metavar, help text) in the trailing two. Rich's width collapse only spares no_wrap
+# columns, so on a narrow terminal it happily clips a flag name to "--end-of-turn-c…" —
+# unlearnable from the help screen itself. Pin every column except the last two so the
+# prose columns absorb the squeeze instead.
+class _NoClipTable(Table):
+    def add_row(
+        self,
+        *renderables: RenderableType | None,
+        style: StyleType | None = None,
+        end_section: bool = False,
+    ) -> None:
+        super().add_row(*renderables, style=style, end_section=end_section)
+        for column in self.columns[:-2]:
+            column.no_wrap = True
+
+
+def _patch_module(module: ModuleType, **attrs: object) -> None:
+    """Replace module attributes that are imports (not definitions) in their module —
+    strict mypy's no-implicit-reexport rejects plain attribute assignment for those."""
+    for name, value in attrs.items():
+        setattr(module, name, value)
+
+
+# The original Click error renderer, captured at import time — before apply() swaps
+# it — so the patched formatter can delegate the human-text path to the real one.
+_format_click_error = rich_utils.rich_format_error
+
+# Flags users habitually pass at the wrong level: `--json` belongs on the subcommand
+# (`assembly transcribe --json`), while the root callback's flags belong before it
+# (`assembly --sandbox transcribe`). A bare "No such option" — or worse, a similarity
+# guess like "(Possible options: --version)" — is unlearnable, so the Click error
+# formatter appends the correct placement instead.
+
+
+def _root_only_flags(ctx: ClickContext) -> frozenset[str]:
+    """Every flag the root callback declares (--quiet, --sandbox, --env, …), read off
+    the declarations themselves so a new global flag gets the placement hint without
+    a hand-maintained parallel list."""
+    return frozenset(opt for param in ctx.find_root().command.params for opt in param.opts)
+
+
+def _misplaced_flag_hint(err: NoSuchOption) -> str | None:
+    """A placement hint when a known flag landed at the wrong level, else None."""
+    ctx = err.ctx
+    if ctx is None:
+        return None
+    if ctx.parent is None:
+        if err.option_name in argscan.JSON_FLAGS:
+            return "Pass --json after the subcommand: assembly <command> --json"
+        return None
+    if err.option_name in _root_only_flags(ctx):
+        command = ctx.command_path.removeprefix("assembly ")
+        return (
+            "This is a global flag; pass it before the subcommand: "
+            f"assembly {err.option_name} {command} …"
+        )
+    return None
+
+
+def _rewrite_version_command_error(err: ClickException) -> None:
+    # There is no `version` subcommand (the reflex is `assembly --version`), and the
+    # closest-match engine would suggest an unrelated command ("Did you mean
+    # 'sessions'?"). Point at the real spelling instead.
+    if err.message.startswith("No such command 'version'"):
+        err.message = "No such command 'version'. Did you mean 'assembly --version'?"
+
+
+def _click_error_requests_json(err: ClickException) -> bool:
+    """Whether the invocation that failed to parse had opted into JSON output.
+
+    A parse error fires before any command's own ``--json`` is read, so sniff the raw
+    token list the root group stashed on the context (see ``_OrderedGroup.parse_args``
+    in main.py). A ClickException raised without a context falls back to the process
+    argv.
+    """
+    ctx = err.ctx if isinstance(err, ClickUsageError) else None
+    if ctx is not None and argscan.RAW_ARGS_META_KEY in ctx.meta:
+        raw_args: list[str] = ctx.meta[argscan.RAW_ARGS_META_KEY]
+    else:
+        raw_args = sys.argv[1:]
+    return argscan.requests_json(raw_args)
+
+
+def _format_click_error_fixed(self: ClickException) -> None:
+    # Typer's vendored Click renders flag suggestions as a stringified 1-tuple:
+    # "No such option: --jsno ('(Possible options: --json)',)". Fold the suggestion
+    # into the message ourselves so the user sees "(Possible options: --json)" — or,
+    # for a known flag passed at the wrong level, the placement hint instead of a
+    # misleading similarity guess.
+    if isinstance(self, NoSuchOption):
+        hint = _misplaced_flag_hint(self)
+        if hint is not None:
+            self.message = f"{self.message}. {hint}"
+        elif self.possibilities:
+            self.message = (
+                f"{self.message} (Possible options: {', '.join(sorted(self.possibilities))})"
+            )
+        self.possibilities = None
+    _rewrite_version_command_error(self)
+    if _click_error_requests_json(self):
+        # An invocation that opted into JSON gets the uniform {"error": …} envelope for
+        # parse errors too, mirroring the root-callback failure path; the exit code (2)
+        # is Click's and unchanged. NoArgsIsHelpError never reaches this branch: its
+        # message is the help screen and a bare invocation carries no JSON flag.
+        output.emit_error(UsageError(self.format_message()), json_mode=True)
+        return
+    _format_click_error(self)
+
+
+def _trim_completion_help() -> None:
+    # Typer's built-in `--show-completion` help is long enough to wrap several lines in
+    # the options panel. Trim it so it fits on fewer rows. The OptionInfo objects live on
+    # the completion placeholder's parameter defaults; reach the (underscore-prefixed)
+    # placeholder through the module dict so it isn't flagged as private-attribute use.
+    completion_placeholder = vars(completion)["_install_completion_placeholder_function"]
+    for opt in completion_placeholder.__defaults__ or ():
+        if isinstance(opt.help, str) and opt.help.startswith("Show completion"):
+            opt.help = "Show completion for the current shell."
+
+
+def apply() -> None:
+    """Apply every patch. Idempotent; must run before the app renders any help."""
+    # Typer's default help palette is a rainbow: option flags/command names in "bold
+    # cyan", the short switch (e.g. -p) in "bold green", and the type metavar (e.g.
+    # TEXT) in "bold yellow". Retint the whole panel into the Cobolt brand family so
+    # help reads as one monochrome hierarchy: flags and command names in the bold
+    # primary accent, their short aliases matching, and the type metavar in the
+    # lighter secondary Cobolt so it recedes.
+    rich_utils.STYLE_OPTION = f"bold {theme.BRAND}"
+    rich_utils.STYLE_COMMANDS_TABLE_FIRST_COLUMN = f"bold {theme.BRAND}"
+    rich_utils.STYLE_SWITCH = f"bold {theme.BRAND}"
+    rich_utils.STYLE_METAVAR = theme.ACCENT
+    # The usage line ("Usage: assembly [OPTIONS] COMMAND [ARGS]...") defaults to yellow.
+    # Keep the program name in the bold brand accent so it matches command names
+    # elsewhere, but drop the "Usage:" label and arg spec to muted warm gray — it's
+    # boilerplate that should recede.
+    rich_utils.STYLE_USAGE = theme.MUTED
+    rich_utils.STYLE_USAGE_COMMAND = f"bold {theme.BRAND}"
+    # Typer's own help/error consoles must also honor the closed-pipe contract: with
+    # Rich's default Console, `assembly --help | head -2` exits 1 via Console.on_broken_pipe.
+    _patch_module(rich_utils, Table=_NoClipTable, Console=theme.PipeSafeConsole)
+    rich_utils.rich_format_error = _format_click_error_fixed
+    _trim_completion_help()
diff --git a/pyproject.toml b/pyproject.toml
index 4914eafb..2e5bd6df 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -131,7 +131,7 @@ artifacts = [
     "aai_cli/skills/**",
     "aai_cli/streaming/macos_system_audio.swift",
 ]
-exclude = ["**/__pycache__", "**/*.pyc"]
+exclude = ["**/__pycache__", "**/*.pyc", "**/AGENTS.md", "**/CLAUDE.md"]
 
 [tool.pytest.ini_options]
 testpaths = ["tests"]
diff --git a/scripts/check.sh b/scripts/check.sh
index b312a7d5..a89e1f14 100755
--- a/scripts/check.sh
+++ b/scripts/check.sh
@@ -208,10 +208,10 @@ fi
 
 echo "==> no new static-analysis escape hatches"
 # Existing escape hatches are tolerated for now; new ones must be refactored away or
-# justified by changing this gate deliberately. Broad noqa/type-ignore/no-cover are
-# checked by added diff lines. `Any` and `cast(` are count-gated against the
-# merge-base with origin/main so mechanical edits to existing uses don't fail, but
-# net-new uses do.
+# justified by changing this gate deliberately. All hatch classes are count-gated
+# against the merge-base with origin/main so mechanical edits — and *moving* an
+# existing hatch (refactors relocate code wholesale, which an added-line scan would
+# false-positive on) — don't fail, but net-new uses do.
 if git rev-parse --verify --quiet origin/main >/dev/null; then
   # Diff and count against the MERGE-BASE, not the origin/main tip (which the
   # mutation gate and diff-cover already do). With many concurrent branches,
@@ -220,25 +220,28 @@ if git rev-parse --verify --quiet origin/main >/dev/null; then
   # though the branch itself added nothing. The merge-base only moves when the
   # branch itself rebases.
   gate_base="$(git merge-base origin/main HEAD || echo origin/main)"
-  escape_hatches="$(git diff -U0 "$gate_base" -- aai_cli tests \
-    | rg '^\+.*(# type: ignore|# noqa|pragma: no cover)' || true)"
-  if [[ -n "$escape_hatches" ]]; then
-    printf '%s\n' "$escape_hatches"
-    echo "New static-analysis ignore/no-cover escape hatch found; refactor it or update the gate explicitly."
+  hatch_pattern='# type: ignore|# noqa|pragma: no cover'
+  base_hatch_count="$({ git grep -nE "$hatch_pattern" "$gate_base" -- aai_cli tests || true; } | wc -l | tr -d '[:space:]')"
+  work_hatch_count="$({ rg -n "$hatch_pattern" aai_cli tests || true; } | wc -l | tr -d '[:space:]')"
+  if (( work_hatch_count > base_hatch_count )); then
+    { rg -n "$hatch_pattern" aai_cli tests || true; } | tail -n 20
+    echo "New static-analysis ignore/no-cover escape hatch found: ${work_hatch_count} current vs ${base_hatch_count} at the merge-base with origin/main. Refactor it or update the gate explicitly."
     exit 1
   fi
 
   # Test-suite escape hatches, same net-new-only policy: a skip/xfail is how an agent
   # makes a red test go away instead of fixing it, and time.sleep() is the classic
   # source of flakiness (use events/polling). The legitimate existing skips guard the
-  # env-gated marker suites (e2e/install) and live on origin/main, so they aren't
-  # added diff lines and don't trip this; a genuinely-needed new one must update this
-  # gate deliberately. Scoped to tests/ — production sleeps are fine.
-  test_shortcuts="$(git diff -U0 "$gate_base" -- tests \
-    | rg '^\+.*(pytest\.skip\(|pytest\.xfail\(|@pytest\.mark\.(skip|xfail)|\btime\.sleep\()' || true)"
-  if [[ -n "$test_shortcuts" ]]; then
-    printf '%s\n' "$test_shortcuts"
-    echo "New test skip/xfail/time.sleep found; fix the test (or sync properly) or update the gate explicitly."
+  # env-gated marker suites (e2e/install) and are counted at the merge-base, so they
+  # don't trip this — and neither does moving one in a refactor; a genuinely-needed
+  # new one must update this gate deliberately. Scoped to tests/ — production sleeps
+  # are fine.
+  shortcut_pattern='pytest\.skip\(|pytest\.xfail\(|@pytest\.mark\.(skip|xfail)|\btime\.sleep\('
+  base_shortcut_count="$({ git grep -nE "$shortcut_pattern" "$gate_base" -- tests || true; } | wc -l | tr -d '[:space:]')"
+  work_shortcut_count="$({ rg -n "$shortcut_pattern" tests || true; } | wc -l | tr -d '[:space:]')"
+  if (( work_shortcut_count > base_shortcut_count )); then
+    { rg -n "$shortcut_pattern" tests || true; } | tail -n 20
+    echo "New test skip/xfail/time.sleep found: ${work_shortcut_count} current vs ${base_shortcut_count} at the merge-base with origin/main. Fix the test (or sync properly) or update the gate explicitly."
     exit 1
   fi
 
diff --git a/tests/AGENTS.md b/tests/AGENTS.md
new file mode 100644
index 00000000..ba88180a
--- /dev/null
+++ b/tests/AGENTS.md
@@ -0,0 +1,100 @@
+# tests/ — test-suite guide
+
+Scoped guidance for the test suite. Repo-wide invariants (gate, commit hooks)
+live in the root `AGENTS.md`; architecture lives in `aai_cli/AGENTS.md`.
+
+## Test markers
+
+The default suite **excludes** two slow/credentialed marker sets — `pyproject.toml`'s `addopts` carries `-m "not e2e and not install"`, so a bare `pytest` matches what `check.sh` gates. An explicit command-line `-m` overrides it for the opt-in runs:
+
+```sh
+uv run pytest -m e2e             # real-API end-to-end; needs ASSEMBLYAI_API_KEY, else skips
+uv run pytest -m install         # installs each init template's requirements for real; needs network + uv
+```
+
+`check.sh` runs the default suite with a **90% branch-coverage gate** (`--cov-fail-under=90`). New code generally needs tests to clear that gate.
+
+## Snapshot goldens
+
+CLI output is pinned by **syrupy snapshot tests** (`tests/__snapshots__/*.ambr`). Changing help text, tables, or rendered output will fail those tests until you regenerate them with `uv run pytest --snapshot-update` and commit the updated `.ambr` files. The auto-format hook only touches `*.py`, and pre-commit's whitespace fixers deliberately skip `tests/__snapshots__/` (syrupy's indentation must stay byte-for-byte), so never hand-edit a snapshot — always regenerate.
+
+The `--help` goldens are split per command group (`tests/test_snapshots_help_<group>.py`) so concurrent branches touching different commands regenerate *different* `.ambr` files. The partition (`HELP_GROUPS` in `tests/_snapshot_surface.py`) is **derived from each command module's `SPEC.panel`** (see `aai_cli/command_registry.py`), so a new command lands in the right group automatically; `tests/test_snapshots_help_groups.py` guards that the derived partition matches the live Typer tree. The root `assembly --help` screen — which every new command changes — has its own golden (`tests/test_snapshots_help_root.py`), so that churn stays confined to one trivially-regenerable `.ambr` file.
+
+## Hermeticity (enforced three ways)
+
+The suite is hermetic by construction (`tests/conftest.py` + `pyproject.toml` `[tool.pytest.ini_options]`): **pytest-randomly** shuffles order, an autouse `pin_timezone` fixture pins `TZ` to a fixed non-UTC zone (UTC-normalized rendering must be unaffected; use **time-machine** to freeze `now`), and **pytest-socket** (`--disable-socket`) blocks real network so an unmocked SDK/HTTP call fails loudly instead of hitting the API. A test that only binds a loopback server opts back in with the tight `@pytest.mark.allow_hosts(["127.0.0.1"])` (still blocks external hosts). The `e2e`/`install` marker suites legitimately reach the real network in-process (PyPI reachability probes, real-API runs), so a `pytest_collection_modifyitems` hook in `conftest.py` auto-grants them full sockets — adding a network marker is all that's needed, no per-test `enable_socket`.
+
+**Tests that touch global logging state must snapshot/restore it** — root handlers/level and per-logger levels are process-global, so a leak only fails on some pytest-randomly seeds (green locally, red in CI). Opt in to the shared `preserve_logging_state` conftest fixture (it also resets the websockets wire loggers a silencer test may have clamped) instead of hand-rolling the snapshot per module.
+
+## Writing tests that pass the diff gates
+
+Lessons that cost iterations getting the patch-coverage and mutation tail gates green:
+
+- **A boolean literal/default survives the mutation gate unless a test asserts the
+  difference between its two values**, not just that the line ran. `json_mode=False` passed
+  to `output.emit`, or `quiet=False` on `output.status`, get mutated to `True` — kill them by
+  asserting the *behavioral* split: the human branch prints bare text
+  (`result.output.strip() == "…"`, not a JSON object), or the spinner is actually entered
+  (monkeypatch `error_console.status` and assert it ran). A changed message / `prompter.note`
+  string is mutated whole, so one substring assert on the actionable keyword kills it.
+- **Help text and docstrings are pinned by the syrupy snapshots, not unit asserts** — a
+  mutated help string is killed by the regenerated `.ambr`, so `--snapshot-update` and commit
+  rather than adding redundant `--help` substring asserts.
+- **Typer's `CliRunner` merges stderr into `result.output`, and not in call order**, so don't
+  assume `splitlines()[-1]` is the command payload. In `--json` mode the env-mismatch warning
+  is its own `{"warning": …}` line, so filter parsed lines by a key the payload carries
+  (`next(o for o in objs if "env" in o)`). A monkeypatched fake must also mirror the real
+  signature — when a helper gains a kwarg (e.g. `output.status(…, quiet=…)`), doubles that
+  patch it must accept it or the call `TypeError`s.
+- **`--json` / `-j` is a per-command flag, not a root flag**: `assembly --json transcribe …` fails
+  with "No such option"; it's `assembly transcribe … --json`. (The root callback still sniffs the
+  whole token list via `argscan.requests_json`, so a callback-level failure like a bad
+  `--env` keeps the JSON error shape — but the flag itself lives on the subcommand.)
+
+The two diff-scoped tail gates are the slowest failures to discover via the full
+script; after a gate run (or any pytest run with the coverage flags below) they can
+be re-run alone:
+
+```sh
+uv run pytest -q -n auto --cov=aai_cli --cov-branch --cov-context=test --cov-report=xml  # refresh coverage data
+uv run diff-cover coverage.xml --compare-branch=origin/main --fail-under=100             # patch-coverage gate
+uv run python scripts/mutation_gate.py origin/main                                       # mutation gate
+```
+
+The mutation gate is diff-scoped, so code predating it is never mutation-tested. To audit
+existing code (or a whole module) against the same bar, `scripts/mutation_sweep.py`
+reuses the gate's engine over *every* line of the files you name (or the whole
+package). Refresh coverage first, and pass `--timeout` to that pytest step — the
+default suite has no per-test timeout (it's opt-in; see `pyproject.toml`), so a
+deadlocked test would wedge the run instead of failing fast:
+
+```sh
+uv run pytest -q -n auto --timeout=60 --cov=aai_cli --cov-branch --cov-context=test --cov-report=
+uv run python scripts/mutation_sweep.py aai_cli/config.py   # or omit paths for the whole package
+```
+
+## Replay fixtures (offline end-to-end coverage)
+
+`tests/test_replay_e2e.py` drives whole commands (`transcribe`/`transcripts`/`llm`/
+`balance`/`usage`/`limits`) against **real** API responses recorded once and replayed
+offline — the command's own parsing/rendering runs, but pytest-socket stays armed, so
+these live in the default suite. Three moving parts:
+
+- **`tests/fixtures/api/*.json`** — scrubbed snapshots (API key/JWT redacted, `email` and
+  `account_id` faked, private `cdn.assemblyai.com/upload/…` URLs redacted). Committed and
+  gitleaks-clean; treat them like syrupy snapshots (regenerate, don't hand-edit).
+- **`scripts/record_fixtures.py`** — the recorder. It is **deliberately outside the gate**
+  (it hits the network) and is *not* mypy/pyright-checked (only ruff covers `scripts/`).
+  Refresh after an API shape change: `ASSEMBLYAI_API_KEY=… uv run python scripts/record_fixtures.py`.
+  The key comes from the env; the AMS session JWT + `account_id` from the keyring/`config.toml`
+  of whoever ran `assembly login` (profile `default`) — neither is ever written to a fixture.
+- **`tests/replay_fixtures.py`** — rebuilds the boundary objects from JSON. A transcript is a
+  real `aai.Transcript` via `Transcript.from_response`; an LLM response is rebuilt with
+  `ChatCompletion.model_construct` (**not** `model_validate`) because the gateway returns
+  Anthropic-flavored fields — `finish_reason="end_turn"`, token counts under
+  `input_tokens`/`output_tokens` — that strict validation rejects but the OpenAI SDK itself
+  parses leniently.
+
+The replay tests patch the same boundary the unit tests do
+(`commands.<cmd>.client.<fn>` / `.ams.<fn>` / `.gateway.complete`); the only difference is
+the return value comes from a recorded payload instead of a hand-built mock.
diff --git a/tests/CLAUDE.md b/tests/CLAUDE.md
new file mode 120000
index 00000000..47dc3e3d
--- /dev/null
+++ b/tests/CLAUDE.md
@@ -0,0 +1 @@
+AGENTS.md
\ No newline at end of file
diff --git a/tests/__snapshots__/test_snapshots_help_root.ambr b/tests/__snapshots__/test_snapshots_help_root.ambr
new file mode 100644
index 00000000..79962711
--- /dev/null
+++ b/tests/__snapshots__/test_snapshots_help_root.ambr
@@ -0,0 +1,92 @@
+# serializer version: 1
+# name: test_root_help_matches_snapshot
+  '''
+  
+   Usage: assembly [OPTIONS] COMMAND [ARGS]...
+  
+  ╭─ Options ────────────────────────────────────────────────────────────────────╮
+  │ --profile             -p      TEXT     Named credential profile.             │
+  │ --env                         TEXT     Backend environment (production,      │
+  │                                        sandbox000).                          │
+  │ --sandbox                              Shortcut for --env sandbox000.        │
+  │ --quiet               -q               Suppress non-essential messages       │
+  │                                        (warnings, hints).                    │
+  │ --verbose             -v      INTEGER  Log diagnostics to stderr (-v:        │
+  │                                        requests, -vv: wire-level detail).    │
+  │                                        [default: 0]                          │
+  │ --version             -V               Show the CLI version and exit.        │
+  │ --install-completion                   Install completion for the current    │
+  │                                        shell.                                │
+  │ --show-completion                      Show completion for the current       │
+  │                                        shell.                                │
+  │ --help                                 Show this message and exit.           │
+  ╰──────────────────────────────────────────────────────────────────────────────╯
+  ╭─ Quick Start ────────────────────────────────────────────────────────────────╮
+  │ onboard      Guided setup: sign in, run your first transcription, and start  │
+  │              building.                                                       │
+  ╰──────────────────────────────────────────────────────────────────────────────╯
+  ╭─ Build an App ───────────────────────────────────────────────────────────────╮
+  │ init         Scaffold a new project from a template, then launch it.         │
+  │ dev          Launch the dev server for the app in the current directory.     │
+  │ share        Boot the app and expose it on a public URL via a cloudflared    │
+  │              tunnel.                                                         │
+  │ deploy       Deploy the current project to Vercel (default), Railway, or     │
+  │              Fly.io.                                                         │
+  ╰──────────────────────────────────────────────────────────────────────────────╯
+  ╭─ Run AssemblyAI ─────────────────────────────────────────────────────────────╮
+  │ transcribe   Transcribe an audio file, URL, or YouTube/podcast link — or a   │
+  │              whole batch.                                                    │
+  │ stream       Transcribe live audio in real time — from your mic, a file, a   │
+  │              URL, or a pipe.                                                 │
+  │ dictate      Dictate with a hotkey: record the mic, get the transcript back  │
+  │              instantly.                                                      │
+  │ agent        Have a live two-way voice conversation with an AssemblyAI voice │
+  │              agent.                                                          │
+  │ speak        Synthesize speech from text with AssemblyAI streaming TTS       │
+  │              (sandbox only).                                                 │
+  │ llm          Send a prompt to AssemblyAI's LLM Gateway and print the         │
+  │              response.                                                       │
+  │ clip         Cut clips out of a media file by speaker, text match, LLM pick, │
+  │              or time range.                                                  │
+  │ dub          Dub a video or audio file into another language (sandbox only). │
+  │ caption      Burn always-visible captions into a video.                      │
+  │ eval         Transcribe an evaluation dataset and score WER against its      │
+  │              reference texts.                                                │
+  │ webhooks     Receive webhook deliveries on a public dev URL.                 │
+  ╰──────────────────────────────────────────────────────────────────────────────╯
+  ╭─ Setup & Tools ──────────────────────────────────────────────────────────────╮
+  │ doctor       Check that your environment is ready to use AssemblyAI.         │
+  │ setup        Set up your coding agent for AssemblyAI (docs MCP + skills).    │
+  │ telemetry    Anonymous usage telemetry: status, enable, disable.             │
+  ╰──────────────────────────────────────────────────────────────────────────────╯
+  ╭─ History ────────────────────────────────────────────────────────────────────╮
+  │ transcripts  Browse and fetch past transcripts.                              │
+  │ sessions     Browse your past streaming (real-time) sessions.                │
+  ╰──────────────────────────────────────────────────────────────────────────────╯
+  ╭─ Account ────────────────────────────────────────────────────────────────────╮
+  │ login        Authenticate via your browser; stores a CLI API key.            │
+  │ logout       Clear stored credentials for the active profile.                │
+  │ whoami       Show the active profile and whether its key is usable.          │
+  │ balance      Show your remaining account balance.                            │
+  │ usage        Show usage over a date range (defaults to the last 30 days).    │
+  │ limits       Show your account's rate limits per service.                    │
+  │ keys         List, create, and rename your AssemblyAI API keys.              │
+  │ audit        List recent audit-log entries for your account.                 │
+  ╰──────────────────────────────────────────────────────────────────────────────╯
+  
+   Examples
+   Guided setup (start here)
+   $ assembly onboard
+   Transcribe a file
+   $ assembly transcribe call.mp3
+   Stream live audio in real time
+   $ assembly stream
+   Talk to a voice agent
+   $ assembly agent
+   Summarize while transcribing
+   $ assembly transcribe call.mp3 --llm "summarize action items"
+  
+  
+  
+  '''
+# ---
diff --git a/tests/_snapshot_surface.py b/tests/_snapshot_surface.py
index 1b7f94f5..30a59ac5 100644
--- a/tests/_snapshot_surface.py
+++ b/tests/_snapshot_surface.py
@@ -3,9 +3,12 @@
 The ``--help`` goldens are split into one module per command group
 (``tests/test_snapshots_help_<group>.py``) so concurrent branches that touch
 different commands regenerate *different* ``.ambr`` files instead of all
-conflicting in a single snapshot file. ``HELP_GROUPS`` is the partition;
-``tests/test_snapshots_help_groups.py`` guards that it stays complete and
-disjoint, so a new top-level command fails loudly until it is assigned a group.
+conflicting in a single snapshot file. ``HELP_GROUPS`` is the partition,
+**derived** from each command module's ``SPEC.panel`` declaration (see
+``aai_cli.command_registry``) so a new command is assigned a group by the same
+declaration that registers it — no parallel dict to keep in sync.
+``tests/test_snapshots_help_groups.py`` still guards that the derived partition
+matches the live Typer tree, so a misdeclared ``SPEC`` fails loudly.
 """
 
 from __future__ import annotations
@@ -15,36 +18,34 @@
 from syrupy.assertion import SnapshotAssertion
 from typer.testing import CliRunner
 
+from aai_cli import command_registry, help_panels
 from aai_cli.main import app
 from tests._cli_tree import leaf_command_argvs
 
-# Top-level command name -> snapshot module group, mirroring the help panels in
-# aai_cli.main._COMMAND_ORDER (plus the hidden _update-check). The keys are the
-# ``tests/test_snapshots_help_<group>.py`` module suffixes.
-HELP_GROUPS: dict[str, frozenset[str]] = {
-    "build": frozenset({"onboard", "init", "dev", "share", "deploy"}),
-    "run": frozenset(
-        {
-            "transcribe",
-            "stream",
-            "dictate",
-            "agent",
-            "speak",
-            "llm",
-            "clip",
-            "dub",
-            "caption",
-            "eval",
-            "webhooks",
-        }
-    ),
-    "tools": frozenset({"doctor", "setup", "telemetry", "_update-check"}),
-    "history": frozenset({"transcripts", "sessions"}),
-    "account": frozenset(
-        {"login", "logout", "whoami", "balance", "usage", "limits", "keys", "audit"}
-    ),
+# Help panel -> snapshot module group (the ``tests/test_snapshots_help_<group>.py``
+# module suffix). A brand-new panel must be mapped here before its commands ship.
+PANEL_TO_GROUP: dict[str, str] = {
+    help_panels.QUICK_START: "build",
+    help_panels.BUILD: "build",
+    help_panels.TRANSCRIPTION: "run",
+    help_panels.SETUP: "tools",
+    help_panels.HISTORY: "history",
+    help_panels.ACCOUNT: "account",
 }
 
+
+def _derive_help_groups() -> dict[str, frozenset[str]]:
+    groups: dict[str, set[str]] = {group: set() for group in PANEL_TO_GROUP.values()}
+    for registered in command_registry.discover():
+        groups[PANEL_TO_GROUP[registered.spec.panel]].update(registered.spec.commands)
+    # The hidden _update-check is registered directly in main.py, not via a SPEC.
+    groups["tools"].add("_update-check")
+    return {group: frozenset(names) for group, names in groups.items()}
+
+
+# Top-level command name -> snapshot module group, derived from the registry.
+HELP_GROUPS: dict[str, frozenset[str]] = _derive_help_groups()
+
 _runner = CliRunner()
 
 # Matches SGR (color/style) ANSI escape sequences.
diff --git a/tests/setup_helpers.py b/tests/setup_helpers.py
index a4ed5652..dd5ce6ef 100644
--- a/tests/setup_helpers.py
+++ b/tests/setup_helpers.py
@@ -57,7 +57,7 @@ def __call__(self, cmd, *args, **kwargs):
 
 def _all_tools_present(monkeypatch):
     monkeypatch.setattr(
-        "aai_cli.commands.setup.shutil.which",
+        "aai_cli.setup_exec.shutil.which",
         lambda tool: f"/usr/bin/{tool}",
     )
 
diff --git a/tests/test_command_registry.py b/tests/test_command_registry.py
new file mode 100644
index 00000000..5b1e199b
--- /dev/null
+++ b/tests/test_command_registry.py
@@ -0,0 +1,92 @@
+"""The convention-based command registry (aai_cli/command_registry.py).
+
+Discovery is exercised against the real ``aai_cli.commands`` package; the
+rejection paths use fake modules injected into ``sys.modules`` so a module that
+forgets (or misdeclares) its ``SPEC`` is proven to fail loudly at import time
+rather than silently dropping out of the CLI.
+"""
+
+from __future__ import annotations
+
+import dataclasses
+import sys
+import types
+
+import pytest
+import typer
+
+from aai_cli import command_registry, help_panels
+
+
+def _fake_module(name: str, **attrs):
+    # Build the empty stand-in first, then attach each keyword as a module attribute.
+    stub = types.ModuleType(name)
+    vars(stub).update(attrs)
+    return stub
+
+
+_VALID_SPEC = command_registry.CommandModuleSpec(
+    panel=help_panels.TRANSCRIPTION, order=10, commands=("fake",)
+)
+
+
+def test_spec_and_registration_are_immutable():
+    # Specs are shared module-level singletons read at every discovery; freezing them
+    # is load-bearing, not decoration — a mutated spec would silently reorder help.
+    # The attribute names are typed plain `str` so the type checkers permit the
+    # runtime probe of what they statically know is read-only.
+    order_field: str = "order"
+    with pytest.raises(dataclasses.FrozenInstanceError):
+        setattr(_VALID_SPEC, order_field, 99)
+    registered = command_registry.RegisteredModule(spec=_VALID_SPEC, app=typer.Typer())
+    spec_field: str = "spec"
+    with pytest.raises(dataclasses.FrozenInstanceError):
+        setattr(registered, spec_field, _VALID_SPEC)
+
+
+def test_module_without_spec_is_rejected(monkeypatch):
+    name = "aai_cli.commands._fake_missing_spec"
+    monkeypatch.setitem(sys.modules, name, _fake_module(name, app=typer.Typer()))
+    with pytest.raises(TypeError, match="SPEC = CommandModuleSpec"):
+        command_registry._load(name)
+
+
+def test_module_with_unknown_panel_is_rejected(monkeypatch):
+    bad_spec = command_registry.CommandModuleSpec(
+        panel="No Such Panel", order=10, commands=("fake",)
+    )
+    name = "aai_cli.commands._fake_bad_panel"
+    monkeypatch.setitem(sys.modules, name, _fake_module(name, SPEC=bad_spec, app=typer.Typer()))
+    with pytest.raises(RuntimeError, match="unknown help panel 'No Such Panel'"):
+        command_registry._load(name)
+
+
+def test_module_without_typer_app_is_rejected(monkeypatch):
+    name = "aai_cli.commands._fake_no_app"
+    monkeypatch.setitem(sys.modules, name, _fake_module(name, SPEC=_VALID_SPEC))
+    with pytest.raises(TypeError, match=r"app = typer\.Typer"):
+        command_registry._load(name)
+
+
+def test_load_returns_spec_and_app(monkeypatch):
+    name = "aai_cli.commands._fake_valid"
+    sub_app = typer.Typer()
+    monkeypatch.setitem(sys.modules, name, _fake_module(name, SPEC=_VALID_SPEC, app=sub_app))
+    registered = command_registry._load(name)
+    assert registered.spec is _VALID_SPEC
+    assert registered.app is sub_app
+
+
+def test_discovery_renders_panels_contiguously_in_panel_order():
+    registered = command_registry.discover()
+    ranks = [help_panels.PANEL_ORDER.index(reg.spec.panel) for reg in registered]
+    assert ranks == sorted(ranks)  # panels stay contiguous, in PANEL_ORDER order
+    assert {reg.spec.panel for reg in registered} == set(help_panels.PANEL_ORDER)
+
+
+def test_command_order_lists_every_declared_command_exactly_once():
+    registered = command_registry.discover()
+    order = command_registry.command_order(registered)
+    assert len(order) == len(set(order))  # no module may claim another's command name
+    assert set(order) == {name for reg in registered for name in reg.spec.commands}
+    assert order[0] == "onboard"  # Quick Start renders first
diff --git a/tests/test_doctor.py b/tests/test_doctor.py
index a04fba6e..e30bdb97 100644
--- a/tests/test_doctor.py
+++ b/tests/test_doctor.py
@@ -7,7 +7,7 @@
 from typer.testing import CliRunner
 
 from aai_cli import config
-from aai_cli.commands import doctor
+from aai_cli import doctor_checks as doctor
 from aai_cli.errors import APIError
 from aai_cli.main import app
 
@@ -18,12 +18,12 @@
 def healthy(monkeypatch):
     """A fully-ready environment: valid key, all tools present, a microphone."""
     config.set_api_key("default", "sk_1234567890")
-    monkeypatch.setattr("aai_cli.commands.doctor.client.validate_key", lambda _key: True)
-    monkeypatch.setattr("aai_cli.commands.doctor.shutil.which", lambda tool: f"/usr/bin/{tool}")
-    monkeypatch.setattr("aai_cli.commands.doctor._probe_input_devices", lambda: 2)
+    monkeypatch.setattr("aai_cli.doctor_checks.client.validate_key", lambda _key: True)
+    monkeypatch.setattr("aai_cli.doctor_checks.shutil.which", lambda tool: f"/usr/bin/{tool}")
+    monkeypatch.setattr("aai_cli.doctor_checks._probe_input_devices", lambda: 2)
     # The MCP probe shells out to `claude mcp get`; keep the suite hermetic and
     # report the full setup (docs MCP + both skills) as installed.
-    monkeypatch.setattr("aai_cli.commands.doctor.coding_agent.missing_components", list)
+    monkeypatch.setattr("aai_cli.doctor_checks.coding_agent.missing_components", list)
 
 
 def _checks(result):
@@ -51,7 +51,7 @@ def test_doctor_no_keyring_recommends_env_var(healthy, monkeypatch):
     # On a box with no usable keyring, `assembly login` can't persist a key either, so the
     # fix must point at ASSEMBLYAI_API_KEY rather than a dead-end browser login.
     config.clear_api_key("default")
-    monkeypatch.setattr("aai_cli.commands.doctor.config.keyring_usable", lambda: False)
+    monkeypatch.setattr("aai_cli.doctor_checks.config.keyring_usable", lambda: False)
     result = runner.invoke(app, ["doctor", "--json"])
     assert result.exit_code == 1
     api = _checks(result)["api-key"]
@@ -68,7 +68,7 @@ def test_doctor_success_suggests_trying_transcribe(healthy, monkeypatch):
 
 
 def test_doctor_rejected_key_fails(healthy, monkeypatch):
-    monkeypatch.setattr("aai_cli.commands.doctor.client.validate_key", lambda _key: False)
+    monkeypatch.setattr("aai_cli.doctor_checks.client.validate_key", lambda _key: False)
     result = runner.invoke(app, ["doctor", "--json"])
     assert result.exit_code == 1
     api = _checks(result)["api-key"]
@@ -84,7 +84,7 @@ def test_doctor_network_error_is_a_failure(healthy, monkeypatch):
     def boom(_key):
         raise APIError("Network error contacting AssemblyAI: timeout")
 
-    monkeypatch.setattr("aai_cli.commands.doctor.client.validate_key", boom)
+    monkeypatch.setattr("aai_cli.doctor_checks.client.validate_key", boom)
     result = runner.invoke(app, ["doctor", "--json"])
     assert result.exit_code == 1
     api = _checks(result)["api-key"]
@@ -94,7 +94,7 @@ def boom(_key):
 
 def test_doctor_ffmpeg_missing_warns_but_passes(healthy, monkeypatch):
     monkeypatch.setattr(
-        "aai_cli.commands.doctor.shutil.which",
+        "aai_cli.doctor_checks.shutil.which",
         lambda tool: None if tool == "ffmpeg" else f"/usr/bin/{tool}",
     )
     result = runner.invoke(app, ["doctor", "--json"])
@@ -110,7 +110,7 @@ def test_doctor_audio_unavailable_warns_but_passes(healthy, monkeypatch):
     def no_audio():
         raise ImportError("no sounddevice")
 
-    monkeypatch.setattr("aai_cli.commands.doctor._probe_input_devices", no_audio)
+    monkeypatch.setattr("aai_cli.doctor_checks._probe_input_devices", no_audio)
     result = runner.invoke(app, ["doctor", "--json"])
     assert result.exit_code == 0
     audio = _checks(result)["audio"]
@@ -119,7 +119,7 @@ def no_audio():
 
 
 def test_doctor_no_microphone_warns(healthy, monkeypatch):
-    monkeypatch.setattr("aai_cli.commands.doctor._probe_input_devices", lambda: 0)
+    monkeypatch.setattr("aai_cli.doctor_checks._probe_input_devices", lambda: 0)
     result = runner.invoke(app, ["doctor", "--json"])
     assert result.exit_code == 0
     assert _checks(result)["audio"]["status"] == "warn"
@@ -136,7 +136,7 @@ def test_doctor_coding_agent_fully_set_up_does_not_suggest_install(healthy):
 
 def test_doctor_coding_agent_not_set_up_names_whats_missing(healthy, monkeypatch):
     monkeypatch.setattr(
-        "aai_cli.commands.doctor.coding_agent.missing_components",
+        "aai_cli.doctor_checks.coding_agent.missing_components",
         lambda: ["docs MCP", "aai-cli skill"],
     )
     result = runner.invoke(app, ["doctor", "--json"])
@@ -150,7 +150,7 @@ def test_doctor_coding_agent_not_set_up_names_whats_missing(healthy, monkeypatch
 
 def test_doctor_coding_agent_missing_warns(healthy, monkeypatch):
     monkeypatch.setattr(
-        "aai_cli.commands.doctor.shutil.which",
+        "aai_cli.doctor_checks.shutil.which",
         lambda tool: None if tool in ("claude", "npx") else f"/usr/bin/{tool}",
     )
     result = runner.invoke(app, ["doctor", "--json"])
@@ -189,7 +189,7 @@ def test_doctor_network_fix_names_active_env_host(healthy, monkeypatch):
     def boom(_key):
         raise APIError("Network error contacting AssemblyAI: timeout")
 
-    monkeypatch.setattr("aai_cli.commands.doctor.client.validate_key", boom)
+    monkeypatch.setattr("aai_cli.doctor_checks.client.validate_key", boom)
     result = runner.invoke(app, ["--env", "sandbox000", "doctor", "--json"])
     fix = _checks(result)["api-key"]["fix"]
     assert "that api.sandbox000.assemblyai-labs.com is reachable" in fix
@@ -300,6 +300,21 @@ def test_render_omits_profile_line_for_partial_payloads() -> None:
     assert "assembly transcribe --sample" not in text
 
 
+def test_render_omits_profile_line_when_only_one_key_is_present() -> None:
+    # The context line needs BOTH keys: rendering it with only `profile` set would
+    # interpolate a missing environment. Distinct from the neither-key case above —
+    # this is the payload shape that distinguishes the `and` from an `or`.
+    payload: doctor.DoctorResult = {
+        "ok": True,
+        "profile": "default",
+        "checks": [
+            {"name": "python", "status": "ok", "affects": [], "detail": "3.12", "fix": None}
+        ],
+    }
+    text = doctor.render(payload)
+    assert "profile:" not in text
+
+
 def test_doctor_human_output_shows_profile_and_environment(healthy, monkeypatch):
     monkeypatch.setattr("aai_cli.output.resolve_json", lambda *, explicit: False)
     result = runner.invoke(app, ["doctor"])
diff --git a/tests/test_help_rendering.py b/tests/test_help_rendering.py
index 42d7960c..ae2d6fa1 100644
--- a/tests/test_help_rendering.py
+++ b/tests/test_help_rendering.py
@@ -1,6 +1,6 @@
 """Rendering guards for the help screens and Click error formatting.
 
-These pin the two patches main.py applies to Typer's rich rendering: flag-name
+These pin the patches typer_patches.py applies to Typer's rich rendering: flag-name
 columns must never be clipped to an unreadable "--end-of-turn-c…" at a standard
 80-column terminal, and unknown-flag suggestions must not leak a tuple repr.
 """
@@ -150,7 +150,7 @@ def test_version_command_suggests_version_flag():
 def test_misplaced_flag_hint_without_context_is_none():
     from typer._click.exceptions import NoSuchOption
 
-    from aai_cli.main import _misplaced_flag_hint
+    from aai_cli.typer_patches import _misplaced_flag_hint
 
     assert _misplaced_flag_hint(NoSuchOption("--json")) is None
 
@@ -160,7 +160,7 @@ def test_click_error_without_context_falls_back_to_argv(monkeypatch, capsys):
     # formatter then sniffs the real process argv for the JSON opt-in.
     from typer._click.exceptions import ClickException
 
-    from aai_cli.main import _format_click_error_fixed
+    from aai_cli.typer_patches import _format_click_error_fixed
 
     monkeypatch.setattr(sys, "argv", ["assembly", "--json"])
     _format_click_error_fixed(ClickException("boom"))
@@ -175,7 +175,7 @@ def test_click_error_without_context_falls_back_to_argv(monkeypatch, capsys):
 
 
 def test_noclip_table_pins_leading_columns_and_passes_row_args_through():
-    from aai_cli.main import _NoClipTable
+    from aai_cli.typer_patches import _NoClipTable
 
     table = _NoClipTable()
     table.add_row("--flag", "META", "help text")
diff --git a/tests/test_importlinter_coverage.py b/tests/test_importlinter_coverage.py
new file mode 100644
index 00000000..a3f9d3dc
--- /dev/null
+++ b/tests/test_importlinter_coverage.py
@@ -0,0 +1,71 @@
+"""Guard: the import-linter contracts must cover every module in the package.
+
+Contract 1 ("core modules do not import command modules") enumerates its source
+modules by name, so a newly added module would be silently *uncovered* — worse
+than a merge conflict, because nothing fails until the architecture has already
+drifted (this is exactly how `onboard` once grew imports of command modules
+unnoticed). This test compares the enumerated list against the filesystem so a
+new top-level module fails loudly until it is added to `.importlinter` (or to
+the deliberate exemption list below).
+
+Contract 2 needs no guard: it wildcards over ``aai_cli.commands.*``.
+"""
+
+from __future__ import annotations
+
+import configparser
+from pathlib import Path
+
+import aai_cli
+
+# Modules that legitimately import aai_cli.commands and so are deliberately
+# outside contract 1: main registers the discovered command apps,
+# command_registry performs that discovery, and commands is the layer itself.
+EXEMPT = {"aai_cli.main", "aai_cli.command_registry", "aai_cli.commands"}
+
+_REPO_ROOT = Path(aai_cli.__file__).resolve().parent.parent
+
+
+def _top_level_modules() -> set[str]:
+    """Return the dotted names of the modules and packages directly under ``aai_cli``.
+
+    Entries whose name starts with ``_`` are skipped, directories count only when
+    they contain an ``__init__.py``, and any other ``.py`` file counts by its stem.
+    """
+    package_dir = Path(aai_cli.__file__).resolve().parent
+    modules: set[str] = set()
+    for path in package_dir.iterdir():
+        if path.name.startswith("_"):
+            continue
+        if path.is_dir() and (path / "__init__.py").exists():
+            modules.add(f"aai_cli.{path.name}")
+        elif path.suffix == ".py":
+            modules.add(f"aai_cli.{path.stem}")
+    return modules
+
+
+def _contract_one_sources() -> set[str]:
+    """Return the ``source_modules`` entries of contract 1 in ``.importlinter``.
+
+    Raises ``FileNotFoundError`` when the config file is absent.
+    """
+    parser = configparser.ConfigParser()
+    # ConfigParser.read() silently skips files it cannot open, which would surface
+    # as a bare KeyError on the section lookup below; read_file() on an explicitly
+    # opened handle fails loudly with the offending path instead.
+    with (_REPO_ROOT / ".importlinter").open(encoding="utf-8") as config_file:
+        parser.read_file(config_file)
+    return set(parser["importlinter:contract:1"]["source_modules"].split())
+
+
+def test_every_core_module_is_covered_by_contract_one():
+    """Every non-exempt top-level module is in contract 1, and no listed module is stale."""
+    listed = _contract_one_sources()
+    actual = _top_level_modules()
+    missing = sorted(actual - listed - EXEMPT)
+    assert missing == [], (
+        f"new top-level module(s) {missing} are not covered by .importlinter contract 1; "
+        "add them to source_modules (or to EXEMPT here if they may import commands)"
+    )
+    stale = sorted(listed - actual)
+    assert stale == [], f".importlinter contract 1 lists module(s) that no longer exist: {stale}"
diff --git a/tests/test_onboard_environment.py b/tests/test_onboard_environment.py
index 6f735cc5..a486f2f4 100644
--- a/tests/test_onboard_environment.py
+++ b/tests/test_onboard_environment.py
@@ -39,9 +39,9 @@ def _mk(name: str, status: str):
         }
         return lambda: check
 
-    monkeypatch.setattr("aai_cli.commands.doctor.check_python", _mk("python", python))
-    monkeypatch.setattr("aai_cli.commands.doctor.check_ffmpeg", _mk("ffmpeg", ffmpeg))
-    monkeypatch.setattr("aai_cli.commands.doctor.check_audio", _mk("audio", audio))
+    monkeypatch.setattr("aai_cli.doctor_checks.check_python", _mk("python", python))
+    monkeypatch.setattr("aai_cli.doctor_checks.check_ffmpeg", _mk("ffmpeg", ffmpeg))
+    monkeypatch.setattr("aai_cli.doctor_checks.check_audio", _mk("audio", audio))
 
 
 def test_environment_all_ok_says_everything_looks_good(
diff --git a/tests/test_onboard_sections.py b/tests/test_onboard_sections.py
index c2f9600f..3fbd302f 100644
--- a/tests/test_onboard_sections.py
+++ b/tests/test_onboard_sections.py
@@ -8,7 +8,7 @@
 import typer
 
 from aai_cli import init_exec, output, transcribe_exec, transcribe_render
-from aai_cli.commands import setup as setup_cmd
+from aai_cli import setup_exec as setup_cmd
 from aai_cli.context import AppState
 from aai_cli.errors import CLIError
 from aai_cli.onboard import sections
diff --git a/tests/test_setup.py b/tests/test_setup.py
index eca78a0c..0b3080b1 100644
--- a/tests/test_setup.py
+++ b/tests/test_setup.py
@@ -26,7 +26,7 @@ def _force_json(monkeypatch):
 
 
 def test_proc_detail_prefers_stderr_then_falls_back_to_stdout():
-    from aai_cli.commands import setup
+    from aai_cli import setup_exec as setup
 
     # stderr wins when present (pins `proc.stderr or proc.stdout`); stdout is the
     # fallback when stderr is empty.
@@ -47,7 +47,7 @@ def test_remove_skill_failure_reports_failed(monkeypatch):
     # MCP absent (so only the skill step can fail) and `npx skills remove` runs but
     # leaves the skill in place -> remove must report it as failed, not removed.
     monkeypatch.setattr(
-        "aai_cli.commands.setup.subprocess.run",
+        "aai_cli.setup_exec.subprocess.run",
         FakeRun({("claude", "mcp", "get"): 1}, removes_skill=False),
     )
 
@@ -66,14 +66,14 @@ def test_remove_skill_skipped_when_npx_missing(monkeypatch):
     # The assemblyai skill is present but npx is gone -> we can't drive `skills
     # remove`, so report skipped (not failed).
     monkeypatch.setattr(
-        "aai_cli.commands.setup.shutil.which",
+        "aai_cli.setup_exec.shutil.which",
         lambda tool: None if tool == "npx" else f"/usr/bin/{tool}",
     )
     skill = _skill_path()
     skill.mkdir(parents=True)
     (skill / "SKILL.md").write_text("# AssemblyAI")
     monkeypatch.setattr(
-        "aai_cli.commands.setup.subprocess.run",
+        "aai_cli.setup_exec.subprocess.run",
         FakeRun({("claude", "mcp", "get"): 1}),
     )
 
@@ -92,7 +92,7 @@ def test_remove_unwinds_all(monkeypatch, tmp_path):
         d.mkdir(parents=True)
         (d / "SKILL.md").write_text("# x")
     fake = FakeRun({("claude", "mcp", "get"): 0})  # present -> removable
-    monkeypatch.setattr("aai_cli.commands.setup.subprocess.run", fake)
+    monkeypatch.setattr("aai_cli.setup_exec.subprocess.run", fake)
 
     result = runner.invoke(app, ["setup", "remove"])
     assert result.exit_code == 0
@@ -109,7 +109,7 @@ def test_remove_unwinds_all(monkeypatch, tmp_path):
 def test_remove_when_absent_is_not_an_error(monkeypatch):
     _all_tools_present(monkeypatch)  # no skill dirs
     fake = FakeRun({("claude", "mcp", "get"): 1})  # absent
-    monkeypatch.setattr("aai_cli.commands.setup.subprocess.run", fake)
+    monkeypatch.setattr("aai_cli.setup_exec.subprocess.run", fake)
 
     result = runner.invoke(app, ["setup", "remove"])
     assert result.exit_code == 0
@@ -124,7 +124,7 @@ def test_remove_when_absent_is_not_an_error(monkeypatch):
 def test_remove_scope_passthrough(monkeypatch):
     _all_tools_present(monkeypatch)
     fake = FakeRun({("claude", "mcp", "get"): 0})  # present
-    monkeypatch.setattr("aai_cli.commands.setup.subprocess.run", fake)
+    monkeypatch.setattr("aai_cli.setup_exec.subprocess.run", fake)
 
     result = runner.invoke(app, ["setup", "remove", "--scope", "project"])
     assert result.exit_code == 0
@@ -133,18 +133,18 @@ def test_remove_scope_passthrough(monkeypatch):
 
 def test_remove_invalid_scope_exits_2(monkeypatch):
     _all_tools_present(monkeypatch)
-    monkeypatch.setattr("aai_cli.commands.setup.subprocess.run", FakeRun())
+    monkeypatch.setattr("aai_cli.setup_exec.subprocess.run", FakeRun())
     result = runner.invoke(app, ["setup", "remove", "--scope", "bogus"])
     assert result.exit_code == 2
 
 
 def test_remove_skips_mcp_when_claude_missing(monkeypatch):
     monkeypatch.setattr(
-        "aai_cli.commands.setup.shutil.which",
+        "aai_cli.setup_exec.shutil.which",
         lambda tool: None if tool == "claude" else f"/usr/bin/{tool}",
     )
     fake = FakeRun()
-    monkeypatch.setattr("aai_cli.commands.setup.subprocess.run", fake)
+    monkeypatch.setattr("aai_cli.setup_exec.subprocess.run", fake)
 
     result = runner.invoke(app, ["setup", "remove"])
     assert result.exit_code == 0
@@ -156,7 +156,7 @@ def test_remove_mcp_failure_reports_failed(monkeypatch):
     _all_tools_present(monkeypatch)
     # present, but `mcp remove` fails -> the mcp step is failed and exit is non-zero.
     fake = FakeRun({("claude", "mcp", "get"): 0, ("claude", "mcp", "remove"): 1})
-    monkeypatch.setattr("aai_cli.commands.setup.subprocess.run", fake)
+    monkeypatch.setattr("aai_cli.setup_exec.subprocess.run", fake)
 
     result = runner.invoke(app, ["setup", "remove"])
     assert result.exit_code == 1
@@ -168,7 +168,7 @@ def test_remove_mcp_failure_reports_failed(monkeypatch):
 
 def test_copy_tree_skips_pycache_and_pyc(tmp_path):
     # _copy_tree must not copy compiled-Python detritus into the agent's skills dir.
-    from aai_cli.commands import setup
+    from aai_cli import setup_exec as setup
 
     src = tmp_path / "src"
     (src / "references").mkdir(parents=True)
@@ -190,7 +190,7 @@ def test_copy_tree_skips_pycache_and_pyc(tmp_path):
 def test_copy_tree_creates_missing_parent_dirs(tmp_path):
     # The destination's parents may not exist yet (~/.claude/skills on a fresh
     # machine); _copy_tree must create the whole chain (mkdir parents=True).
-    from aai_cli.commands import setup
+    from aai_cli import setup_exec as setup
 
     src = tmp_path / "src"
     src.mkdir()
@@ -204,7 +204,7 @@ def test_copy_tree_creates_missing_parent_dirs(tmp_path):
 def test_copy_tree_into_existing_dir_is_tolerated(tmp_path):
     # _copy_tree may run with the destination already present (a forced reinstall over
     # an existing skill dir); the mkdir must tolerate it (exist_ok=True), not raise.
-    from aai_cli.commands import setup
+    from aai_cli import setup_exec as setup
 
     src = tmp_path / "src"
     src.mkdir()
@@ -251,7 +251,7 @@ def test_setup_no_subcommand_lists_commands():
 
 
 def test_install_cli_skill_fails_when_bundle_missing(monkeypatch, tmp_path):
-    from aai_cli.commands import setup
+    from aai_cli import setup_exec as setup
 
     monkeypatch.setattr(setup, "_bundled_cli_skill", lambda: tmp_path / "nonexistent")
     step = setup.install_cli_skill(force=False)
@@ -260,7 +260,7 @@ def test_install_cli_skill_fails_when_bundle_missing(monkeypatch, tmp_path):
 
 
 def test_install_cli_skill_fails_when_copy_lacks_skill_md(monkeypatch, tmp_path):
-    from aai_cli.commands import setup
+    from aai_cli import setup_exec as setup
 
     empty = tmp_path / "emptybundle"
     empty.mkdir()
@@ -271,13 +271,13 @@ def test_install_cli_skill_fails_when_copy_lacks_skill_md(monkeypatch, tmp_path)
 
 
 def test_remove_cli_skill_fails_when_rmtree_noops(monkeypatch):
-    from aai_cli.commands import setup
+    from aai_cli import setup_exec as setup
 
     dest = _cli_skill_path()
     dest.mkdir(parents=True)
     (dest / "SKILL.md").write_text("# x")
     monkeypatch.setattr(setup.shutil, "rmtree", lambda *a, **k: None)
-    step = setup._remove_cli_skill()
+    step = setup.remove_cli_skill()
     assert step["status"] == "failed"
     assert "still present" in step["detail"]
 
@@ -286,7 +286,7 @@ def test_remove_cli_skill_tolerates_rmtree_error(monkeypatch):
     # Removal is best-effort (ignore_errors=True): a deletion failure must surface as a
     # clean "failed" step (skill still present), never an uncaught OSError. Without
     # ignore_errors, rmtree would raise instead of returning.
-    from aai_cli.commands import setup
+    from aai_cli import setup_exec as setup
 
     dest = _cli_skill_path()
     dest.mkdir(parents=True)
@@ -297,6 +297,6 @@ def rmtree(path, ignore_errors=False, **kwargs):
             raise OSError("permission denied")  # what a non-ignoring rmtree would do
 
     monkeypatch.setattr(setup.shutil, "rmtree", rmtree)
-    step = setup._remove_cli_skill()
+    step = setup.remove_cli_skill()
     assert step["status"] == "failed"
     assert "still present" in step["detail"]
diff --git a/tests/test_setup_install.py b/tests/test_setup_install.py
index 6a1f0c21..184aeac7 100644
--- a/tests/test_setup_install.py
+++ b/tests/test_setup_install.py
@@ -38,7 +38,7 @@ def test_install_happy_path_runs_all_steps(monkeypatch):
     _all_tools_present(monkeypatch)
     # MCP not yet present -> `mcp get` returns non-zero.
     fake = FakeRun({("claude", "mcp", "get"): 1})
-    monkeypatch.setattr("aai_cli.commands.setup.subprocess.run", fake)
+    monkeypatch.setattr("aai_cli.setup_exec.subprocess.run", fake)
 
     result = runner.invoke(app, ["setup", "install"])
     assert result.exit_code == 0
@@ -75,7 +75,7 @@ def test_install_skill_failed_when_npx_succeeds_but_nothing_installed(monkeypatc
     # code — otherwise install says "installed" while status says "not_installed".
     _all_tools_present(monkeypatch)
     fake = FakeRun({("claude", "mcp", "get"): 1}, creates_skill=False)
-    monkeypatch.setattr("aai_cli.commands.setup.subprocess.run", fake)
+    monkeypatch.setattr("aai_cli.setup_exec.subprocess.run", fake)
 
     result = runner.invoke(app, ["setup", "install"])
     assert result.exit_code == 1  # skill step failed
@@ -102,7 +102,7 @@ def record(cmd, *args, **kwargs):
         seen.append((list(cmd), kwargs))
         return subprocess.CompletedProcess(args=cmd, returncode=1, stdout="", stderr="")
 
-    monkeypatch.setattr("aai_cli.commands.setup.subprocess.run", record)
+    monkeypatch.setattr("aai_cli.setup_exec.subprocess.run", record)
     result = runner.invoke(app, ["setup", "install"])
     assert result.exit_code in (0, 1)
     assert seen, "expected subprocess.run to be called"
@@ -121,7 +121,7 @@ def record(cmd, *args, **kwargs):
 def test_install_scope_passthrough(monkeypatch):
     _all_tools_present(monkeypatch)
     fake = FakeRun({("claude", "mcp", "get"): 1})
-    monkeypatch.setattr("aai_cli.commands.setup.subprocess.run", fake)
+    monkeypatch.setattr("aai_cli.setup_exec.subprocess.run", fake)
 
     result = runner.invoke(app, ["setup", "install", "--scope", "project"])
     assert result.exit_code == 0
@@ -141,7 +141,7 @@ def test_install_scope_passthrough(monkeypatch):
 def test_install_scope_local_passthrough(monkeypatch):
     _all_tools_present(monkeypatch)
     fake = FakeRun({("claude", "mcp", "get"): 1})
-    monkeypatch.setattr("aai_cli.commands.setup.subprocess.run", fake)
+    monkeypatch.setattr("aai_cli.setup_exec.subprocess.run", fake)
 
     result = runner.invoke(app, ["setup", "install", "--scope", "local"])
     assert result.exit_code == 0
@@ -160,7 +160,7 @@ def test_install_scope_local_passthrough(monkeypatch):
 
 def test_install_invalid_scope_exits_2(monkeypatch):
     _all_tools_present(monkeypatch)
-    monkeypatch.setattr("aai_cli.commands.setup.subprocess.run", FakeRun())
+    monkeypatch.setattr("aai_cli.setup_exec.subprocess.run", FakeRun())
     result = runner.invoke(app, ["setup", "install", "--scope", "bogus"])
     assert result.exit_code == 2
 
@@ -169,7 +169,7 @@ def test_install_idempotent_when_mcp_present(monkeypatch):
     _all_tools_present(monkeypatch)
     # `mcp get` returns 0 -> already registered.
     fake = FakeRun({("claude", "mcp", "get"): 0})
-    monkeypatch.setattr("aai_cli.commands.setup.subprocess.run", fake)
+    monkeypatch.setattr("aai_cli.setup_exec.subprocess.run", fake)
 
     result = runner.invoke(app, ["setup", "install"])
     assert result.exit_code == 0
@@ -182,7 +182,7 @@ def test_install_failure_exits_nonzero(monkeypatch):
     _all_tools_present(monkeypatch)
     # mcp not present, but `mcp add` fails.
     fake = FakeRun({("claude", "mcp", "get"): 1, ("claude", "mcp", "add"): 1})
-    monkeypatch.setattr("aai_cli.commands.setup.subprocess.run", fake)
+    monkeypatch.setattr("aai_cli.setup_exec.subprocess.run", fake)
 
     result = runner.invoke(app, ["setup", "install"])
     assert result.exit_code == 1
@@ -193,7 +193,7 @@ def test_install_force_remove_failure_reports_failed(monkeypatch):
     _all_tools_present(monkeypatch)
     # present, but the forced remove fails
     fake = FakeRun({("claude", "mcp", "get"): 0, ("claude", "mcp", "remove"): 1})
-    monkeypatch.setattr("aai_cli.commands.setup.subprocess.run", fake)
+    monkeypatch.setattr("aai_cli.setup_exec.subprocess.run", fake)
 
     result = runner.invoke(app, ["setup", "install", "--force"])
     assert result.exit_code == 1
@@ -204,7 +204,7 @@ def test_install_force_remove_failure_reports_failed(monkeypatch):
 def test_install_force_removes_then_adds(monkeypatch):
     _all_tools_present(monkeypatch)
     fake = FakeRun({("claude", "mcp", "get"): 0})
-    monkeypatch.setattr("aai_cli.commands.setup.subprocess.run", fake)
+    monkeypatch.setattr("aai_cli.setup_exec.subprocess.run", fake)
 
     result = runner.invoke(app, ["setup", "install", "--force"])
     assert result.exit_code == 0
@@ -214,11 +214,11 @@ def test_install_force_removes_then_adds(monkeypatch):
 
 def test_install_skips_mcp_when_claude_missing(monkeypatch):
     monkeypatch.setattr(
-        "aai_cli.commands.setup.shutil.which",
+        "aai_cli.setup_exec.shutil.which",
         lambda tool: None if tool == "claude" else f"/usr/bin/{tool}",
     )
     fake = FakeRun()
-    monkeypatch.setattr("aai_cli.commands.setup.subprocess.run", fake)
+    monkeypatch.setattr("aai_cli.setup_exec.subprocess.run", fake)
 
     result = runner.invoke(app, ["setup", "install"])
     assert result.exit_code == 0  # skip is not a failure
@@ -241,7 +241,7 @@ def test_install_skill_idempotent_when_present(monkeypatch):
     skill.mkdir(parents=True)
     (skill / "SKILL.md").write_text("# AssemblyAI")
     fake = FakeRun({("claude", "mcp", "get"): 1})
-    monkeypatch.setattr("aai_cli.commands.setup.subprocess.run", fake)
+    monkeypatch.setattr("aai_cli.setup_exec.subprocess.run", fake)
 
     result = runner.invoke(app, ["setup", "install"])
     assert result.exit_code == 0
@@ -257,7 +257,7 @@ def test_install_force_reinstalls_skill(monkeypatch):
     skill.mkdir(parents=True)
     (skill / "SKILL.md").write_text("# AssemblyAI")
     fake = FakeRun({("claude", "mcp", "get"): 1})
-    monkeypatch.setattr("aai_cli.commands.setup.subprocess.run", fake)
+    monkeypatch.setattr("aai_cli.setup_exec.subprocess.run", fake)
 
     result = runner.invoke(app, ["setup", "install", "--force"])
     assert result.exit_code == 0
@@ -275,11 +275,11 @@ def test_install_force_reinstalls_skill(monkeypatch):
 
 def test_install_skips_skill_when_npx_missing(monkeypatch):
     monkeypatch.setattr(
-        "aai_cli.commands.setup.shutil.which",
+        "aai_cli.setup_exec.shutil.which",
         lambda tool: None if tool == "npx" else f"/usr/bin/{tool}",
     )
     fake = FakeRun({("claude", "mcp", "get"): 1})
-    monkeypatch.setattr("aai_cli.commands.setup.subprocess.run", fake)
+    monkeypatch.setattr("aai_cli.setup_exec.subprocess.run", fake)
 
     result = runner.invoke(app, ["setup", "install"])
     assert result.exit_code == 0
@@ -300,7 +300,7 @@ def test_install_aai_cli_skill_idempotent_when_present(monkeypatch):
     cli_skill.mkdir(parents=True)
     (cli_skill / "SKILL.md").write_text("# old")
     fake = FakeRun({("claude", "mcp", "get"): 1})
-    monkeypatch.setattr("aai_cli.commands.setup.subprocess.run", fake)
+    monkeypatch.setattr("aai_cli.setup_exec.subprocess.run", fake)
 
     result = runner.invoke(app, ["setup", "install"])
     assert result.exit_code == 0
@@ -315,7 +315,7 @@ def test_install_aai_cli_skill_force_reinstalls(monkeypatch):
     cli_skill.mkdir(parents=True)
     (cli_skill / "SKILL.md").write_text("# old")
     fake = FakeRun({("claude", "mcp", "get"): 1})
-    monkeypatch.setattr("aai_cli.commands.setup.subprocess.run", fake)
+    monkeypatch.setattr("aai_cli.setup_exec.subprocess.run", fake)
 
     result = runner.invoke(app, ["setup", "install", "--force"])
     assert result.exit_code == 0
@@ -336,7 +336,7 @@ def test_status_reports_all_installed(monkeypatch, tmp_path):
         (d / "SKILL.md").write_text("# x")
     # `mcp get` returns 0 -> present.
     monkeypatch.setattr(
-        "aai_cli.commands.setup.subprocess.run",
+        "aai_cli.setup_exec.subprocess.run",
         FakeRun({("claude", "mcp", "get"): 0}),
     )
 
@@ -352,7 +352,7 @@ def test_status_reports_all_installed(monkeypatch, tmp_path):
 def test_status_reports_not_installed(monkeypatch):
     _all_tools_present(monkeypatch)  # no skill dirs created
     monkeypatch.setattr(
-        "aai_cli.commands.setup.subprocess.run",
+        "aai_cli.setup_exec.subprocess.run",
         FakeRun({("claude", "mcp", "get"): 1}),
     )
 
@@ -367,10 +367,10 @@ def test_status_reports_not_installed(monkeypatch):
 
 def test_status_mcp_unknown_when_claude_missing(monkeypatch):
     monkeypatch.setattr(
-        "aai_cli.commands.setup.shutil.which",
+        "aai_cli.setup_exec.shutil.which",
         lambda tool: None if tool == "claude" else f"/usr/bin/{tool}",
     )
-    monkeypatch.setattr("aai_cli.commands.setup.subprocess.run", FakeRun())
+    monkeypatch.setattr("aai_cli.setup_exec.subprocess.run", FakeRun())
 
     result = runner.invoke(app, ["setup", "status"])
     assert result.exit_code == 0
diff --git a/tests/test_setup_render.py b/tests/test_setup_render.py
index 8efd4bb2..2a2b1295 100644
--- a/tests/test_setup_render.py
+++ b/tests/test_setup_render.py
@@ -1,7 +1,7 @@
 import io
 
 from aai_cli import theme
-from aai_cli.commands.setup import render
+from aai_cli.setup_exec import render
 from aai_cli.steps import Step
 
 
diff --git a/tests/test_snapshots_help_root.py b/tests/test_snapshots_help_root.py
new file mode 100644
index 00000000..a168052f
--- /dev/null
+++ b/tests/test_snapshots_help_root.py
@@ -0,0 +1,22 @@
+"""Golden snapshot pinning the exact `assembly --help` render (the root screen).
+
+The root help lists every top-level command, so *any* new command changes it.
+It lives in its own snapshot module — separate from the per-group goldens — so
+that churn is confined to one trivially-regenerable ``.ambr`` file instead of
+conflicting inside a command group's goldens. It also pins the derived command
+ordering and panel layout (see ``aai_cli.command_registry``); refresh with::
+
+    uv run pytest tests/test_snapshots_help_root.py --snapshot-update
+"""
+
+from __future__ import annotations
+
+import pytest
+
+from tests._snapshot_surface import assert_help_matches_snapshot
+
+pytestmark = pytest.mark.usefixtures("fixed_render_size")
+
+
+def test_root_help_matches_snapshot(snapshot):
+    assert_help_matches_snapshot([], snapshot)  # [] presumably selects the root help