diff --git a/api/app/lib/model_catalog.py b/api/app/lib/model_catalog.py
index 10ae67f42..7e105c293 100644
--- a/api/app/lib/model_catalog.py
+++ b/api/app/lib/model_catalog.py
@@ -145,26 +145,26 @@ def set_model_default(conn, catalog_id: int) -> bool:
     """
     Set a model as the default for its provider+category.
 
-    Clears existing default for that provider+category first.
+    Clears existing default for that provider+category first. Uses a
+    subquery rather than fetchone() + tuple-unpack so the function works
+    regardless of the connection's cursor_factory — RealDictCursor returns
+    dict-like rows that silently yield column *names* on tuple unpacking.
     """
     with conn.cursor() as cur:
-        # Get the provider and category for this model
-        cur.execute(
-            "SELECT provider, category FROM kg_api.provider_model_catalog WHERE id = %s",
-            (catalog_id,),
-        )
-        row = cur.fetchone()
-        if not row:
-            return False
-
-        provider, category = row
-
-        # Clear existing default
+        # Clear any existing default that shares the new model's
+        # (provider, category), excluding the new model itself in case it
+        # is already the default (so this call is idempotent).
         cur.execute(
             """UPDATE kg_api.provider_model_catalog
                SET is_default = FALSE, updated_at = NOW()
-               WHERE provider = %s AND category = %s AND is_default = TRUE""",
-            (provider, category),
+               WHERE is_default = TRUE
+                 AND id <> %s
+                 AND (provider, category) = (
+                     SELECT provider, category
+                     FROM kg_api.provider_model_catalog
+                     WHERE id = %s
+                 )""",
+            (catalog_id, catalog_id),
         )
 
         # Set new default (also ensures enabled)
@@ -174,8 +174,9 @@ def set_model_default(conn, catalog_id: int) -> bool:
                WHERE id = %s""",
             (catalog_id,),
         )
+        updated = cur.rowcount > 0
         conn.commit()
-        return True
+        return updated
 
 
 def update_model_pricing(
diff --git a/docs/architecture/database-schema/ADR-206-closed-vocabulary-annealing-actions-with-tiered-escalation-and-epistemic-ledger.md b/docs/architecture/database-schema/ADR-206-closed-vocabulary-annealing-actions-with-tiered-escalation-and-epistemic-ledger.md
new file mode 100644
index 000000000..dab0c2e21
--- /dev/null
+++ b/docs/architecture/database-schema/ADR-206-closed-vocabulary-annealing-actions-with-tiered-escalation-and-epistemic-ledger.md
@@ -0,0 +1,607 @@
+---
+status: Draft
+date: 2026-05-22
+deciders:
+  - aaronsb
+  - claude
+related:
+  - ADR-200
+  - ADR-203
+---
+
+# ADR-206: Closed-Vocabulary Annealing Actions with Tiered Escalation and Epistemic Ledger
+
+## Context
+
+ADR-200 introduced annealing ontologies — `:Ontology` nodes that grow, merge,
+and dissolve under the supervision of a background worker that scores the
+graph and proposes structural changes. Phase 4 of ADR-200 added an executor
+that can carry out proposals automatically. Phases 1–4 are deployed; the
+mechanism works end-to-end on the happy path.
+
+The mechanism does *not* work on cases that fall between the two action types
+the schema actually offers. The `kg_api.annealing_proposals` table
+(migration 046) encodes the entire decision space as
+`proposal_type ∈ {promotion, demotion}`. Everything the system can decide must
+be one of those two verbs. Everything the executor can do must be the
+canonical implementation of one of those two verbs.
+
+This is too narrow.
+
+### Observed failure mode
+
+Annealing proposals 35, 36, and 37 in `kg_api.annealing_proposals` show the
+same pattern across three consecutive cycles, eight minutes apart:
+
+- Same donor ontology: `atlassian-api-bitbucket-dc`.
+- Same anchor concept (an authentication / connection sub-cluster).
+- Same downstream error: `Ontology 'atlassian-api-bitbucket-cloud' already exists.`
+- Same proposal_type: `promotion`.
+
+The LLM's own reasoning, captured in the `reasoning` column, *correctly*
+identified what should happen. It said, in effect, "the sub-cluster you found
+inside `atlassian-api-bitbucket-dc` is not a new domain — it is the same
+domain as the existing `atlassian-api-bitbucket-cloud` ontology, and these
+sources should be reassigned there." The reasoning was right. The action
+slot was wrong. There is no `SPLIT_INTO_EXISTING` verb, so the worker fell
+back to `promotion` with a colliding name, and the executor refused because
+the target already existed. Three identical failures in three cycles, because
+nothing about the proposal queue or the cycle planner is failure-aware.
+
+This single trace surfaces three layered defects, all in proposal vocabulary
+and decision-making — none of them in the executor primitives themselves
+(`create_ontology_node`, `rename_ontology`, `reassign_sources`,
+`dissolve_ontology` all exist and work):
+
+1. **Action vocabulary too narrow.** The LLM has an intent that has no
+   schema slot. Promotion and demotion cannot encode "split a sub-cluster
+   off donor X and merge it into existing target Y." Every intent that is
+   not promotion or demotion silently degrades to the nearest one and fails
+   at execution time.
+
+2. **LLM does not see the existing ontology namespace.** The prompt does not
+   include the inventory of existing ontologies, so the LLM cannot reason
+   about "merge into existing target." It can only describe a *new* target,
+   because that is the only target the prompt grammar permits.
+
+3. **Only one reasoning tier exists, and it is not failure-aware.** A single
+   LLM call decides the action with no escalation path and no memory of
+   prior failed attempts on the same signal. The system retries the same
+   bad decision until something else changes the underlying graph.
+
+A separate Phase-0 race condition between ingestion and annealing has been
+identified during this investigation. It is being filed as a GitHub issue
+and is **out of scope for this ADR**.
+
+### Why a closed vocabulary, not an open one
+
+The natural temptation is to let the LLM emit free-form instructions and
+have the executor interpret them. That collapses the boundary between
+decision and execution and reintroduces the exact problem this system was
+designed to avoid: the executor has to guess what the LLM meant, and any
+ambiguity becomes a runtime failure. A closed menu of fully-parameterised
+actions keeps the boundary sharp. The LLM picks one action and provides all
+the parameters; the executor maps that action to a known sequence of graph
+primitives with no interpretation. If the LLM cannot fit its intent into
+any action, the only honest answer is `ESCALATE`.
+
+### Why an escalation cascade, not a confidence dial
+
+Sonnet, today, is the only reasoner. It either succeeds or it fails, and
+when it fails there is no second opinion. This is a single point of failure
+in the decision pipeline. The fix is not "make Sonnet more confident" — it
+is to put a second reasoner above Sonnet that evaluates the *evaluation*,
+and a human above that for cases where two reasoners cannot agree. Each
+tier is invoked only when the tier below abstains. The chain is
+configured, not derived, so operators choose how much autonomy the system
+has.
+
+### Why the proposal queue must become a ledger
+
+ADR-200 framed proposals as an operational queue: items arrive, items are
+decided, items are executed or expire. Once we add a second reasoning tier
+that defends its decisions, and add control-tuning proposals where the
+system regulates itself, the queue stops being operational and starts being
+**evidence**. Past decisions are training data for future decisions.
+Confidence calibration becomes a closed loop. The queue becomes a
+permanent, mineable record of every structural decision the graph has ever
+made, with the reasoning chain attached.
+
+## Decision
+
+We extend the annealing system in four phases. Each phase is intended to
+land as a separate PR; together they replace the current Phase-4
+decision surface from ADR-200.
+
+### Phase 1 — Closed action vocabulary
+
+Replace `proposal_type ∈ {promotion, demotion}` with a closed menu of seven
+self-contained actions. Each action carries every parameter its execution
+needs; the executor performs no interpretation.
+
+| Action | Parameters | Executor mapping (existing primitives) |
+|---|---|---|
+| `SPLIT_NEW` | `donor_ontology`, `anchor_concept_id`, `new_name`, `new_description`, `cluster_selection ∈ {first_order, embedding_radius, named_concepts}`, `cluster_params` | `create_ontology_node` + `create_anchored_by_edge` + `reassign_sources` |
+| `SPLIT_INTO_EXISTING` | `donor_ontology`, `anchor_concept_id`, `target_ontology` (must exist, `≠ donor_ontology`), `cluster_selection`, `cluster_params` | `reassign_sources` only |
+| `MERGE` | `donor_ontologies` (≥2), `target_ontology` (survivor name OR new name), `new_description` (if new name) | `dissolve_ontology` × N → target |
+| `DECOMPOSE_TO_PRIMORDIAL` | `ontology`, `rationale` (required) | `dissolve_ontology` → primordial pool |
+| `RENAME` | `ontology`, `new_name`, `new_description` | `rename_ontology` + `rename_ontology_node` |
+| `NO_ACTION` | `reasoning` | nothing |
+| `ESCALATE` | `candidate_actions[]`, `what_i_know`, `what_i_dont_know`, `recommended_action`, `confidence` | pins to next tier in `escalation_chain` |
+
+`SPLIT_NEW` and `SPLIT_INTO_EXISTING` are deliberately distinct so the
+executor's validation can short-circuit obvious name collisions before any
+graph mutation is attempted. `SPLIT_INTO_EXISTING` requires
+`target_ontology` to already exist; `SPLIT_NEW` requires that no ontology
+named `new_name` exists yet. `SPLIT_INTO_EXISTING` is the schema slot whose
+absence caused the 35/36/37 failure trace.
+
+**Cluster selection is part of the action, not the executor.** The LLM
+picks the strategy and parameters that define the donated cluster:
+- `first_order` — anchor concept plus its direct neighbours.
+- `embedding_radius` — concepts within cosine distance `r` of the anchor.
+- `named_concepts` — an explicit list of concept IDs.
+
+The executor materialises the cluster deterministically from the strategy.
+This keeps the "what to move" decision with the reasoner and the "how to
+move it" mechanics with the executor.
+
+**Backward compatibility.** Existing `promotion` and `demotion` rows
+remain valid for already-executed history. The two strings become read-only
+aliases (`promotion` ↔ `SPLIT_NEW`, `demotion` ↔ `DECOMPOSE_TO_PRIMORDIAL`)
+when the history view loads them. New proposals always use the expanded
+vocabulary.
+
+**Prompt expansion.** The Sonnet prompt for action selection must include:
+- The full ontology inventory: names, concept counts, lifecycle states.
+  Without this, `SPLIT_INTO_EXISTING` and `MERGE` are unreachable.
+- The signal kind that produced the candidate (e.g. `high_overlap_pair`,
+  `low_coherence_low_affinity`).
+- Local graph context around the anchor (first-order neighbourhood,
+  cross-ontology edges).
+- Recent failed proposals for the same signal, with their failure reasons.
+  Without this, the system retries the same bad action indefinitely.
+
+#### System invariant — the primordial pool is permanent
+
+The primordial pool (ADR-200's "everything else") is upgraded from a
+*starting posture* to a **load-bearing, undeletable system ontology**.
+Dissolution never destroys concepts; it relocates them.
+
+- `MERGE` deposits dissolved members in a named target ontology.
+- `DECOMPOSE_TO_PRIMORDIAL` deposits dissolved members in the primordial
+  pool, where future cycles can re-cluster them.
+
+The primordial pool cannot be the target of dissolution itself, cannot be
+renamed, and cannot be deleted. This is the system's guarantee against
+catastrophic forgetting — every concept that has ever entered the graph
+remains addressable somewhere.
+
+#### Action menu, mapped to primitives
+
+```mermaid
+flowchart TD
+    A[LLM picks one action from closed menu]
+
+    A --> SN[SPLIT_NEW]
+    A --> SE[SPLIT_INTO_EXISTING]
+    A --> M[MERGE]
+    A --> DP[DECOMPOSE_TO_PRIMORDIAL]
+    A --> R[RENAME]
+    A --> NA[NO_ACTION]
+    A --> ES[ESCALATE]
+
+    SN --> SN1[create_ontology_node]
+    SN --> SN2[create_anchored_by_edge]
+    SN --> SN3[reassign_sources from donor]
+
+    SE --> SE1[reassign_sources to existing target]
+
+    M --> M1[dissolve_ontology x N]
+    M --> M2[deposits in target ontology]
+
+    DP --> DP1[dissolve_ontology]
+    DP --> DP2[deposits in primordial pool]
+
+    R --> R1[rename_ontology]
+    R --> R2[rename_ontology_node]
+
+    NA --> NA1[no graph mutation]
+
+    ES --> ES1[pin to next tier in escalation_chain]
+```
+
+### Phase 2 — Tiered escalation cascade
+
+A proposal does not have to be decided by Sonnet. A proposal has to be
+decided by *whichever tier the configured `escalation_chain` requires*,
+working from the bottom up. The chain is platform-level configuration
+(same scope as model provider and API key — admin only).
+
+Three tiers exist:
+
+- **Sonnet — classifier (medium tier).** The default decision-maker.
+  Receives the prompt described in Phase 1 and emits one closed action.
+  If Sonnet's confidence is at or above `golden_path_confidence` and the
+  action is not `ESCALATE`, the proposal proceeds to execution. Otherwise
+  it pins to the next tier.
+
+- **Opus — arbitrator (high tier), "evaluate the evaluator".** Opus is
+  invoked when Sonnet abstains, when Sonnet's confidence is below the
+  golden-path threshold, or when the operator explicitly chains it.
+  Opus's prompt frames Sonnet's instructions and Sonnet's response as
+  **evidence quoted in XML tags** — `<sonnet_prompt>...</sonnet_prompt>`,
+  `<sonnet_response>...</sonnet_response>`, `<similar_past_decisions>...</similar_past_decisions>`
+  — never as Opus's own task. Opus picks one of:
+  - `APPROVE` — Sonnet's action stands.
+  - `MODIFY` — emit a different closed action (same vocabulary as Sonnet).
+  - `REJECT` — refuse to act on this signal; `NO_ACTION` with reason.
+  - `ESCALATE_HUMAN` — only valid if the chain permits.
+  - `ADJUST_CONTROL` — propose a tuning change (see Phase 3).
+  Opus's output must include a **defense** — a written justification of
+  why this verdict was reached, intended to be read by future cycles and
+  by humans. The central design intent is that Opus *defends* a decision,
+  not just picks one. The defense is permanent record.
+
+- **Human — final tier.** Multi-turn dialogue. The human can ask follow-up
+  questions ("why didn't you pick `MERGE`?"), the agent responds with a
+  new turn that may include a revised recommendation, then the human
+  commits a final decision. The dialogue is recorded turn-by-turn.
+
+The chain is an ordered list of tiers, configured per platform. Examples:
+
+| `escalation_chain` | Behavior |
+|---|---|
+| `["opus"]` | Full autonomous: Sonnet → Opus → execute. No human involvement. |
+| `["opus", "human"]` | Hybrid: Opus arbitrates; only Opus's `ESCALATE_HUMAN` reaches the operator. |
+| `["human"]` | Skip Opus: every Sonnet abstention pins directly to the operator. |
+| `[]` | Every Sonnet recommendation pins to human. Maximum oversight. |
+
+Sonnet itself is always present — it is the bottom of the funnel. The
+chain configures what happens *above* it.
+
+#### Three-tier escalation cascade
+
+```mermaid
+flowchart TD
+    SIG[signal generated] --> SON[Sonnet classifies]
+
+    SON -->|action picked, confidence >= golden_path| EXE[execute]
+    SON -->|ESCALATE or low confidence| ESC1{"escalation_chain[0]"}
+
+    ESC1 -->|opus| OPUS[Opus arbitrates]
+    ESC1 -->|human| HUM[Human dialogue]
+    ESC1 -->|empty chain| HUM
+
+    OPUS -->|APPROVE| EXE
+    OPUS -->|MODIFY| EXE
+    OPUS -->|REJECT| TERM_REJ[terminal: rejected]
+    OPUS -->|ADJUST_CONTROL| CTRL[control-tuning proposal]
+    OPUS -->|ESCALATE_HUMAN| ESC2{chain permits?}
+
+    ESC2 -->|yes| HUM
+    ESC2 -->|no| TERM_REJ
+
+    HUM -->|approve| EXE
+    HUM -->|modify| EXE
+    HUM -->|reject| TERM_REJ
+
+    EXE -->|success| TERM_EXE[terminal: executed]
+    EXE -->|failure| TERM_FAIL[terminal: failed]
+
+    CTRL --> CTRL_REV[Phase 3 control review]
+```
+
+#### Schema — reasoning chain as first-class data
+
+A new table `kg_api.annealing_proposal_messages` holds the per-turn
+reasoning chain:
+
+- `id`, `proposal_id` (FK), `turn_no`
+- `role ∈ {sonnet, opus, human, system}`
+- `body` JSONB — prompt, response, parameters, defense, dialogue text
+- `created_at`
+
+The `annealing_proposals` row carries the **verdict** (which action ran,
+or which terminal state was reached). The `annealing_proposal_messages`
+table carries the **full reasoning chain** that produced the verdict.
+Splitting them keeps the proposal row cheap to query and keeps the
+reasoning chain unbounded.
+
+#### GC invariant — every proposal reaches a terminal state
+
+Non-terminal stalls are defects. The existing `expires_at` column becomes
+load-bearing rather than advisory.
+
+| State | Terminal? |
+|---|---|
+| `pending` | non-terminal |
+| `pending_opus_review` | non-terminal |
+| `pending_human_review` | non-terminal |
+| `executing` | non-terminal |
+| `executed` | terminal |
+| `failed` | terminal |
+| `rejected` | terminal |
+| `expired` | terminal |
+
+A `proposal_gc` worker scans non-terminal proposals on a heartbeat and
+forces stale ones to `expired` with a synthetic `NO_ACTION` decision and
+a `system`-role message explaining the GC. Per-turn timeouts apply to
+human dialogues — e.g. 72h with no human response expires the proposal.
+GC events log loudly so stalls are visible.
+
+#### Proposal state machine
+
+```mermaid
+stateDiagram-v2
+    [*] --> pending: signal generated
+
+    pending --> executing: Sonnet picks action, confidence >= threshold
+    pending --> pending_opus_review: Sonnet ESCALATE or low confidence (chain has opus)
+    pending --> pending_human_review: Sonnet ESCALATE or low confidence (chain has human)
+
+    pending_opus_review --> executing: APPROVE or MODIFY
+    pending_opus_review --> rejected: REJECT
+    pending_opus_review --> pending_human_review: ESCALATE_HUMAN (chain permits)
+    pending_opus_review --> rejected: ESCALATE_HUMAN (chain forbids)
+
+    pending_human_review --> executing: human approves or modifies
+    pending_human_review --> rejected: human rejects
+    pending_human_review --> expired: per-turn timeout (e.g. 72h)
+
+    executing --> executed: executor success
+    executing --> failed: executor error
+
+    pending --> expired: expires_at reached (GC)
+    pending_opus_review --> expired: expires_at reached (GC)
+    pending_human_review --> expired: expires_at reached (GC)
+    executing --> expired: stuck > GC threshold (defect, logged loudly)
+
+    executed --> [*]: permanent ledger entry
+    failed --> [*]: permanent ledger entry
+    rejected --> [*]: permanent ledger entry
+    expired --> [*]: permanent ledger entry
+```
+
+### Phase 3 — Control surface and self-regulation
+
+Annealing behaviour is governed by a set of knobs in
+`kg_api.annealing_options`. Phase 3 makes that surface explicit, audited,
+and partially self-tuneable.
+
+| Control | Who can change | Effect |
+|---|---|---|
+| `min_activity_for_cycle` | Admin + Opus | Cycle no-ops unless graph moved enough since last run. Current defaults are too eager; this raises the floor. |
+| `min_ontology_age_epochs` | Admin + Opus | Fresh ontologies are exempt from evaluation for N epochs. |
+| `golden_path_confidence` | Admin + Opus | Sonnet's threshold to execute without escalating. |
+| `opus_confidence` | Admin only (safety rail) | Opus's threshold to escalate to human. |
+| `failure_cooldown_epochs` | Admin + Opus | After a failure, the same `(anchor, action_type, target)` triple is not re-proposed for N epochs. |
+| `max_proposals_per_cycle` | Admin + Opus | Already exists in ADR-200. |
+| `phone_a_friend_cost_budget` | Admin only | Cost ceiling on Opus invocations per cycle. |
+| `automation_level` | Admin only (safety rail) | `autonomous` / `hitl`. |
+| `escalation_chain` | Admin only (safety rail) | Ordered list of tiers above Sonnet. |
+
+**Self-regulation invariant.** Opus may tune *operational* knobs (cadence,
+cooldowns, eligibility thresholds) via the `ADJUST_CONTROL` action. Opus
+may **not** tune *safety* knobs (`automation_level`, `escalation_chain`,
+`opus_confidence`, `phone_a_friend_cost_budget`). Each Opus-driven
+adjustment is itself a proposal in the queue, carrying a defense and
+visible in the audit trail. The system can regulate its own cadence, but
+cannot widen its own autonomy.
+
+**Snapshot, not live-read.** Each annealing cycle reads the control set
+once at cycle start and treats it as immutable for the duration of the
+cycle. If an `ADJUST_CONTROL` proposal lands mid-cycle, it takes effect
+at the next cycle. This avoids inconsistent half-applied policy mid-run.
+
+### Phase 4 — Epistemic ledger
+
+The proposal queue plus the reasoning-chain table together form a
+**permanent, mineable decision log**. Past decisions are training data for
+future decisions.
+
+#### Retention model
+
+Terminal proposals are kept forever. GC touches only non-terminal stalls.
+Storage cost of one proposal row plus its reasoning chain is dominated by
+the JSONB bodies and the embedding vector — bounded and acceptable at any
+plausible scale.
+
+#### Schema additions to `annealing_proposals`
+
+- `signal_embedding` — vector for nearest-neighbour retrieval. Lets Opus
+  run retrieval-augmented generation (RAG) over its own past arbitrations.
+- `signal_payload` — the full LLM input context, not a summary. The same
+  decision can be re-evaluated later with a stronger model.
+- `signal_kind` — enum identifying which scoring path produced the
+  candidate (`high_overlap_pair`, `low_coherence_low_affinity`,
+  `low_protection_score`, ...).
+- `outcome_quality` — numeric, set asynchronously by a follow-up worker
+  at 1/7/30 days post-decision, analysing post-execution graph metrics
+  to score whether the decision improved or degraded the structure.
+- `superseded_by` — proposal_id of a later proposal that reversed this one.
+- `graph_delta_summary` — concrete structural changes recorded at execution.
+
+#### Opus as RAG agent over its own past
+
+Opus's arbitration prompt injects a `<similar_past_decisions>` block
+retrieved by cosine similarity on `signal_embedding`. Each retrieved
+record carries its action, its defense, and its eventual
+`outcome_quality`. Opus sees not only "what was decided" but "how it
+worked out." Arbitration becomes informed by precedent.
+
+#### Calibration as a closed loop
+
+The ledger turns confidence calibration from an observability concern into
+an empirical one:
+
+- **Confidence vs outcome** is directly mineable. Pair every proposal's
+  recorded `confidence` against its eventual `outcome_quality`. A
+  miscalibrated threshold is visible immediately.
+- **Threshold auto-tuning** has empirical input. Opus reading past
+  outcomes can recommend a `golden_path_confidence` that maximises
+  success rate at the current cost ceiling.
+- **Human-vs-Opus agreement** is scoreable for any proposal both tiers
+  touched. Divergences are the highest-value review items.
+
+#### Read-side surface
+
+- `kg anneal history` — paginated decision log.
+- `kg anneal similar <id>` — nearest decisions by signal embedding.
+- `kg anneal calibrate` — confidence-vs-outcome calibration report.
+- Web: a **Decision Log** panel, distinct from the existing
+  **Proposal Queue** panel. Queue shows non-terminal items requiring
+  attention; Log shows the permanent ledger.
+
+## Consequences
+
+### Positive
+
+- The LLM's intent is no longer silently truncated to fit a binary
+  vocabulary. `SPLIT_INTO_EXISTING` and `MERGE` are first-class.
+- The 35/36/37 failure trace becomes impossible by construction: the
+  prompt sees the ontology inventory, the action exists, the executor
+  performs no name guessing.
+- Escalation is configured, not derived. Operators choose how much
+  autonomy the system has, on a single control surface.
+- Opus *defending* decisions — rather than re-running them — gives the
+  system a written record of reasoning that future cycles and future
+  humans can read.
+- The primordial pool guarantee turns dissolution into a safe,
+  reversible operation. Nothing is lost; only relocated.
+- The set of non-terminal proposals stops growing without bound: GC
+  forces every proposal to a terminal state.
+- Past decisions become training data. Calibration becomes a closed loop
+  rather than an observability dashboard.
+- Each of the four phases is independently shippable; later phases assume
+  earlier ones but earlier ones produce value on their own.
+
+### Negative
+
+- The schema gains a closed enum (the action vocabulary) and a new table
+  (`annealing_proposal_messages`). Vocabulary changes will require schema
+  migrations rather than configuration changes. This is the trade we are
+  making in exchange for a sharp decision/execution boundary.
+- Opus invocations cost more than Sonnet. The `phone_a_friend_cost_budget`
+  control bounds this, but the cost is real and non-zero. Calibration
+  determines whether the spend is worth it.
+- HITL multi-turn dialogues need UI surface area (turn-ordered display,
+  follow-up input, commit-decision button). Phase 2 cannot ship
+  user-visible HITL without ADR-700 work.
+- The closed vocabulary is, by definition, closed. Intents that fit none
+  of the other six actions must `ESCALATE` to the next tier in the chain;
+  we will discover missing actions only by watching the escalation rate.
+- Snapshotting controls at cycle start means an `ADJUST_CONTROL`
+  proposal does not take effect until the *next* cycle. Operators must
+  understand this delay.
+
+### Neutral
+
+- Existing executor primitives (`create_ontology_node`,
+  `create_anchored_by_edge`, `reassign_sources`, `dissolve_ontology`,
+  `rename_ontology`, `rename_ontology_node`) are reused unchanged. This
+  ADR adds no new graph mutations; it adds decision and bookkeeping
+  layers above them.
+- `promotion` and `demotion` survive as read-only history aliases. No
+  existing data is rewritten.
+- Signal generation continues to reuse the existing scorer / affinity /
+  degree machinery. The work added by this ADR is in prompting,
+  arbitration, recording, and GC — additive only.
+- The ledger's mineable-history view (Phase 4) overlaps in spirit with
+  ADR-203's graph epoch event log, but operates at a higher semantic
+  level (decisions about structure, not raw event facts).
+
+## Alternatives Considered
+
+### A. Open-ended action grammar
+
+Let the LLM emit free-form structural instructions ("split this concept
+into a new ontology and rename the donor") and have the executor parse
+intent.
+
+**Rejected because:** This is the failure mode we are trying to escape.
+A free-form grammar moves the interpretation cost from prompt design to
+runtime parsing. Every ambiguity becomes an execution failure. A closed
+menu with parameters is verbose, but every action is verifiable before
+graph mutation begins.
+
+### B. Add a third `proposal_type ∈ {promotion, demotion, merge}` and stop there
+
+Treat the 35/36/37 case as a missing third verb. Add `merge` and call it
+done.
+
+**Rejected because:** This is a point fix. It does not address the prompt
+gap (LLM cannot see existing ontology names), the missing escalation tier,
+the lack of failure-awareness across cycles, or the queue-vs-ledger
+distinction. Three months later the same investigation will surface a
+different intent (RENAME, SPLIT_INTO_EXISTING) with no schema slot, and
+we will be back here. The closed vocabulary is the smallest change that
+addresses the *class* of failure.
+
+### C. Pure confidence-dial autonomy (no Opus tier)
+
+Replace the escalation cascade with a single confidence threshold:
+Sonnet decides, Sonnet executes if confident, Sonnet escalates to human
+if not.
+
+**Rejected because:** It leaves Sonnet as the single point of failure in
+the decision pipeline. LLM calibration is unreliable; "high confidence"
+on a structurally wrong decision is exactly the failure mode we observed.
+The point of Opus is to be a second reasoner that evaluates Sonnet's
+output as evidence, not a second decision-maker that re-runs the
+classification.
+
+### D. Proposal queue purges after N days
+
+Treat the proposal table as operational ephemera: GC everything older
+than 30 days regardless of terminal state.
+
+**Rejected because:** This destroys the substrate Phase 4 depends on.
+Confidence-vs-outcome calibration, RAG retrieval of similar past
+decisions, human-vs-Opus agreement scoring — all of these require a
+durable history. The ledger framing is not optional once the escalation
+cascade exists; it is what gives the cascade something to learn from.
+
+### E. Per-ontology control overrides via a separate `ontology_overrides` table
+
+Allow operators to override platform-level controls on a per-ontology
+basis through a dedicated relational table.
+
+**Deferred, not rejected.** The data model question (JSONB column on
+`:Ontology` node versus separate `ontology_overrides` table) is open
+and surfaced below. A platform-wide control set is sufficient for the
+first deployment; per-ontology override is a Phase 5 concern.
+
+## Open Questions
+
+- **Confidence contract.** Sonnet (and Opus) emit a numeric confidence,
+  but LLMs are not reliable probability calibrators. A qualitative
+  contract — "are there ≥2 plausible actions remaining?" — may be more
+  robust than a numeric threshold. The current design uses numeric
+  thresholds and lets Phase 4's calibration report expose the
+  miscalibration; a qualitative emission path is a possible refinement.
+
+- **Per-ontology control overrides.** Should an operator be able to
+  pin `automation_level = hitl` for a single sensitive ontology while
+  the rest of the platform runs autonomous? If yes, JSONB on the
+  Ontology node or a separate `ontology_overrides` table? Deferred.
+
+- **Per-proposal escalation overrides.** Can a human reviewer request
+  "skip Opus on this one, I want raw Sonnet uncertainty"? More power,
+  more UI surface, and a way for a single operator to bypass the
+  platform-level safety rail. Deferred.
+
+- **Mid-cycle control change behaviour.** Snapshotting at cycle start
+  is the resolution in principle, but the operator-facing semantics
+  ("you changed the threshold but it won't apply until cycle N+1")
+  need explicit UI affordance.
+
+- **Outcome quality scoring function.** Phase 4's `outcome_quality` is
+  defined as "numeric, async-set, derived from post-execution graph
+  metrics." The exact metrics (coherence drift, mass drift, cross-edge
+  ratio change, ...) are unspecified and need calibration against the
+  first weeks of ledger data.
+
+## Related ADRs
+
+- **ADR-200** — Annealing Ontologies. This ADR extends Phase 4 of ADR-200,
+  redesigning its action vocabulary and decision flow.
+- **ADR-203** — Graph Epoch Event Log. Operates at a lower level (raw
+  events); this ADR's ledger sits above it semantically.
diff --git a/operator/configure.py b/operator/configure.py
index 318618bfd..7dbbf440c 100755
--- a/operator/configure.py
+++ b/operator/configure.py
@@ -187,6 +187,7 @@ def cmd_embedding(self, args):
         """Configure embedding provider by activating a pre-configured profile"""
         profile_id = getattr(args, 'profile_id', None)
         provider_name = getattr(args, 'provider', None)
+        device = getattr(args, 'device', None)
 
         # If no profile_id or provider specified, list available profiles
         if profile_id is None and provider_name is None:
@@ -229,9 +230,21 @@ def cmd_embedding(self, args):
                 # Activate selected profile (use profile['id'] from query, not profile_id arg)
                 cur.execute("UPDATE kg_api.embedding_profile SET active = true WHERE id = %s", (profile['id'],))
 
+                # Optionally update the compute device on the activated profile.
+                # The wizard maps its GPU_MODE choice (mac/nvidia/amd/cpu) to a
+                # PyTorch device string here so the API container loads the
+                # model on the right accelerator at startup.
+                effective_device = profile['device']
+                if device:
+                    cur.execute(
+                        "UPDATE kg_api.embedding_profile SET device = %s WHERE id = %s",
+                        (device, profile['id']),
+                    )
+                    effective_device = device
+
                 conn.commit()
 
-                device_info = f" ({profile['device']})" if profile['device'] else ""
+                device_info = f" ({effective_device})" if effective_device else ""
                 print(f"✅ Activated: [{profile['id']}] {profile['provider']} / {profile['model_name']} ({profile['embedding_dimensions']} dims, {profile['precision']}){device_info}")
                 return True
 
@@ -323,6 +336,71 @@ def _validate_provider_key(self, provider, key):
                 return False
         return None
 
+    def _fetch_catalog_via_sdk(self, provider):
+        """Fetch a provider's model catalog without running the provider's __init__.
+
+        AnthropicProvider and OllamaProvider eagerly construct an OpenAI
+        embedding provider in __init__ when none is supplied — but the
+        operator container has no loaded EmbeddingModelManager (only the API
+        container initializes one at startup), so get_embedding_provider()
+        returns None and the eager fallback runs and fails. fetch_model_catalog
+        itself only needs self.client (or self.api_key for OpenRouter), so the
+        instance is allocated via __new__ and given just that attribute,
+        reusing the existing fetch_model_catalog implementation rather than
+        duplicating per-provider pricing/feature dicts.
+
+        Args:
+            provider: Lowercase provider name, e.g. "openai", "anthropic",
+                "openrouter". Any other name goes through get_provider().
+
+        Returns:
+            Whatever the provider's fetch_model_catalog() returns.
+
+        Raises:
+            RuntimeError: If _load_api_key() returns no key for the provider.
+        """
+        from api.app.lib.ai_providers import (
+            _load_api_key,
+            OpenAIProvider,
+            AnthropicProvider,
+            OpenRouterProvider,
+        )
+
+        # provider name -> (display label, env var name, provider class)
+        specs = {
+            "openai": ("OpenAI", "OPENAI_API_KEY", OpenAIProvider),
+            "anthropic": ("Anthropic", "ANTHROPIC_API_KEY", AnthropicProvider),
+            "openrouter": ("OpenRouter", "OPENROUTER_API_KEY", OpenRouterProvider),
+        }
+        if provider not in specs:
+            # Other providers (ollama, llamacpp) — fall back to the original
+            # construction. They don't currently appear in the guided wizard, and
+            # their catalog refresh has different requirements (base_url, etc.).
+            from api.app.lib.ai_providers import get_provider
+            return get_provider(provider).fetch_model_catalog()
+
+        label, env_var, provider_cls = specs[provider]
+        key = _load_api_key(provider, None, env_var)
+        if not key:
+            raise RuntimeError(
+                f"{label} API key not configured. Store it first via "
+                f"`configure.py api-key {provider}`."
+            )
+
+        prov = provider_cls.__new__(provider_cls)
+        if provider == "openai":
+            from openai import OpenAI
+            prov.client = OpenAI(api_key=key)
+        elif provider == "anthropic":
+            from anthropic import Anthropic
+            prov.client = Anthropic(api_key=key)
+        else:
+            # OpenRouter's fetch_model_catalog uses self.api_key for the
+            # Authorization header and OPENROUTER_BASE_URL from the class,
+            # so no SDK client (and no openai import) is needed here.
+            prov.api_key = key
+        return prov.fetch_model_catalog()
+
     def cmd_api_key(self, args):
         """Store encrypted API key"""
         provider = args.provider
@@ -469,11 +547,9 @@ def cmd_models(self, args):
 
                 print(f"🔄 Fetching model catalog from {provider}...")
                 try:
-                    from api.app.lib.ai_providers import get_provider
                     from api.app.lib.model_catalog import upsert_catalog_entries
 
-                    prov = get_provider(provider)
-                    entries = prov.fetch_model_catalog()
+                    entries = self._fetch_catalog_via_sdk(provider.lower())
 
                     if not entries:
                         print(f"⚠️  No models returned from {provider}")
@@ -695,6 +771,7 @@ def main():
     embed_parser = subparsers.add_parser('embedding', help='List or activate embedding profile')
     embed_parser.add_argument('profile_id', nargs='?', type=int, help='Profile ID to activate (omit to list profiles)')
     embed_parser.add_argument('--provider', help='Select profile by provider name (local, openai)')
+    embed_parser.add_argument('--device', help='Set compute device on the activated profile (cpu, cuda, mps)')
 
     # api-key
     key_parser = subparsers.add_parser('api-key', help='Store encrypted API key')
diff --git a/operator/lib/guided-init.sh b/operator/lib/guided-init.sh
index 52aa57863..74435f73e 100755
--- a/operator/lib/guided-init.sh
+++ b/operator/lib/guided-init.sh
@@ -343,9 +343,30 @@ fi
 docker exec kg-operator python /workspace/operator/configure.py admin --password "$ADMIN_PASSWORD"
 echo ""
 
-# Step 4: Configure AI provider (interactive selection)
+# Step 4: Configure local embedding profile (GPU_MODE-aware)
+# This runs BEFORE AI provider selection because the embedding model is
+# system-level infrastructure that the API container loads at startup. It
+# is also activated against the device chosen at the very start of the
+# wizard, so the user's GPU/CPU intent is honored end-to-end.
 echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
-echo -e "${BOLD}Step 4/9: Choosing AI extraction provider${NC}"
+echo -e "${BOLD}Step 4/9: Configuring local embedding profile${NC}"
+echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
+echo ""
+
+case "$GPU_MODE" in
+    mac)                 EMBEDDING_DEVICE="mps" ;;
+    nvidia)              EMBEDDING_DEVICE="cuda" ;;
+    amd|amd-host)        EMBEDDING_DEVICE="cuda" ;;  # PyTorch ROCm presents as cuda
+    cpu|*)               EMBEDDING_DEVICE="cpu" ;;
+esac
+
+echo "Activating local embeddings (nomic-ai/nomic-embed-text-v1.5) on device: ${EMBEDDING_DEVICE}"
+docker exec kg-operator python /workspace/operator/configure.py embedding --provider local --device "$EMBEDDING_DEVICE"
+echo ""
+
+# Step 5: Configure AI provider (interactive selection)
+echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
+echo -e "${BOLD}Step 5/9: Choosing AI extraction provider${NC}"
 echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
 echo ""
 echo "Choose your AI extraction provider:"
@@ -392,9 +413,9 @@ case "$REPLY" in
 esac
 echo ""
 
-# Step 5: Store API key (skip for Ollama)
+# Step 6: Store API key (skip for Ollama)
 echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
-echo -e "${BOLD}Step 5/9: Validating API key${NC}"
+echo -e "${BOLD}Step 6/9: Validating API key${NC}"
 echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
 echo ""
 
@@ -427,9 +448,9 @@ while [ "$API_KEY_STORED" = false ]; do
     fi
 done
 
-# Step 6: Refresh model catalog and select model
+# Step 7: Refresh model catalog and select model
 echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
-echo -e "${BOLD}Step 6/9: Selecting extraction model${NC}"
+echo -e "${BOLD}Step 7/9: Selecting extraction model${NC}"
 echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
 echo ""
 
@@ -625,15 +646,6 @@ else
 fi
 echo ""
 
-# Step 7: Configure embeddings
-echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
-echo -e "${BOLD}Step 7/9: Configuring embedding provider${NC}"
-echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
-echo ""
-echo "Activating local embeddings (nomic-ai/nomic-embed-text-v1.5)..."
-docker exec kg-operator python /workspace/operator/configure.py embedding --provider local
-echo ""
-
 # Step 8: Configure Garage credentials
 echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
 echo -e "${BOLD}Step 8/9: Configuring Garage object storage${NC}"