diff --git a/.vscode/settings.json b/.vscode/settings.json
new file mode 100644
index 00000000..f1900a66
--- /dev/null
+++ b/.vscode/settings.json
@@ -0,0 +1,8 @@
+{
+  "python.defaultInterpreterPath": "${workspaceFolder}/.venv/bin/python",
+  "python.testing.pytestEnabled": true,
+  "python.testing.unittestEnabled": false,
+  "evenBetterToml.schema.enabled": false,
+  "mypy-type-checker.importStrategy": "fromEnvironment",
+  "mypy-type-checker.preferDaemon": true
+}
diff --git a/aai_cli/app/init_exec.py b/aai_cli/app/init_exec.py
index bf1602ef..10bfd731 100644
--- a/aai_cli/app/init_exec.py
+++ b/aai_cli/app/init_exec.py
@@ -61,7 +61,11 @@ def _pick_template() -> str:
     choice = questionary.select(
         "Pick a template",
         choices=[
-            questionary.Choice(title=templates.title_for(t), value=t)
+            questionary.Choice(
+                title=templates.title_for(t),
+                value=t,
+                description=templates.description_for(t),
+            )
             for t in templates.TEMPLATE_ORDER
         ],
     ).ask()
@@ -101,6 +105,10 @@ def _active_env_vars() -> dict[str, str]:
         "ASSEMBLYAI_STREAMING_HOST": env.streaming_host,
         # Voice Agent host mirrors the streaming host's naming across environments.
         "ASSEMBLYAI_AGENTS_HOST": env.streaming_host.replace("streaming", "agents", 1),
+        # Streaming-TTS host for the cascade (agent-framework) template. Empty in
+        # production, where streaming TTS has no host; that template then refuses to
+        # run and points at --sandbox.
+        "ASSEMBLYAI_TTS_HOST": env.streaming_tts_host,
     }
 
 
diff --git a/aai_cli/init/scaffold.py b/aai_cli/init/scaffold.py
index a25159ee..55d851df 100644
--- a/aai_cli/init/scaffold.py
+++ b/aai_cli/init/scaffold.py
@@ -39,7 +39,7 @@ def _template_root(template: str) -> Traversable:
         )
     # Navigate from the `aai_cli.init` package (templates/ has no __init__.py, so it
     # is not itself an importable package).
-    root = resources.files("aai_cli.init") / "templates" / template
+    root = resources.files("aai_cli.init") / "templates" / templates.dir_for(template)
     # Defense in depth: the registry should only list shipped templates, but if it ever
     # drifts ahead of the on-disk directories, fail cleanly instead of with a traceback.
     if not root.is_dir():
@@ -76,10 +76,17 @@ def existing_env_key(target: Path) -> str | None:
     return None
 
 
-def _copy_tree(node: Traversable, dest: Path) -> None:
+def _copy_tree(node: Traversable, dest: Path, *, top_level: bool = True) -> None:
     for child in node.iterdir():
         if child.name in _SKIP_NAMES or child.name.endswith(".pyc"):
             continue
+        # The template dir is an importable package in-repo (so it can be type-checked),
+        # but its root __init__.py is just that in-repo marker — not part of the shipped
+        # app. Skip it so the scaffolded project root doesn't become a stray package.
+        # (api/'s own __init__.py is one level down and IS copied — the shipped app's
+        # `from . import settings` needs it.)
+        if top_level and child.name == "__init__.py":
+            continue
         name = _DOTFILE_RENAMES.get(child.name, child.name)
         out = dest / name
         if child.is_dir():
@@ -87,7 +94,7 @@ def _copy_tree(node: Traversable, dest: Path) -> None:
             # node's parent before descending, so `dest` (and `out.parent`) already
             # exists. exist_ok is exercised by the idempotent re-scaffold test.
             out.mkdir(parents=True, exist_ok=True)  # pragma: no mutate
-            _copy_tree(child, out)
+            _copy_tree(child, out, top_level=False)
         else:
             out.parent.mkdir(parents=True, exist_ok=True)  # pragma: no mutate
             out.write_bytes(child.read_bytes())
diff --git a/aai_cli/init/templates.py b/aai_cli/init/templates.py
deleted file mode 100644
index a16ee625..00000000
--- a/aai_cli/init/templates.py
+++ /dev/null
@@ -1,24 +0,0 @@
-from __future__ import annotations
-
-# id -> human-facing title shown in the picker. Ids are Vercel-style
-# project/example slugs rather than CLI command names.
-#
-# Every id here MUST have a directory under templates/<id>/ (a test enforces both
-# directions) — the picker must never advertise a template that would crash on scaffold.
-TEMPLATES: dict[str, str] = {
-    "audio-transcription": "Audio Transcription",
-    "live-captions": "Live Captions",
-    "voice-agent": "Voice Agent",
-}
-
-# Display order for the picker and `--help`.
-TEMPLATE_ORDER: tuple[str, ...] = ("audio-transcription", "live-captions", "voice-agent")
-
-
-def is_template(name: str) -> bool:
-    return name in TEMPLATES
-
-
-def title_for(name: str) -> str:
-    """The human title for a template id, or the raw id if unknown."""
-    return TEMPLATES.get(name, name)
diff --git a/aai_cli/init/templates/__init__.py b/aai_cli/init/templates/__init__.py
new file mode 100644
index 00000000..3fa4dcd5
--- /dev/null
+++ b/aai_cli/init/templates/__init__.py
@@ -0,0 +1,50 @@
+from __future__ import annotations
+
+# id -> human-facing title shown in the picker. Ids are Vercel-style
+# project/example slugs rather than CLI command names.
+#
+# Every id here MUST have a directory under templates/<dir_for(id)>/ (a test enforces
+# both directions) — the picker must never advertise a template that would crash on scaffold.
+TEMPLATES: dict[str, str] = {
+    "audio-transcription": "Audio Transcription",
+    "live-captions": "Live Captions",
+    "voice-agent": "Voice Agent",
+    "agent-framework": "Agent Framework",
+}
+
+# Display order for the picker and `--help`.
+TEMPLATE_ORDER: tuple[str, ...] = (
+    "audio-transcription",
+    "live-captions",
+    "voice-agent",
+    "agent-framework",
+)
+
+
+# One-line description shown beside each title in the interactive picker. Keys must
+# match TEMPLATES exactly (a test enforces both directions).
+DESCRIPTIONS: dict[str, str] = {
+    "audio-transcription": "Transcribe audio & video files, URLs, and YouTube — speaker labels and audio intelligence",
+    "live-captions": "Live real-time captions from your microphone over the Streaming API",
+    "voice-agent": "Full-duplex voice agent (speech in, LLM reply, speech out) via the Voice Agent API",
+    "agent-framework": "Cascaded voice agent you orchestrate: Streaming STT, the LLM Gateway, and sandbox TTS",
+}
+
+
+def dir_for(name: str) -> str:
+    """The on-disk template directory for an id: kebab id -> underscore package dir."""
+    return name.replace("-", "_")
+
+
+def is_template(name: str) -> bool:  # True when `name` is a key of the TEMPLATES registry
+    return name in TEMPLATES
+
+
+def title_for(name: str) -> str:
+    """The human title for a template id, or the raw id if unknown."""
+    return TEMPLATES.get(name, name)
+
+
+def description_for(name: str) -> str:
+    """The one-line picker description for a template id, or '' when unknown."""
+    return DESCRIPTIONS.get(name, "")
diff --git a/aai_cli/init/templates/agent_framework/AGENTS.md b/aai_cli/init/templates/agent_framework/AGENTS.md
new file mode 100644
index 00000000..4f98593c
--- /dev/null
+++ b/aai_cli/init/templates/agent_framework/AGENTS.md
@@ -0,0 +1,37 @@
+# Agent Notes
+
+This is a buildless FastAPI + browser starter for a **cascaded** voice agent
+(Streaming STT -> LLM Gateway -> streaming TTS), orchestrated server-side. Run it with:
+
+```sh
+assembly dev
+```
+
+## Map
+
+- `api/settings.py`: API key, hosts, model, voice, system prompt, greeting, sample rates.
+- `api/cascade.py`: the orchestrator — STT/TTS socket helpers, the LLM stream, turn
+  detection, barge-in, and the `/ws` browser adapter. Built with injected `Deps` so it
+  is tested against fakes.
+- `api/index.py`: FastAPI app — serves the page/assets and the `/ws` WebSocket.
+- `static/app.js`: WebSocket lifecycle, mic capture, UI state, and event handling
+  (the `SESSION_CONFIG` block at the top is the primary edit point).
+- `static/audio.js`: microphone pipeline, PCM conversion, playback queue, barge-in.
+- `static/styles.css`: visual styling only; the top `:root` block is the theme edit point.
+- `static/index.html`: page structure and static asset links.
+
+## Change Points
+
+- Model, voice, prompt, greeting, sample rates: edit `api/settings.py`.
+- Cascade behavior (turn detection, barge-in, LLM->TTS piping): edit `api/cascade.py`.
+- Transcript log rendering: edit `addTurn` in `static/app.js`.
+- Playback, barge-in, or PCM conversion: edit `static/audio.js`.
+
+## Invariants
+
+- Never expose `ASSEMBLYAI_API_KEY` or any server secret in `static/`.
+- Streaming TTS is sandbox-only; keep this app pointed at the sandbox hosts.
+- `reply.audio` carries base64 PCM on the `data` field.
+- The browser <-> backend event protocol matches the `voice-agent` template — keep it
+  stable so `static/audio.js` and the UI stay reusable.
+- Keep the app buildless unless the user explicitly asks for a frontend toolchain.
diff --git a/aai_cli/init/templates/audio-transcription/Dockerfile b/aai_cli/init/templates/agent_framework/Dockerfile
similarity index 100%
rename from aai_cli/init/templates/audio-transcription/Dockerfile
rename to aai_cli/init/templates/agent_framework/Dockerfile
diff --git a/aai_cli/init/templates/audio-transcription/Procfile b/aai_cli/init/templates/agent_framework/Procfile
similarity index 100%
rename from aai_cli/init/templates/audio-transcription/Procfile
rename to aai_cli/init/templates/agent_framework/Procfile
diff --git a/aai_cli/init/templates/agent_framework/README.md b/aai_cli/init/templates/agent_framework/README.md
new file mode 100644
index 00000000..019152a1
--- /dev/null
+++ b/aai_cli/init/templates/agent_framework/README.md
@@ -0,0 +1,49 @@
+# Talk to a cascaded voice agent — AssemblyAI agent-framework starter
+
+Click connect and talk. Unlike the `voice-agent` template (which uses AssemblyAI's
+all-in-one Voice Agent API), this app is a **cascade your own backend orchestrates**:
+Streaming STT transcribes you, the LLM Gateway generates a reply, and streaming TTS
+speaks it back — with turn detection and barge-in handled server-side. The browser
+holds one WebSocket to your backend, so your API key never reaches the client.
+
+## Sandbox-only
+
+Streaming TTS has no production host, so the whole cascade runs against the AssemblyAI
+sandbox with a sandbox key. Scaffold it that way:
+
+```sh
+assembly --sandbox init agent-framework
+```
+
+That pins the sandbox hosts in `.env`. A production scaffold refuses to run and points at `--sandbox`.
+
+## Run locally
+
+```sh
+assembly dev   # opens http://localhost:3000 (allow microphone access; headphones recommended)
+```
+
+`ASSEMBLYAI_API_KEY` is read from `.env` (created for you by `assembly init`).
+
+## Deploy
+
+This app keeps a **long-running WebSocket**, so it needs a persistent process — not
+Vercel's serverless functions. Use the shipped `Procfile`/`Dockerfile` on Render,
+Railway, Fly.io, or Google Cloud Run (`gcloud run deploy --source .`):
+
+```sh
+uvicorn api.index:app --host 0.0.0.0 --port $PORT
+```
+
+Set `ASSEMBLYAI_API_KEY` and the three sandbox host vars (`ASSEMBLYAI_STREAMING_HOST`,
+`ASSEMBLYAI_TTS_HOST`, `ASSEMBLYAI_LLM_GATEWAY_URL`) in the platform's environment.
+
+## Ideas to extend
+
+- Change the `MODEL`, `VOICE`, `SYSTEM_PROMPT`, `GREETING`, or `MAX_HISTORY` in
+  `api/settings.py`.
+- Replies already stream into TTS sentence-by-sentence as the LLM produces them
+  (`_generate_reply` flushes on each `.`/`!`/`?`), and a sliding window of
+  `MAX_HISTORY` messages gives the agent memory of the conversation. Tune the
+  sentence boundary or `MAX_HISTORY` to trade latency, cost, and recall.
+- Add tools (function calling) on the LLM leg so the agent can look things up.
diff --git a/aai_cli/init/templates/audio-transcription/api/__init__.py b/aai_cli/init/templates/agent_framework/__init__.py
similarity index 100%
rename from aai_cli/init/templates/audio-transcription/api/__init__.py
rename to aai_cli/init/templates/agent_framework/__init__.py
diff --git a/aai_cli/init/templates/live-captions/api/__init__.py b/aai_cli/init/templates/agent_framework/api/__init__.py
similarity index 100%
rename from aai_cli/init/templates/live-captions/api/__init__.py
rename to aai_cli/init/templates/agent_framework/api/__init__.py
diff --git a/aai_cli/init/templates/agent_framework/api/cascade.py b/aai_cli/init/templates/agent_framework/api/cascade.py
new file mode 100644
index 00000000..e2b27bcf
--- /dev/null
+++ b/aai_cli/init/templates/agent_framework/api/cascade.py
@@ -0,0 +1,404 @@
+"""Server-side cascade orchestrator for the agent-framework template.
+
+The browser opens one WebSocket to FastAPI and the backend wires three AssemblyAI
+primitives together — Streaming STT, the LLM Gateway, and streaming TTS — so every
+credential stays on the server. The orchestrator takes injected connect-factories and
+an LLM callable (`Deps`) so it runs hermetically against fakes in tests, the same
+seam `aai_cli/tts/session.py` uses.
+
+Browser protocol (identical to the voice-agent template):
+  in : {"type": "input.audio", "audio": <base64 PCM>}
+  out: transcript.user / transcript.agent / reply.audio (base64 in `data`) /
+       input.speech.started / reply.done / session.error
+"""
+
+from __future__ import annotations
+
+import asyncio
+import base64
+import contextlib
+import json
+from collections.abc import AsyncIterator, Awaitable, Callable
+from dataclasses import dataclass
+from typing import TYPE_CHECKING, Protocol
+from urllib.parse import urlencode
+
+from fastapi import WebSocket
+
+if TYPE_CHECKING:
+    from openai.types.chat import ChatCompletionMessageParam
+    from websockets.asyncio.client import ClientConnection
+
+
+class _Settings(Protocol):  # structural type for the settings object the cascade reads
+    API_KEY: str  # Authorization header on the STT/TTS sockets and the LLM client's api_key
+    STREAMING_HOST: str  # host of the Streaming STT WebSocket URL
+    TTS_HOST: str  # host of the streaming-TTS WebSocket URL; empty makes the cascade refuse
+    LLM_GATEWAY_URL: str  # base_url handed to the OpenAI-compatible client
+    MODEL: str  # model id of the chat-completions request
+    VOICE: str  # `voice` query parameter of the TTS URL
+    SYSTEM_PROMPT: str  # first (system) message of every LLM request
+    GREETING: str  # spoken when a session starts and seeded into the history
+    MAX_HISTORY: int  # sliding-window cap on stored conversation messages
+    INPUT_SAMPLE_RATE: int  # `sample_rate` query parameter of the STT URL
+    OUTPUT_SAMPLE_RATE: int  # `sample_rate` query parameter of the TTS URL
+
+
+class _Browser(Protocol):  # the (send, recv) shape run_session needs; see FastAPIBrowser
+    async def send(self, event: dict[str, object]) -> None:
+        """Send one protocol event to the browser."""
+
+    async def recv(self) -> dict[str, object] | None:
+        """Receive the next browser message, or None once the socket closes."""
+
+
+def unavailable_reason(settings: _Settings) -> str | None:
+    """Why the cascade can't run, or None when it can.
+
+    Streaming TTS has no production host, so an empty TTS host means the user must
+    re-scaffold against the sandbox.
+    """
+    if not settings.API_KEY:
+        return "ASSEMBLYAI_API_KEY is not set — configure it in your deployment's environment."
+    if not settings.TTS_HOST:
+        return (
+            "Streaming TTS has no production host, so this cascade is sandbox-only. "
+            "Re-scaffold against the sandbox: assembly --sandbox init agent-framework."
+        )
+    return None
+
+
+def stt_url(settings: _Settings) -> str:
+    """The Streaming v3 WebSocket URL with PCM + turn-formatting params."""
+    params = urlencode(
+        {
+            "sample_rate": settings.INPUT_SAMPLE_RATE,
+            "encoding": "pcm_s16le",
+            "speech_model": "u3-rt-pro",
+            "format_turns": "true",
+        }
+    )
+    return f"wss://{settings.STREAMING_HOST}/v3/ws?{params}"
+
+
+def tts_url(settings: _Settings) -> str:
+    """The streaming-TTS WebSocket URL for the configured voice and sample rate."""
+    params = urlencode({"voice": settings.VOICE, "sample_rate": settings.OUTPUT_SAMPLE_RATE})
+    return f"wss://{settings.TTS_HOST}/v1/ws/?{params}"
+
+
+def is_final_user_turn(msg: dict[str, object]) -> bool:
+    """True for a finalized, formatted end-of-turn (the cue to reply)."""
+    return bool(msg.get("end_of_turn")) and bool(msg.get("turn_is_formatted"))
+
+
+def build_messages(
+    system_prompt: str, history: list[ChatCompletionMessageParam]
+) -> list[ChatCompletionMessageParam]:
+    """The chat `messages` array: the system prompt followed by the conversation so far."""
+    return [{"role": "system", "content": system_prompt}, *history]  # new list; history untouched
+
+
+def _trim_history(history: list[ChatCompletionMessageParam], max_messages: int) -> None:
+    """Cap the running history to the most recent ``max_messages`` (sliding window)."""
+    if len(history) > max_messages:
+        del history[: len(history) - max_messages]
+
+
+def _split_sentences(buffer: str) -> tuple[list[str], str]:
+    """Split off complete sentences (each ending in . ! ?). Return (sentences, remainder)."""
+    sentences: list[str] = []
+    start = 0
+    for index, char in enumerate(buffer):
+        if char in ".!?":
+            sentence = buffer[start : index + 1].strip()
+            if sentence:
+                sentences.append(sentence)
+            start = index + 1
+    return sentences, buffer[start:]
+
+
+@dataclass
+class Deps:
+    """Injected cascade dependencies. `Deps.real(settings)` wires the live clients;
+    tests pass fakes with the same shapes."""
+
+    connect_stt: Callable[[], Awaitable[ClientConnection]]  # opens the STT socket
+    connect_tts: Callable[[], Awaitable[ClientConnection]]  # opens one TTS socket per call
+    llm_stream: Callable[[list[ChatCompletionMessageParam]], AsyncIterator[str]]  # text deltas
+    settings: _Settings
+
+    @classmethod
+    def real(cls, settings: _Settings) -> Deps:  # production wiring of the three legs
+        return cls(
+            connect_stt=lambda: _connect_stt(settings),
+            connect_tts=lambda: _connect_tts(settings),
+            llm_stream=lambda messages: _llm_stream(settings, messages),
+            settings=settings,
+        )
+
+
+class Session:
+    """Tracks the in-flight reply so a new user turn can barge in and cancel it."""
+
+    def __init__(self) -> None:
+        self.reply_task: asyncio.Task[None] | None = None  # greeting or reply being spoken
+        self.history: list[ChatCompletionMessageParam] = []  # user/assistant messages so far
+
+    async def cancel_reply(self) -> None:
+        task, self.reply_task = self.reply_task, None  # detach first, then cancel
+        if task is not None and not task.done():
+            task.cancel()
+            with contextlib.suppress(asyncio.CancelledError, Exception):
+                await task  # wait for the task to unwind; its outcome is ignored
+
+    async def drain(self) -> None:
+        """Await the in-flight reply to natural completion (used when STT closes)."""
+        task = self.reply_task
+        if task is not None:
+            with contextlib.suppress(Exception):  # a failed reply must not fail the drain
+                await task
+
+
+async def _connect_stt(settings: _Settings) -> ClientConnection:
+    import websockets
+
+    return await websockets.connect(
+        stt_url(settings), additional_headers={"Authorization": settings.API_KEY}
+    )
+
+
+async def _connect_tts(settings: _Settings) -> ClientConnection:
+    import websockets
+
+    # max_size=None: a synthesis's Audio frames can exceed the 1 MiB default.
+    return await websockets.connect(
+        tts_url(settings),
+        additional_headers={"Authorization": settings.API_KEY},
+        max_size=None,
+    )
+
+
+async def _llm_stream(
+    settings: _Settings, messages: list[ChatCompletionMessageParam]
+) -> AsyncIterator[str]:
+    from openai import AsyncOpenAI
+
+    client = AsyncOpenAI(base_url=settings.LLM_GATEWAY_URL, api_key=settings.API_KEY)
+    stream = await client.chat.completions.create(
+        model=settings.MODEL, messages=messages, stream=True
+    )
+    async for chunk in stream:
+        # The gateway (Anthropic-backed, OpenAI-compatible) ends the stream with a
+        # usage/final chunk that carries no choices — skip it instead of IndexError-ing.
+        if not chunk.choices:
+            continue
+        delta = chunk.choices[0].delta.content
+        if delta:
+            yield delta
+
+
+async def _safe_close(conn: ClientConnection) -> None:  # best-effort close
+    with contextlib.suppress(Exception):  # a failing close is deliberately ignored
+        await conn.close()
+
+
+async def _pump_mic(browser: _Browser, stt: ClientConnection) -> None:
+    """Forward each base64 mic frame from the browser to the STT socket."""
+    while True:
+        msg = await browser.recv()
+        if msg is None:
+            return
+        audio = msg.get("audio") if msg.get("type") == "input.audio" else None
+        if isinstance(audio, str):
+            try:
+                pcm = base64.b64decode(audio)
+            except ValueError:
+                continue  # ignore a malformed audio frame rather than kill the session
+            await stt.send(pcm)
+
+
+async def _synthesize(browser: _Browser, tts: ClientConnection, text: str) -> None:
+    """Drive the TTS protocol on an open socket, forwarding Audio as reply.audio."""
+    begin = json.loads(await tts.recv())  # the first frame must be the Begin handshake
+    if begin.get("type") != "Begin":
+        raise RuntimeError(f"TTS did not begin (got {begin.get('type')!r}).")
+    await tts.send(json.dumps({"type": "Generate", "text": text}))
+    await tts.send(json.dumps({"type": "Flush"}))
+    # Iterate the socket (like _pump_stt) so a close before the final Audio frame ends
+    # the loop cleanly instead of raising ConnectionClosed out of the reply.
+    async for raw in tts:
+        frame = json.loads(raw)
+        kind = frame.get("type")
+        if kind == "Audio":
+            await browser.send({"type": "reply.audio", "data": frame.get("audio", "")})
+            if frame.get("is_final"):
+                break  # last audio chunk of this synthesis
+        elif kind == "Error":
+            raise RuntimeError(frame.get("error") or "TTS error")
+    with contextlib.suppress(Exception):
+        await tts.send(json.dumps({"type": "Terminate"}))  # best-effort goodbye
+    await _safe_close(tts)  # callers close again in `finally`; _safe_close suppresses errors
+
+
+async def _speak(browser: _Browser, deps: Deps, text: str) -> None:
+    """Emit agent text, synthesize it, and mark the reply done. A synthesis failure
+    becomes one clean session.error (mirroring _generate_reply) — without this the
+    greeting runs as a bare task whose exception would only ever be swallowed by
+    cancel_reply/drain, leaving the user with no audio and no error."""
+    await browser.send({"type": "transcript.agent", "text": text})
+    try:
+        tts = await deps.connect_tts()  # a connect failure is reported below as well
+        try:
+            await _synthesize(browser, tts, text)
+        finally:
+            await _safe_close(tts)
+    except asyncio.CancelledError:
+        raise  # barge-in or shutdown: let the task end as cancelled
+    except Exception as exc:
+        await browser.send({"type": "session.error", "message": str(exc)})
+        return  # no reply.done after an error
+    await browser.send({"type": "reply.done", "status": "completed"})
+
+
+async def _speak_sentence(browser: _Browser, deps: Deps, text: str) -> None:
+    """Show + synthesize one sentence of a streamed reply (no reply.done)."""
+    await browser.send({"type": "transcript.agent", "text": text})
+    tts = await deps.connect_tts()  # one TTS socket per sentence
+    try:
+        await _synthesize(browser, tts, text)  # errors propagate to the caller
+    finally:
+        await _safe_close(tts)
+
+
+async def _generate_reply(browser: _Browser, deps: Deps, session: Session) -> None:
+    """Stream the LLM reply sentence-by-sentence into TTS (low perceived latency), then
+    record it in the conversation history. Errors surface as session.error."""
+    messages = build_messages(deps.settings.SYSTEM_PROMPT, session.history)
+    spoken: list[str] = []  # sentences handed to TTS so far
+    try:
+        buffer = ""  # streamed text that does not yet end a sentence
+        async for delta in deps.llm_stream(messages):
+            buffer += delta
+            sentences, buffer = _split_sentences(buffer)
+            for sentence in sentences:
+                spoken.append(sentence)  # recorded before speaking, so a barge-in counts it
+                await _speak_sentence(browser, deps, sentence)
+        tail = buffer.strip()  # trailing text the model left without a terminator
+        if tail:
+            spoken.append(tail)
+            await _speak_sentence(browser, deps, tail)
+        reply = " ".join(spoken).strip()
+        if not reply:
+            await browser.send({"type": "reply.done", "status": "empty"})
+            return  # nothing was said, so nothing is added to the history
+        session.history.append({"role": "assistant", "content": reply})
+        _trim_history(session.history, deps.settings.MAX_HISTORY)
+        await browser.send({"type": "reply.done", "status": "completed"})
+    except asyncio.CancelledError:
+        # Barged-in mid-reply: record what was actually spoken so history keeps its
+        # user/assistant alternation (otherwise the next user turn would follow this
+        # one with no assistant turn between them).
+        partial = " ".join(spoken).strip()
+        if partial:
+            session.history.append({"role": "assistant", "content": partial})
+            _trim_history(session.history, deps.settings.MAX_HISTORY)
+        raise  # re-raise so the task still ends as cancelled
+    except Exception as exc:  # any leg failure becomes one clean session.error event
+        await browser.send({"type": "session.error", "message": str(exc)})
+
+
+async def maybe_barge_in(browser: _Browser, session: Session) -> None:
+    """If a reply is playing, tell the browser to stop and cancel it."""
+    if session.reply_task is not None and not session.reply_task.done():
+        await browser.send({"type": "input.speech.started"})
+        await session.cancel_reply()
+
+
+async def _pump_stt(browser: _Browser, stt: ClientConnection, deps: Deps, session: Session) -> None:
+    """Read STT turns: display only the finalized (formatted end-of-turn) user
+    transcript and reply to it. An interim turn isn't shown — it only barges in on a
+    playing reply. Drain the last reply when the socket closes."""
+    async for raw in stt:
+        msg = json.loads(raw)
+        if msg.get("type") != "Turn":
+            continue  # every other STT message type is ignored here
+        text = msg.get("transcript", "")
+        if not text:
+            continue  # an empty transcript neither barges in nor triggers a reply
+        if is_final_user_turn(msg):
+            await browser.send({"type": "transcript.user", "text": text})
+            # Stop any reply still playing AND tell the browser to flush its queued
+            # audio (cancel_reply alone is server-side only — the old reply keeps
+            # playing in the browser).
+            await maybe_barge_in(browser, session)
+            session.history.append({"role": "user", "content": text})
+            _trim_history(session.history, deps.settings.MAX_HISTORY)
+            session.reply_task = asyncio.create_task(_generate_reply(browser, deps, session))
+        else:
+            await maybe_barge_in(browser, session)  # interim speech interrupts playback
+    await session.drain()  # STT socket closed: let the last reply finish
+
+
+class _SessionClosed(Exception):  # raised by _until_closed, caught in run_session
+    """Sentinel that unwinds the session TaskGroup when one pump returns — i.e. the
+    browser disconnected or the STT socket closed. Raising it cancels the sibling pump."""
+
+
+async def _until_closed(pump: Awaitable[None]) -> None:
+    """Run a pump to its natural end, then raise to close the session TaskGroup."""
+    await pump  # an exception raised by the pump itself propagates unchanged
+    raise _SessionClosed
+
+
+async def run_session(browser: _Browser, deps: Deps) -> None:
+    """Run one browser session: greet, then cascade STT -> LLM -> TTS until either
+    side closes. All credentials stay server-side."""
+    reason = unavailable_reason(deps.settings)
+    if reason is not None:
+        await browser.send({"type": "session.error", "message": reason})
+        return  # missing key or TTS host: no sockets are opened
+    try:
+        stt = await deps.connect_stt()
+    except Exception as exc:  # any connect/setup failure becomes one clean session.error
+        await browser.send(
+            {"type": "session.error", "message": f"Could not start the session: {exc}"}
+        )
+        return
+
+    session = Session()
+    # Seed history with the greeting so the model has a record of its opening line.
+    session.history.append({"role": "assistant", "content": deps.settings.GREETING})
+    session.reply_task = asyncio.create_task(_speak(browser, deps, deps.settings.GREETING))
+    try:
+        # Race the two pumps: whichever returns first (browser hangs up → mic; STT
+        # socket closes → listen) raises _SessionClosed, and the TaskGroup cancels the
+        # other pump for us — no manual cancel/gather bookkeeping.
+        async with asyncio.TaskGroup() as tg:
+            tg.create_task(_until_closed(_pump_mic(browser, stt)))
+            tg.create_task(_until_closed(_pump_stt(browser, stt, deps, session)))
+    except* _SessionClosed:  # any other pump error is not handled here and propagates
+        pass
+    finally:
+        await session.cancel_reply()  # stop a greeting/reply that is still in flight
+        await _safe_close(stt)
+
+
+class FastAPIBrowser:
+    """Adapts a Starlette WebSocket to the (send, recv) shape run_session expects.
+    recv() returns None when the client disconnects, so the pumps exit cleanly."""
+
+    def __init__(self, websocket: WebSocket) -> None:
+        self._ws = websocket
+
+    async def send(self, event: dict[str, object]) -> None:
+        await self._ws.send_json(event)
+
+    async def recv(self) -> dict[str, object] | None:
+        from fastapi import WebSocketDisconnect
+
+        try:
+            data: dict[str, object] = await self._ws.receive_json()
+        except WebSocketDisconnect:
+            return None
+        return data
diff --git a/aai_cli/init/templates/agent_framework/api/index.py b/aai_cli/init/templates/agent_framework/api/index.py
new file mode 100644
index 00000000..f65a6990
--- /dev/null
+++ b/aai_cli/init/templates/agent_framework/api/index.py
@@ -0,0 +1,37 @@
+"""Talk to a cascaded voice agent — AssemblyAI agent-framework starter (FastAPI).
+
+The browser opens one WebSocket to this backend, which runs the cascade itself —
+Streaming STT -> LLM Gateway -> streaming TTS — so your API key never reaches the
+client. Streaming TTS is sandbox-only, so scaffold with `assembly --sandbox init
+agent-framework` and use a sandbox key.
+
+  WS /ws  <- {"type":"input.audio","audio":<b64 pcm>} ; -> transcripts + reply.audio
+"""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+from fastapi import FastAPI, WebSocket
+from fastapi.responses import FileResponse
+from fastapi.staticfiles import StaticFiles
+
+from . import cascade, settings
+
+ROOT = Path(__file__).resolve().parent.parent
+STATIC = ROOT / "static"
+app = FastAPI()
+app.mount("/static", StaticFiles(directory=STATIC), name="static")
+
+
+@app.get("/")
+def index() -> FileResponse:  # serves the page from static/index.html
+    return FileResponse(STATIC / "index.html")
+
+
+@app.websocket("/ws")
+async def ws(websocket: WebSocket) -> None:
+    """Accept the browser socket and run one cascade session over it."""
+    await websocket.accept()
+    browser = cascade.FastAPIBrowser(websocket)
+    await cascade.run_session(browser, cascade.Deps.real(settings))
diff --git a/aai_cli/init/templates/agent_framework/api/settings.py b/aai_cli/init/templates/agent_framework/api/settings.py
new file mode 100644
index 00000000..8cfda0a5
--- /dev/null
+++ b/aai_cli/init/templates/agent_framework/api/settings.py
@@ -0,0 +1,37 @@
+from __future__ import annotations
+
+import os
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+API_KEY: str = os.environ.get("ASSEMBLYAI_API_KEY", "")
+
+# Hosts. `assembly init` pins these to the active environment. Streaming TTS only
+# exists in the sandbox, so this whole cascade is sandbox-only (see README); the
+# defaults point at the sandbox so a bare clone works with a sandbox key.
+STREAMING_HOST: str = os.environ.get(
+    "ASSEMBLYAI_STREAMING_HOST", "streaming.sandbox000.assemblyai-labs.com"
+)
+TTS_HOST: str = os.environ.get(
+    "ASSEMBLYAI_TTS_HOST", "streaming-tts.sandbox000.assemblyai-labs.com"
+)
+LLM_GATEWAY_URL: str = os.environ.get(
+    "ASSEMBLYAI_LLM_GATEWAY_URL", "https://llm-gateway.sandbox000.assemblyai-labs.com/v1"
+)
+
+# The cascade's knobs — edit these to change behavior.
+MODEL: str = "claude-haiku-4-5-20251001"  # model id sent with each LLM Gateway chat request
+VOICE: str = "jane"  # sent as the `voice` query parameter of the TTS socket URL
+SYSTEM_PROMPT: str = (
+    "You are a friendly, concise voice assistant. Keep replies short and conversational. "
+    "Your reply is read aloud by a text-to-speech engine, so write plain spoken prose — "
+    "no markdown, emoji, bullet lists, or code."
+)
+GREETING: str = "Hi! I'm your AssemblyAI voice agent. What can I help you with?"
+MAX_HISTORY: int = 40  # keep the last N messages of conversation context (sliding window)
+
+# 16 kHz PCM in (Streaming v3); 24 kHz PCM out (streaming TTS).
+INPUT_SAMPLE_RATE: int = 16000  # same value as SESSION_CONFIG.inputSampleRate in static/app.js
+OUTPUT_SAMPLE_RATE: int = 24000  # same value as SESSION_CONFIG.outputSampleRate in static/app.js
diff --git a/aai_cli/init/templates/audio-transcription/dockerignore b/aai_cli/init/templates/agent_framework/dockerignore
similarity index 100%
rename from aai_cli/init/templates/audio-transcription/dockerignore
rename to aai_cli/init/templates/agent_framework/dockerignore
diff --git a/aai_cli/init/templates/agent_framework/env.example b/aai_cli/init/templates/agent_framework/env.example
new file mode 100644
index 00000000..6a119b9e
--- /dev/null
+++ b/aai_cli/init/templates/agent_framework/env.example
@@ -0,0 +1,6 @@
+ASSEMBLYAI_API_KEY=your_assemblyai_api_key_here
+# This cascade uses streaming TTS, which is sandbox-only — use a sandbox key and the
+# sandbox hosts (assembly --sandbox init agent-framework fills these in for you):
+# ASSEMBLYAI_STREAMING_HOST=streaming.sandbox000.assemblyai-labs.com
+# ASSEMBLYAI_TTS_HOST=streaming-tts.sandbox000.assemblyai-labs.com
+# ASSEMBLYAI_LLM_GATEWAY_URL=https://llm-gateway.sandbox000.assemblyai-labs.com/v1
diff --git a/aai_cli/init/templates/audio-transcription/gitignore b/aai_cli/init/templates/agent_framework/gitignore
similarity index 100%
rename from aai_cli/init/templates/audio-transcription/gitignore
rename to aai_cli/init/templates/agent_framework/gitignore
diff --git a/aai_cli/init/templates/agent_framework/requirements.txt b/aai_cli/init/templates/agent_framework/requirements.txt
new file mode 100644
index 00000000..72fea20c
--- /dev/null
+++ b/aai_cli/init/templates/agent_framework/requirements.txt
@@ -0,0 +1,8 @@
+fastapi>=0.136.3
+uvicorn>=0.30.0
+websockets>=14.1
+openai>=1.54.0
+python-dotenv>=1.2.2
+# Pin starlette directly: FastAPI's own floor still admits versions with known CVEs,
+# so raise the transitive floor above them.
+starlette>=1.2.1
diff --git a/aai_cli/init/templates/audio-transcription/runtime.txt b/aai_cli/init/templates/agent_framework/runtime.txt
similarity index 100%
rename from aai_cli/init/templates/audio-transcription/runtime.txt
rename to aai_cli/init/templates/agent_framework/runtime.txt
diff --git a/aai_cli/init/templates/agent_framework/static/app.js b/aai_cli/init/templates/agent_framework/static/app.js
new file mode 100644
index 00000000..388edaea
--- /dev/null
+++ b/aai_cli/init/templates/agent_framework/static/app.js
@@ -0,0 +1,178 @@
+const SESSION_CONFIG = {
+  inputSampleRate: 16000,
+  outputSampleRate: 24000,
+  processorBufferSize: 4096,
+  microphone: { audio: { echoCancellation: true, noiseSuppression: false } },
+};
+
+const connBtn = document.getElementById("conn");
+const statusEl = document.getElementById("status");
+const logEl = document.getElementById("log");
+
+// Session state. `ws` is set as soon as a connection attempt starts and cleared
+// by teardown(); `connected` only becomes true once the microphone is streaming.
+let ws = null;
+let micPipeline = null;
+let player = null;
+let connected = false;
+
+// One button, two roles. Keying on `ws` rather than `connected` makes a click
+// during "Connecting..." cancel the attempt instead of opening a second socket.
+connBtn.addEventListener("click", () =>
+  ws ? hangup() : connect().catch(fail),
+);
+
+// Show `message` in the status pill and record `state` in its data-state attribute.
+function setStatus(message, state) {
+  statusEl.textContent = message;
+  statusEl.dataset.state = state;
+}
+
+// Same-host /ws endpoint; wss when the page itself was loaded over https.
+function wsUrl() {
+  const scheme = location.protocol === "https:" ? "wss" : "ws";
+  return `${scheme}://${location.host}/ws`;
+}
+
+// Open the session socket. Each handler first checks that its socket is still
+// the current session (`ws === socket`), so a socket that was hung up, failed,
+// or replaced can no longer change the UI or feed audio to a newer session.
+async function connect() {
+  setStatus("Connecting...", "idle");
+  const socket = new WebSocket(wsUrl());
+  ws = socket;
+  socket.onopen = () => {
+    if (ws !== socket) return;
+    startMic().catch((error) => {
+      if (ws === socket) fail(error);
+    });
+  };
+  socket.onmessage = (event) => {
+    if (ws === socket) onEvent(JSON.parse(event.data));
+  };
+  socket.onerror = () => {
+    if (ws === socket) fail("WebSocket error");
+  };
+  // Also covers a close before the mic is live, so the UI never sticks on
+  // "Connecting...". After fail() `ws` is already null, so the error text stays.
+  socket.onclose = () => {
+    if (ws === socket) hangup();
+  };
+}
+
+// Start playback and microphone capture for the current socket.
+async function startMic() {
+  const socket = ws;
+  // Create the player first: the server speaks the greeting the instant the
+  // socket opens, so `reply.audio` can arrive before getUserMedia's permission
+  // prompt resolves. Setting `player` synchronously here (before the first
+  // await) guarantees it exists when onEvent handles that first audio frame.
+  player = AudioHelpers.createPcmPlayer({
+    sampleRate: SESSION_CONFIG.outputSampleRate,
+  });
+  await player.resume();
+  const stream = await navigator.mediaDevices.getUserMedia(
+    SESSION_CONFIG.microphone,
+  );
+  // The session can end (error, hang up, socket closed) while the permission
+  // prompt is open. Release the microphone rather than reporting "Connected".
+  if (ws !== socket) {
+    stream.getTracks().forEach((track) => track.stop());
+    return;
+  }
+  micPipeline = AudioHelpers.createMicrophonePipeline(stream, {
+    bufferSize: SESSION_CONFIG.processorBufferSize,
+  });
+  await micPipeline.start((frame, sampleRate) => {
+    if (ws !== socket || socket.readyState !== WebSocket.OPEN) return;
+    const pcm = AudioHelpers.downsampleToPCM(
+      frame,
+      sampleRate,
+      SESSION_CONFIG.inputSampleRate,
+    );
+    socket.send(
+      JSON.stringify({
+        type: "input.audio",
+        audio: AudioHelpers.bytesToB64(pcm),
+      }),
+    );
+  });
+  // teardown() already closed the pipeline if the session ended during start().
+  if (ws !== socket) return;
+
+  connected = true;
+  connBtn.textContent = "■ Hang up";
+  connBtn.dataset.state = "connected";
+  setStatus("● Connected - just talk", "live");
+}
+
+// Dispatch one server event; event types not listed here are ignored.
+function onEvent(event) {
+  switch (event.type) {
+    case "transcript.user":
+      return addTurn("you", "You", event.text);
+    case "transcript.agent":
+      return addTurn("agent", "Agent", event.text);
+    case "reply.audio":
+      if (player) player.playBase64Chunk(event.data);
+      return;
+    case "input.speech.started":
+      return bargeIn();
+    case "reply.done":
+      return;
+    case "session.error":
+      return fail(event.message || "session error");
+  }
+}
+
+// The user started speaking: drop whatever agent audio is still queued.
+function bargeIn() {
+  if (player) player.stopQueuedAudio();
+}
+
+// Append one transcript line to the log; empty text is skipped.
+function addTurn(speakerKind, speaker, text) {
+  if (!text) return;
+  const turn = document.createElement("div");
+  turn.className = "conversation-turn";
+  turn.dataset.speaker = speakerKind;
+  const who = document.createElement("span");
+  who.className = "turn-speaker";
+  who.textContent = speaker + ": ";
+  turn.append(who, document.createTextNode(text));
+  logEl.appendChild(turn);
+  turn.scrollIntoView({ block: "end" });
+}
+
+// Release the socket, microphone and player and reset the button. The status
+// pill is left alone so hangup() and fail() can each set their own message.
+function teardown() {
+  const socket = ws;
+  const mic = micPipeline;
+  const speaker = player;
+  // Clear the globals first so handlers fired by the closes below see no session.
+  ws = null;
+  micPipeline = null;
+  player = null;
+  connected = false;
+  connBtn.textContent = "● Connect";
+  connBtn.dataset.state = "idle";
+  if (speaker) speaker.stopQueuedAudio();
+  // close() is valid while CONNECTING (0) as well as OPEN (1); it aborts the handshake.
+  if (socket && socket.readyState <= WebSocket.OPEN) socket.close();
+  if (mic) mic.close();
+  if (speaker) speaker.close();
+}
+
+// End the session at the user's request, or because the server closed the socket.
+function hangup() {
+  teardown();
+  setStatus("Disconnected", "idle");
+}
+
+// End the session and show the reason. Teardown runs first and never touches
+// the status pill, so the error message is what the user is left looking at.
+function fail(message) {
+  teardown();
+  setStatus("Error: " + message, "error");
+}
diff --git a/aai_cli/init/templates/voice-agent/static/audio.js b/aai_cli/init/templates/agent_framework/static/audio.js
similarity index 100%
rename from aai_cli/init/templates/voice-agent/static/audio.js
rename to aai_cli/init/templates/agent_framework/static/audio.js
diff --git a/aai_cli/init/templates/agent_framework/static/index.html b/aai_cli/init/templates/agent_framework/static/index.html
new file mode 100644
index 00000000..f6809cfc
--- /dev/null
+++ b/aai_cli/init/templates/agent_framework/static/index.html
@@ -0,0 +1,37 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+  <meta charset="utf-8" />
+  <meta name="viewport" content="width=device-width, initial-scale=1" />
+  <title>Talk to a cascaded voice agent · AssemblyAI</title>
+  <link rel="stylesheet" href="/static/styles.css" />
+</head>
+<body class="template-page voice-agent-template">
+  <main class="app-shell">
+    <a class="brand" href="https://www.assemblyai.com" target="_blank" rel="noopener">
+      <img class="brand-logo" src="https://www.assemblyai.com/_aai/images/logos/assemblyai-logo-primary.svg" alt="AssemblyAI" />
+    </a>
+
+    <header class="page-header">
+      <span class="eyebrow">Streaming STT · LLM Gateway · TTS</span>
+      <h1 class="page-title">Talk to a cascaded voice agent</h1>
+      <p class="page-subtitle">Connect and just talk. This agent is a cascade your backend wires together — Streaming STT transcribes you, the LLM Gateway replies, and streaming TTS speaks it back, with turn detection and barge-in handled server-side. Your API key stays on the server. Headphones give the cleanest result.</p>
+    </header>
+
+    <div class="control-bar">
+      <button id="conn" class="button connection-button" data-state="idle">● Connect</button>
+      <span id="status" class="status-pill" aria-live="polite"></span>
+    </div>
+
+    <div id="log" class="conversation-log"></div>
+
+    <footer class="page-footer">
+      <span>Built with AssemblyAI</span>
+      <a class="footer-link" href="https://www.assemblyai.com" target="_blank" rel="noopener">assemblyai.com →</a>
+    </footer>
+  </main>
+
+  <script src="/static/audio.js"></script>
+  <script src="/static/app.js"></script>
+</body>
+</html>
diff --git a/aai_cli/init/templates/voice-agent/static/styles.css b/aai_cli/init/templates/agent_framework/static/styles.css
similarity index 100%
rename from aai_cli/init/templates/voice-agent/static/styles.css
rename to aai_cli/init/templates/agent_framework/static/styles.css
diff --git a/aai_cli/init/templates/audio-transcription/vercel.json b/aai_cli/init/templates/agent_framework/vercel.json
similarity index 100%
rename from aai_cli/init/templates/audio-transcription/vercel.json
rename to aai_cli/init/templates/agent_framework/vercel.json
diff --git a/aai_cli/init/templates/audio-transcription/AGENTS.md b/aai_cli/init/templates/audio_transcription/AGENTS.md
similarity index 100%
rename from aai_cli/init/templates/audio-transcription/AGENTS.md
rename to aai_cli/init/templates/audio_transcription/AGENTS.md
diff --git a/aai_cli/init/templates/live-captions/Dockerfile b/aai_cli/init/templates/audio_transcription/Dockerfile
similarity index 100%
rename from aai_cli/init/templates/live-captions/Dockerfile
rename to aai_cli/init/templates/audio_transcription/Dockerfile
diff --git a/aai_cli/init/templates/live-captions/Procfile b/aai_cli/init/templates/audio_transcription/Procfile
similarity index 100%
rename from aai_cli/init/templates/live-captions/Procfile
rename to aai_cli/init/templates/audio_transcription/Procfile
diff --git a/aai_cli/init/templates/audio-transcription/README.md b/aai_cli/init/templates/audio_transcription/README.md
similarity index 100%
rename from aai_cli/init/templates/audio-transcription/README.md
rename to aai_cli/init/templates/audio_transcription/README.md
diff --git a/aai_cli/init/templates/voice-agent/api/__init__.py b/aai_cli/init/templates/audio_transcription/__init__.py
similarity index 100%
rename from aai_cli/init/templates/voice-agent/api/__init__.py
rename to aai_cli/init/templates/audio_transcription/__init__.py
diff --git a/aai_cli/init/templates/audio_transcription/api/__init__.py b/aai_cli/init/templates/audio_transcription/api/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/aai_cli/init/templates/audio-transcription/api/index.py b/aai_cli/init/templates/audio_transcription/api/index.py
similarity index 81%
rename from aai_cli/init/templates/audio-transcription/api/index.py
rename to aai_cli/init/templates/audio_transcription/api/index.py
index 6ab7dbb5..5a29fd14 100644
--- a/aai_cli/init/templates/audio-transcription/api/index.py
+++ b/aai_cli/init/templates/audio_transcription/api/index.py
@@ -16,6 +16,7 @@
 import tempfile
 import uuid
 from pathlib import Path
+from typing import Protocol
 
 import assemblyai as aai
 from assemblyai.api import get_transcript  # single non-blocking GET (see status())
@@ -25,7 +26,7 @@
 from fastapi.staticfiles import StaticFiles
 from openai import OpenAI  # the LLM Gateway is OpenAI-compatible
 
-from api import settings
+from . import settings
 
 aai.settings.api_key = settings.API_KEY
 # Target the same AssemblyAI environment the key was minted for. `assembly init` writes
@@ -33,7 +34,17 @@
 if settings.ASSEMBLYAI_BASE_URL:
     aai.settings.base_url = settings.ASSEMBLYAI_BASE_URL
 
-CONFIG = aai.TranscriptionConfig(**settings.TRANSCRIPTION_CONFIG_KWARGS)
+# Build the config from settings.TRANSCRIPTION_CONFIG_KWARGS by reading each flag by name.
+# (A bare `**dict[str, bool]` unpack can't type-check against the SDK's heterogeneous
+# __init__, so we pass each boolean feature explicitly.)
+_FEATURES = settings.TRANSCRIPTION_CONFIG_KWARGS
+CONFIG = aai.TranscriptionConfig(
+    speaker_labels=_FEATURES.get("speaker_labels", False),
+    auto_chapters=_FEATURES.get("auto_chapters", False),
+    sentiment_analysis=_FEATURES.get("sentiment_analysis", False),
+    entity_detection=_FEATURES.get("entity_detection", False),
+    auto_highlights=_FEATURES.get("auto_highlights", False),
+)
 
 ROOT = Path(__file__).resolve().parent.parent
 STATIC = ROOT / "static"
@@ -104,6 +115,17 @@ def ask(transcript_id: str = Body(...), question: str = Body(...)) -> dict[str,
     return {"answer": resp.choices[0].message.content or ""}
 
 
+class _Serializable(Protocol):
+    """The pydantic-model surface we use: a `.dict()` returning the full JSON."""
+
+    def dict(self) -> dict[str, object]: ...
+
+
+def _to_payload(model: _Serializable) -> dict[str, object]:
+    """Serialize the transcript model to a JSON-ready dict (typed via the protocol)."""
+    return model.dict()
+
+
 @app.get("/api/status/{transcript_id}")
 def status(transcript_id: str) -> dict[str, object]:
     _require_key()
@@ -116,5 +138,5 @@ def status(transcript_id: str) -> dict[str, object]:
     if t.status == aai.TranscriptStatus.error:
         raise HTTPException(status_code=502, detail=t.error or "Transcription failed")
     if t.status == aai.TranscriptStatus.completed:
-        return {"status": "completed", "transcript": t.dict()}
+        return {"status": "completed", "transcript": _to_payload(t)}
     return {"status": str(getattr(t.status, "value", t.status))}
diff --git a/aai_cli/init/templates/audio-transcription/api/settings.py b/aai_cli/init/templates/audio_transcription/api/settings.py
similarity index 74%
rename from aai_cli/init/templates/audio-transcription/api/settings.py
rename to aai_cli/init/templates/audio_transcription/api/settings.py
index 6ee69e06..3ec9ecc9 100644
--- a/aai_cli/init/templates/audio-transcription/api/settings.py
+++ b/aai_cli/init/templates/audio_transcription/api/settings.py
@@ -16,8 +16,9 @@
 # Public sample so the app works immediately without uploading a local file.
 SAMPLE_URL = "https://assembly.ai/wildfires.mp3"
 
-# Main backend customization point. Add, remove, or tune AssemblyAI features here.
-TRANSCRIPTION_CONFIG_KWARGS = {
+# Main backend customization point. Toggle AssemblyAI audio-intelligence features here;
+# api/index.py reads each flag by name when it builds the TranscriptionConfig.
+TRANSCRIPTION_CONFIG_KWARGS: dict[str, bool] = {
     "speaker_labels": True,
     "auto_chapters": True,
     "sentiment_analysis": True,
diff --git a/aai_cli/init/templates/live-captions/dockerignore b/aai_cli/init/templates/audio_transcription/dockerignore
similarity index 100%
rename from aai_cli/init/templates/live-captions/dockerignore
rename to aai_cli/init/templates/audio_transcription/dockerignore
diff --git a/aai_cli/init/templates/audio-transcription/env.example b/aai_cli/init/templates/audio_transcription/env.example
similarity index 100%
rename from aai_cli/init/templates/audio-transcription/env.example
rename to aai_cli/init/templates/audio_transcription/env.example
diff --git a/aai_cli/init/templates/live-captions/gitignore b/aai_cli/init/templates/audio_transcription/gitignore
similarity index 100%
rename from aai_cli/init/templates/live-captions/gitignore
rename to aai_cli/init/templates/audio_transcription/gitignore
diff --git a/aai_cli/init/templates/audio-transcription/requirements.txt b/aai_cli/init/templates/audio_transcription/requirements.txt
similarity index 100%
rename from aai_cli/init/templates/audio-transcription/requirements.txt
rename to aai_cli/init/templates/audio_transcription/requirements.txt
diff --git a/aai_cli/init/templates/live-captions/runtime.txt b/aai_cli/init/templates/audio_transcription/runtime.txt
similarity index 100%
rename from aai_cli/init/templates/live-captions/runtime.txt
rename to aai_cli/init/templates/audio_transcription/runtime.txt
diff --git a/aai_cli/init/templates/audio-transcription/static/app.js b/aai_cli/init/templates/audio_transcription/static/app.js
similarity index 100%
rename from aai_cli/init/templates/audio-transcription/static/app.js
rename to aai_cli/init/templates/audio_transcription/static/app.js
diff --git a/aai_cli/init/templates/audio-transcription/static/index.html b/aai_cli/init/templates/audio_transcription/static/index.html
similarity index 100%
rename from aai_cli/init/templates/audio-transcription/static/index.html
rename to aai_cli/init/templates/audio_transcription/static/index.html
diff --git a/aai_cli/init/templates/audio-transcription/static/styles.css b/aai_cli/init/templates/audio_transcription/static/styles.css
similarity index 100%
rename from aai_cli/init/templates/audio-transcription/static/styles.css
rename to aai_cli/init/templates/audio_transcription/static/styles.css
diff --git a/aai_cli/init/templates/live-captions/vercel.json b/aai_cli/init/templates/audio_transcription/vercel.json
similarity index 100%
rename from aai_cli/init/templates/live-captions/vercel.json
rename to aai_cli/init/templates/audio_transcription/vercel.json
diff --git a/aai_cli/init/templates/live-captions/AGENTS.md b/aai_cli/init/templates/live_captions/AGENTS.md
similarity index 100%
rename from aai_cli/init/templates/live-captions/AGENTS.md
rename to aai_cli/init/templates/live_captions/AGENTS.md
diff --git a/aai_cli/init/templates/voice-agent/Dockerfile b/aai_cli/init/templates/live_captions/Dockerfile
similarity index 100%
rename from aai_cli/init/templates/voice-agent/Dockerfile
rename to aai_cli/init/templates/live_captions/Dockerfile
diff --git a/aai_cli/init/templates/voice-agent/Procfile b/aai_cli/init/templates/live_captions/Procfile
similarity index 100%
rename from aai_cli/init/templates/voice-agent/Procfile
rename to aai_cli/init/templates/live_captions/Procfile
diff --git a/aai_cli/init/templates/live-captions/README.md b/aai_cli/init/templates/live_captions/README.md
similarity index 100%
rename from aai_cli/init/templates/live-captions/README.md
rename to aai_cli/init/templates/live_captions/README.md
diff --git a/aai_cli/init/templates/live_captions/__init__.py b/aai_cli/init/templates/live_captions/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/aai_cli/init/templates/live_captions/api/__init__.py b/aai_cli/init/templates/live_captions/api/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/aai_cli/init/templates/live-captions/api/index.py b/aai_cli/init/templates/live_captions/api/index.py
similarity index 69%
rename from aai_cli/init/templates/live-captions/api/index.py
rename to aai_cli/init/templates/live_captions/api/index.py
index 0b91b3ce..81848273 100644
--- a/aai_cli/init/templates/live-captions/api/index.py
+++ b/aai_cli/init/templates/live_captions/api/index.py
@@ -11,14 +11,12 @@
 
 from pathlib import Path
 
-# httpx2 is Pydantic's maintained fork of httpx (API-identical, just renamed) — not a
-# typo. Keep the "2"; see requirements.txt.
-import httpx2
+from assemblyai.streaming.v3 import StreamingClient, StreamingClientOptions
 from fastapi import FastAPI, HTTPException
 from fastapi.responses import FileResponse
 from fastapi.staticfiles import StaticFiles
 
-from api import settings
+from . import settings
 
 ROOT = Path(__file__).resolve().parent.parent
 STATIC = ROOT / "static"
@@ -42,21 +40,20 @@ def index() -> FileResponse:
 
 @app.post("/api/token")
 def token() -> dict[str, str]:
-    """Mint a one-time streaming token. The browser uses it to open the WebSocket."""
+    """Mint a one-time streaming token via the AssemblyAI SDK. The browser uses it to open the WebSocket."""
     _require_key()
-    # NOTE: the streaming token uses the raw API key as Authorization (no 'Bearer').
     try:
-        resp = httpx2.get(
-            f"https://{settings.STREAMING_HOST}{settings.TOKEN_PATH}",
-            params={"expires_in_seconds": settings.TOKEN_EXPIRES_IN_SECONDS},
-            headers={"Authorization": settings.API_KEY},
+        client = StreamingClient(
+            StreamingClientOptions(api_key=settings.API_KEY, api_host=settings.STREAMING_HOST)
+        )
+        streaming_token = client.create_temporary_token(
+            expires_in_seconds=settings.TOKEN_EXPIRES_IN_SECONDS
         )
-        resp.raise_for_status()
     except Exception as exc:  # missing/invalid key, network -> clean 502, not a 500
         raise HTTPException(
             status_code=502, detail=f"Could not mint streaming token: {exc}"
         ) from exc
     return {
-        "token": resp.json()["token"],
+        "token": streaming_token,
         "ws_url": f"wss://{settings.STREAMING_HOST}{settings.WEBSOCKET_PATH}",
     }
diff --git a/aai_cli/init/templates/live-captions/api/settings.py b/aai_cli/init/templates/live_captions/api/settings.py
similarity index 93%
rename from aai_cli/init/templates/live-captions/api/settings.py
rename to aai_cli/init/templates/live_captions/api/settings.py
index a0b61862..33ffd0fb 100644
--- a/aai_cli/init/templates/live-captions/api/settings.py
+++ b/aai_cli/init/templates/live_captions/api/settings.py
@@ -10,5 +10,4 @@
 # `assembly init` writes this for you; defaults to production. Host only, no scheme.
 STREAMING_HOST = os.environ.get("ASSEMBLYAI_STREAMING_HOST", "streaming.assemblyai.com")
 TOKEN_EXPIRES_IN_SECONDS = 60
-TOKEN_PATH = "/v3/token"
 WEBSOCKET_PATH = "/v3/ws"
diff --git a/aai_cli/init/templates/voice-agent/dockerignore b/aai_cli/init/templates/live_captions/dockerignore
similarity index 100%
rename from aai_cli/init/templates/voice-agent/dockerignore
rename to aai_cli/init/templates/live_captions/dockerignore
diff --git a/aai_cli/init/templates/live-captions/env.example b/aai_cli/init/templates/live_captions/env.example
similarity index 100%
rename from aai_cli/init/templates/live-captions/env.example
rename to aai_cli/init/templates/live_captions/env.example
diff --git a/aai_cli/init/templates/voice-agent/gitignore b/aai_cli/init/templates/live_captions/gitignore
similarity index 100%
rename from aai_cli/init/templates/voice-agent/gitignore
rename to aai_cli/init/templates/live_captions/gitignore
diff --git a/aai_cli/init/templates/voice-agent/requirements.txt b/aai_cli/init/templates/live_captions/requirements.txt
similarity index 90%
rename from aai_cli/init/templates/voice-agent/requirements.txt
rename to aai_cli/init/templates/live_captions/requirements.txt
index 7d5bb7a2..fe0f2b61 100644
--- a/aai_cli/init/templates/voice-agent/requirements.txt
+++ b/aai_cli/init/templates/live_captions/requirements.txt
@@ -1,6 +1,6 @@
 fastapi>=0.136.3
 uvicorn>=0.30.0
-httpx2>=2.3.0
+assemblyai>=0.64.4,<1
 python-dotenv>=1.2.2
 # Pin starlette directly: FastAPI's own floor (starlette>=0.46.0) still admits
 # versions with known CVEs, so raise the transitive floor above them.
diff --git a/aai_cli/init/templates/voice-agent/runtime.txt b/aai_cli/init/templates/live_captions/runtime.txt
similarity index 100%
rename from aai_cli/init/templates/voice-agent/runtime.txt
rename to aai_cli/init/templates/live_captions/runtime.txt
diff --git a/aai_cli/init/templates/live-captions/static/app.js b/aai_cli/init/templates/live_captions/static/app.js
similarity index 100%
rename from aai_cli/init/templates/live-captions/static/app.js
rename to aai_cli/init/templates/live_captions/static/app.js
diff --git a/aai_cli/init/templates/live-captions/static/audio.js b/aai_cli/init/templates/live_captions/static/audio.js
similarity index 100%
rename from aai_cli/init/templates/live-captions/static/audio.js
rename to aai_cli/init/templates/live_captions/static/audio.js
diff --git a/aai_cli/init/templates/live-captions/static/index.html b/aai_cli/init/templates/live_captions/static/index.html
similarity index 100%
rename from aai_cli/init/templates/live-captions/static/index.html
rename to aai_cli/init/templates/live_captions/static/index.html
diff --git a/aai_cli/init/templates/live-captions/static/styles.css b/aai_cli/init/templates/live_captions/static/styles.css
similarity index 100%
rename from aai_cli/init/templates/live-captions/static/styles.css
rename to aai_cli/init/templates/live_captions/static/styles.css
diff --git a/aai_cli/init/templates/voice-agent/vercel.json b/aai_cli/init/templates/live_captions/vercel.json
similarity index 100%
rename from aai_cli/init/templates/voice-agent/vercel.json
rename to aai_cli/init/templates/live_captions/vercel.json
diff --git a/aai_cli/init/templates/voice-agent/AGENTS.md b/aai_cli/init/templates/voice_agent/AGENTS.md
similarity index 100%
rename from aai_cli/init/templates/voice-agent/AGENTS.md
rename to aai_cli/init/templates/voice_agent/AGENTS.md
diff --git a/aai_cli/init/templates/voice_agent/Dockerfile b/aai_cli/init/templates/voice_agent/Dockerfile
new file mode 100644
index 00000000..73deb13c
--- /dev/null
+++ b/aai_cli/init/templates/voice_agent/Dockerfile
@@ -0,0 +1,25 @@
+# Container image for Fly.io, Railway, Render (Docker), and Cloudflare Containers.
+# Vercel ignores this and builds api/index.py as a serverless function instead.
+FROM python:3.13-slim
+
+ENV PYTHONUNBUFFERED=1 \
+    PYTHONDONTWRITEBYTECODE=1
+WORKDIR /app
+
+COPY requirements.txt ./
+RUN pip install --no-cache-dir -r requirements.txt
+
+COPY . .
+
+# Run as a non-root user (container hardening; the app only reads its files and binds
+# a non-privileged port, so it needs no elevated privileges). python:3.13-slim is
+# Debian-based, so `useradd` is available.
+RUN useradd --create-home appuser
+USER appuser
+
+# Fly reads EXPOSE to set its fly.toml internal_port; keep it in sync with the CMD
+# default so the proxy and the app agree on the port.
+EXPOSE 8080
+# Shell form so ${PORT} expands. Railway/Render inject $PORT; Fly maps to 8080.
+# `exec` replaces the shell, so uvicorn runs as PID 1 and receives SIGTERM directly.
+CMD exec python -m uvicorn api.index:app --host 0.0.0.0 --port ${PORT:-8080}
diff --git a/aai_cli/init/templates/voice_agent/Procfile b/aai_cli/init/templates/voice_agent/Procfile
new file mode 100644
index 00000000..8837c118
--- /dev/null
+++ b/aai_cli/init/templates/voice_agent/Procfile
@@ -0,0 +1 @@
+web: python -m uvicorn api.index:app --host 0.0.0.0 --port ${PORT:-3000}
diff --git a/aai_cli/init/templates/voice-agent/README.md b/aai_cli/init/templates/voice_agent/README.md
similarity index 100%
rename from aai_cli/init/templates/voice-agent/README.md
rename to aai_cli/init/templates/voice_agent/README.md
diff --git a/aai_cli/init/templates/voice_agent/__init__.py b/aai_cli/init/templates/voice_agent/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/aai_cli/init/templates/voice_agent/api/__init__.py b/aai_cli/init/templates/voice_agent/api/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/aai_cli/init/templates/voice-agent/api/index.py b/aai_cli/init/templates/voice_agent/api/index.py
similarity index 98%
rename from aai_cli/init/templates/voice-agent/api/index.py
rename to aai_cli/init/templates/voice_agent/api/index.py
index d6cda54b..d86f38a6 100644
--- a/aai_cli/init/templates/voice-agent/api/index.py
+++ b/aai_cli/init/templates/voice_agent/api/index.py
@@ -18,7 +18,7 @@
 from fastapi.responses import FileResponse
 from fastapi.staticfiles import StaticFiles
 
-from api import settings
+from . import settings
 
 ROOT = Path(__file__).resolve().parent.parent
 STATIC = ROOT / "static"
diff --git a/aai_cli/init/templates/voice-agent/api/settings.py b/aai_cli/init/templates/voice_agent/api/settings.py
similarity index 100%
rename from aai_cli/init/templates/voice-agent/api/settings.py
rename to aai_cli/init/templates/voice_agent/api/settings.py
diff --git a/aai_cli/init/templates/voice_agent/dockerignore b/aai_cli/init/templates/voice_agent/dockerignore
new file mode 100644
index 00000000..c6c282ad
--- /dev/null
+++ b/aai_cli/init/templates/voice_agent/dockerignore
@@ -0,0 +1,6 @@
+.env
+.venv
+__pycache__/
+*.pyc
+.git/
+.gitignore
diff --git a/aai_cli/init/templates/voice-agent/env.example b/aai_cli/init/templates/voice_agent/env.example
similarity index 100%
rename from aai_cli/init/templates/voice-agent/env.example
rename to aai_cli/init/templates/voice_agent/env.example
diff --git a/aai_cli/init/templates/voice_agent/gitignore b/aai_cli/init/templates/voice_agent/gitignore
new file mode 100644
index 00000000..5b01785a
--- /dev/null
+++ b/aai_cli/init/templates/voice_agent/gitignore
@@ -0,0 +1,3 @@
+.env
+.venv
+__pycache__/
diff --git a/aai_cli/init/templates/live-captions/requirements.txt b/aai_cli/init/templates/voice_agent/requirements.txt
similarity index 100%
rename from aai_cli/init/templates/live-captions/requirements.txt
rename to aai_cli/init/templates/voice_agent/requirements.txt
diff --git a/aai_cli/init/templates/voice_agent/runtime.txt b/aai_cli/init/templates/voice_agent/runtime.txt
new file mode 100644
index 00000000..d2aca3a7
--- /dev/null
+++ b/aai_cli/init/templates/voice_agent/runtime.txt
@@ -0,0 +1 @@
+python-3.12
diff --git a/aai_cli/init/templates/voice-agent/static/app.js b/aai_cli/init/templates/voice_agent/static/app.js
similarity index 100%
rename from aai_cli/init/templates/voice-agent/static/app.js
rename to aai_cli/init/templates/voice_agent/static/app.js
diff --git a/aai_cli/init/templates/voice_agent/static/audio.js b/aai_cli/init/templates/voice_agent/static/audio.js
new file mode 100644
index 00000000..bda694c9
--- /dev/null
+++ b/aai_cli/init/templates/voice_agent/static/audio.js
@@ -0,0 +1,101 @@
+function createMicrophonePipeline(stream, options = {}) {
+  const bufferSize = options.bufferSize || 4096;
+  const AudioContextClass = window.AudioContext || window.webkitAudioContext;
+  const audioCtx = new AudioContextClass();
+  const source = audioCtx.createMediaStreamSource(stream);
+  const processor = audioCtx.createScriptProcessor(bufferSize, 1, 1);
+
+  return {
+    audioCtx,
+    async start(onFrame) {
+      await audioCtx.resume();
+      source.connect(processor);
+      processor.connect(audioCtx.destination);
+      processor.onaudioprocess = (event) => {
+        onFrame(event.inputBuffer.getChannelData(0), audioCtx.sampleRate);
+      };
+    },
+    close() {
+      processor.disconnect();
+      stream.getTracks().forEach((track) => track.stop());
+      audioCtx.close();
+    },
+  };
+}
+
+function createPcmPlayer(options = {}) {
+  const sampleRate = options.sampleRate || 24000;
+  const AudioContextClass = window.AudioContext || window.webkitAudioContext;
+  const playCtx = new AudioContextClass({ sampleRate });
+  let playHead = 0;
+  let sources = [];
+
+  return {
+    async resume() {
+      await playCtx.resume();
+    },
+    playBase64Chunk(base64Audio) {
+      const int16 = b64ToInt16(base64Audio);
+      const buffer = playCtx.createBuffer(1, int16.length, sampleRate);
+      const channel = buffer.getChannelData(0);
+      for (let i = 0; i < int16.length; i++) channel[i] = int16[i] / 0x8000;
+
+      const source = playCtx.createBufferSource();
+      source.buffer = buffer;
+      source.connect(playCtx.destination);
+      const startAt = Math.max(playCtx.currentTime, playHead);
+      source.start(startAt);
+      playHead = startAt + buffer.duration;
+      sources.push(source);
+      source.onended = () => {
+        sources = sources.filter((item) => item !== source);
+      };
+    },
+    stopQueuedAudio() {
+      sources.forEach((source) => {
+        try {
+          source.stop();
+        } catch (_) {}
+      });
+      sources = [];
+      playHead = 0;
+    },
+    close() {
+      this.stopQueuedAudio();
+      playCtx.close();
+    },
+  };
+}
+
+function downsampleToPCM(input, inputRate, outputRate) {
+  const ratio = inputRate / outputRate;
+  const outputLength = Math.floor(input.length / ratio);
+  const output = new Int16Array(outputLength);
+  for (let i = 0; i < outputLength; i++) {
+    const sample = Math.max(-1, Math.min(1, input[Math.floor(i * ratio)]));
+    output[i] = sample < 0 ? sample * 0x8000 : sample * 0x7fff;
+  }
+  return output.buffer;
+}
+
+function bytesToB64(buffer) {
+  let binary = "";
+  const bytes = new Uint8Array(buffer);
+  for (let i = 0; i < bytes.length; i++)
+    binary += String.fromCharCode(bytes[i]);
+  return btoa(binary);
+}
+
+function b64ToInt16(base64Audio) {
+  const binary = atob(base64Audio);
+  const bytes = Uint8Array.from(binary, (ch) => ch.charCodeAt(0));
+  // Int16Array rejects an odd byte length, so view whole samples only.
+  return new Int16Array(bytes.buffer, 0, bytes.length >> 1);
+}
+
+window.AudioHelpers = {
+  createMicrophonePipeline,
+  createPcmPlayer,
+  downsampleToPCM,
+  bytesToB64,
+};
diff --git a/aai_cli/init/templates/voice-agent/static/index.html b/aai_cli/init/templates/voice_agent/static/index.html
similarity index 100%
rename from aai_cli/init/templates/voice-agent/static/index.html
rename to aai_cli/init/templates/voice_agent/static/index.html
diff --git a/aai_cli/init/templates/voice_agent/static/styles.css b/aai_cli/init/templates/voice_agent/static/styles.css
new file mode 100644
index 00000000..2ea6cc6f
--- /dev/null
+++ b/aai_cli/init/templates/voice_agent/static/styles.css
@@ -0,0 +1,329 @@
+/* THEME TOKENS — AssemblyAI design system. Edit this block first to retheme.
+   Brand fonts hotlink from assemblyai.com (CORS-open) with system fallbacks. */
+@font-face {
+  font-family: "Oceanic Text";
+  src: url("https://www.assemblyai.com/_aai/fonts/rebrand/OceanicText-Regular.otf")
+    format("opentype");
+  font-weight: 400;
+  font-display: swap;
+}
+@font-face {
+  font-family: "UN 11ST";
+  src: url("https://www.assemblyai.com/_aai/fonts/rebrand/UN_11ST_Regular.woff2")
+    format("woff2");
+  font-weight: 400;
+  font-display: swap;
+}
+@font-face {
+  font-family: "UN 11ST";
+  src: url("https://www.assemblyai.com/_aai/fonts/rebrand/UN_11ST_Bold.woff2")
+    format("woff2");
+  font-weight: 700;
+  font-display: swap;
+}
+@font-face {
+  font-family: "Modern Gothic Mono";
+  src: url("https://www.assemblyai.com/_aai/fonts/rebrand/ModernGothicMono-Regular.woff2")
+    format("woff2");
+  font-weight: 400;
+  font-display: swap;
+}
+
+:root {
+  /* Brand fonts */
+  --font-display: "Oceanic Text", Georgia, serif;
+  --font-body: "UN 11ST", system-ui, sans-serif;
+  --font-mono: "Modern Gothic Mono", "JetBrains Mono", monospace;
+
+  /* Cobolt (brand purple — the only accent) */
+  --color-cobolt-500: #3923c7;
+  --color-cobolt-300: #887bdd;
+  --color-cobolt-200: #b0a7e9;
+  --color-cobolt-100: #d7d3f4;
+
+  /* Warm neutrals */
+  --color-black-500: #1d1b16;
+  --color-black-400: #4a4945;
+  --color-black-300: #777673;
+  --color-white-100: #ffffff;
+  --color-white-200: #fdfcf8;
+  --color-white-300: #f5f3eb;
+  --color-neutral-100: #ecebe5;
+  --color-neutral-200: #dad7cb;
+  --color-neutral-300: #c7c3b2;
+
+  /* Blush (error highlight only) */
+  --color-blush-200: #f4d4d0;
+  --color-blush-500: #e39389;
+
+  /* Semantic aliases — downstream rules use these */
+  --color-bg: var(--color-white-200);
+  --color-surface: var(--color-white-100);
+  --color-text: var(--color-black-400);
+  --color-text-dark: var(--color-black-500);
+  --color-text-muted: var(--color-black-300);
+  --color-border: var(--color-neutral-300);
+  --color-accent: var(--color-cobolt-500);
+  --color-accent-hover: var(--color-cobolt-300);
+  --color-accent-contrast: var(--color-white-100);
+  --color-user: var(--color-black-400);
+  --color-connected: var(--color-black-500);
+
+  --shadow-focus: 0 0 0 3px var(--color-cobolt-100);
+
+  --radius-cta: 4px;
+  --radius-control: 8px;
+  --radius-panel: 12px;
+  --radius-pill: 9999px;
+  --space-page-block: 64px;
+  --space-page-inline: 24px;
+  --content-width: 720px;
+}
+
+/* BASE */
+* {
+  box-sizing: border-box;
+}
+
+body {
+  min-height: 100vh;
+  margin: 0;
+  padding: var(--space-page-block) var(--space-page-inline);
+  background: var(--color-bg);
+  color: var(--color-text);
+  font: 16px/1.3 var(--font-body);
+  -moz-osx-font-smoothing: grayscale;
+  -webkit-font-smoothing: antialiased;
+  text-rendering: optimizeLegibility;
+}
+
+h1,
+h2,
+h3,
+h4,
+h5,
+h6 {
+  font-family: var(--font-display);
+  font-weight: 400;
+  line-height: 1;
+  color: var(--color-text-dark);
+}
+
+button {
+  appearance: none;
+  font: inherit;
+}
+
+/* LAYOUT */
+.app-shell {
+  max-width: var(--content-width);
+  margin: 0 auto;
+}
+
+/* BRAND CHROME */
+.brand {
+  display: inline-flex;
+  align-items: center;
+  margin-bottom: 40px;
+}
+
+.brand-logo {
+  display: block;
+  width: auto;
+  height: 20px;
+}
+
+.eyebrow {
+  display: inline-flex;
+  align-items: center;
+  margin-bottom: 16px;
+  padding: 8px 16px;
+  border: 1px solid var(--color-border);
+  border-radius: var(--radius-cta);
+  color: var(--color-black-400);
+  font-family: var(--font-mono);
+  font-size: 12px;
+  letter-spacing: 1.2px;
+  text-transform: uppercase;
+  font-feature-settings: "ss09" 1;
+}
+
+.page-header {
+  margin-bottom: 32px;
+}
+
+.page-title {
+  margin: 0 0 12px;
+  font-size: 48px;
+  letter-spacing: -2.4px;
+}
+
+.page-subtitle {
+  max-width: 60ch;
+  margin: 0;
+  font-size: 18px;
+  color: var(--color-text-muted);
+}
+
+.page-footer {
+  display: flex;
+  flex-wrap: wrap;
+  align-items: center;
+  justify-content: space-between;
+  gap: 12px;
+  margin-top: 64px;
+  padding-top: 24px;
+  border-top: 1px solid var(--color-border);
+  color: var(--color-text-muted);
+  font-size: 14px;
+}
+
+.footer-link {
+  color: var(--color-accent);
+  font-family: var(--font-mono);
+  font-size: 12px;
+  letter-spacing: 1.2px;
+  text-transform: uppercase;
+  text-decoration: none;
+  transition: color 0.2s ease;
+}
+
+.footer-link:hover {
+  color: var(--color-accent-hover);
+}
+
+.control-bar {
+  display: flex;
+  align-items: center;
+  flex-wrap: wrap;
+  gap: 12px;
+}
+
+/* CONTROLS */
+.button {
+  --button-bg: var(--color-accent);
+  --button-fg: var(--color-accent-contrast);
+  display: inline-flex;
+  min-height: 40px;
+  align-items: center;
+  justify-content: center;
+  border: 0;
+  border-radius: var(--radius-cta);
+  background: var(--button-bg);
+  color: var(--button-fg);
+  cursor: pointer;
+  font-family: var(--font-mono);
+  font-size: 14px;
+  letter-spacing: 1.4px;
+  text-transform: uppercase;
+  padding: 0 18px;
+  white-space: nowrap;
+  transition: background-color 0.2s ease;
+}
+
+.button:hover:not(:disabled) {
+  --button-bg: var(--color-accent-hover);
+}
+
+.button:focus-visible {
+  box-shadow: var(--shadow-focus);
+  outline: none;
+}
+
+.button:disabled {
+  cursor: default;
+  opacity: 0.55;
+}
+
+.connection-button[data-state="connected"] {
+  --button-bg: var(--color-connected);
+}
+
+.connection-button[data-state="connected"]:hover:not(:disabled) {
+  --button-bg: var(--color-black-400);
+}
+
+/* STATUS STATES */
+.status-pill {
+  display: inline-flex;
+  align-items: center;
+  border: 1px solid var(--color-neutral-300);
+  border-radius: var(--radius-pill);
+  background: var(--color-neutral-200);
+  color: var(--color-black-300);
+  font-size: 14px;
+  padding: 5px 14px;
+}
+
+.status-pill:empty {
+  display: none;
+}
+
+.status-pill[data-state="live"] {
+  border-color: var(--color-cobolt-200);
+  background: var(--color-cobolt-100);
+  color: var(--color-cobolt-500);
+}
+
+.status-pill[data-state="idle"] {
+  border-color: var(--color-neutral-300);
+  background: var(--color-neutral-200);
+  color: var(--color-black-300);
+}
+
+.status-pill[data-state="error"] {
+  border-color: var(--color-blush-500);
+  background: var(--color-blush-200);
+  color: var(--color-black-500);
+}
+
+/* CONVERSATION VIEW */
+.conversation-log {
+  margin-top: 24px;
+}
+
+.conversation-turn {
+  margin: 8px 0;
+  border-left: 3px solid var(--color-border);
+  padding: 8px 0 8px 12px;
+  overflow-wrap: anywhere;
+}
+
+.conversation-turn[data-speaker="you"] {
+  border-left-color: var(--color-user);
+}
+
+.conversation-turn[data-speaker="agent"] {
+  border-left-color: var(--color-accent);
+}
+
+.turn-speaker {
+  font-family: var(--font-mono);
+  font-size: 12px;
+  letter-spacing: 1.2px;
+  text-transform: uppercase;
+}
+
+.conversation-turn[data-speaker="you"] .turn-speaker {
+  color: var(--color-user);
+}
+
+.conversation-turn[data-speaker="agent"] .turn-speaker {
+  color: var(--color-accent);
+}
+
+@media (max-width: 768px) {
+  :root {
+    --space-page-block: 40px;
+  }
+
+  .page-title {
+    font-size: 30px;
+    letter-spacing: -1.5px;
+  }
+
+  .button,
+  .status-pill {
+    width: 100%;
+  }
+}
diff --git a/aai_cli/init/templates/voice_agent/vercel.json b/aai_cli/init/templates/voice_agent/vercel.json
new file mode 100644
index 00000000..10e8a7c1
--- /dev/null
+++ b/aai_cli/init/templates/voice_agent/vercel.json
@@ -0,0 +1,4 @@
+{
+  "$schema": "https://openapi.vercel.sh/vercel.json",
+  "framework": "fastapi"
+}
diff --git a/docs/superpowers/plans/2026-06-15-agent-framework-template.md b/docs/superpowers/plans/2026-06-15-agent-framework-template.md
new file mode 100644
index 00000000..56237064
--- /dev/null
+++ b/docs/superpowers/plans/2026-06-15-agent-framework-template.md
@@ -0,0 +1,1570 @@
+# `agent-framework` init template — Implementation Plan
+
+> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
+
+**Goal:** Add a fourth `assembly init` starter template, `agent-framework`, with the same browser UI as `voice-agent` but built on a server-orchestrated **cascade** — Streaming STT → LLM Gateway → sandbox TTS — instead of the all-in-one Voice Agent endpoint.
+
+**Architecture:** The browser opens one same-origin WebSocket (`/ws`) to a FastAPI backend. The backend runs the cascade: forwards mic PCM to the Streaming v3 STT socket, detects end-of-turn, streams the transcript through the OpenAI-compatible LLM Gateway, synthesizes the reply over the sandbox streaming-TTS socket, and streams audio back. All three API credentials stay server-side. The orchestrator (`api/cascade.py`) is built with injected connect-factories + LLM callable so it is fully testable with fakes (mirroring `aai_cli/tts/session.py`).
+
+**Tech Stack:** FastAPI + Starlette WebSockets, `websockets` async client (STT + TTS), `openai.AsyncOpenAI` (streamed gateway completion), `uvicorn`. Buildless static HTML/CSS/JS frontend.
+
+---
+
+## Key constraints discovered (read before starting)
+
+- **Sandbox-only.** `streaming_tts_host` is empty in production, so the whole cascade must target `sandbox000` with a sandbox key. The backend fails fast (a `session.error` event, *not* an import error) when the TTS host is empty.
+- **Settings must not raise at import.** `tests/test_init_template_serve.py::test_serves_root_and_static_assets` is parametrized over every template dir; it imports `api.index` and hits `GET /`. The empty-TTS-host guard therefore lives in the WS handler, never at module import.
+- **Template `.py` is coverage- and mutation-gated.** Confirmed: `coverage.xml` includes `init/templates/.../api/*.py`. diff-cover requires 100% patch coverage of new template lines and the mutation gate mutates them, so `cascade.py` needs real, asserting tests.
+- **The contract `_STDLIB` set is incomplete.** `tests/test_init_template_contract.py::test_requirements_cover_backend_imports` treats any import not in its `_STDLIB` set as third-party and demands it in `requirements.txt`. The cascade imports the stdlib modules `asyncio`, `base64`, `contextlib`, `dataclasses`, `collections`, and `urllib`, so extend `_STDLIB` with them (Task 1).
+- **Two hard-coded test assertions break when the registry grows:** `tests/test_init_command.py::test_init_template_arg_help_is_derived_from_registry` (exact help string) and the `assembly init --help` snapshot `tests/__snapshots__/test_snapshots_help_build.ambr`. Update the first by hand (Task 1); regenerate the second with `--snapshot-update` (Task 9).
+- `openai>=2.41.0` and `websockets>=16.0` are **main project deps**, so the dev env can import `cascade.py` for the serve test. The template's own `requirements.txt` pins its independent floors.
+
+## File structure
+
+New template dir `aai_cli/init/templates/agent-framework/`:
+- `api/__init__.py` — empty package marker.
+- `api/settings.py` — env-derived config; no import-time raise.
+- `api/cascade.py` — pure helpers + the injectable async orchestrator + the FastAPI browser adapter.
+- `api/index.py` — FastAPI app: static mount, `GET /`, `@app.websocket("/ws")` adapter.
+- `static/index.html` — copy of voice-agent's page, cascade-worded.
+- `static/styles.css` — verbatim copy of voice-agent's.
+- `static/audio.js` — verbatim copy of voice-agent's.
+- `static/app.js` — same event handling as voice-agent; connects to `/ws` directly.
+- `README.md`, `AGENTS.md`, `env.example`, `gitignore`, `requirements.txt`, `Procfile`, `Dockerfile`, `dockerignore`, `runtime.txt`, `vercel.json`.
+
+Shared CLI edits:
+- `aai_cli/init/templates.py` — register the template.
+- `aai_cli/app/init_exec.py` — inject `ASSEMBLYAI_TTS_HOST` into scaffolded `.env`.
+
+Test edits:
+- `tests/test_init_template_contract.py` — extend `_STDLIB`.
+- `tests/test_init_command.py` — update the exact help-string assertion.
+- `tests/test_init_template_agent_framework.py` — NEW bespoke tests.
+- `tests/__snapshots__/test_snapshots_help_build.ambr` — regenerated.
+
+---
+
+## Task 1: CLI wiring (register template, inject TTS host, fix gated assertions)
+
+**Files:**
+- Modify: `aai_cli/init/templates.py`
+- Modify: `aai_cli/app/init_exec.py:91-104`
+- Modify: `tests/test_init_template_contract.py` (the `_STDLIB` constant)
+- Modify: `tests/test_init_command.py` (exact help string)
+- Test: `tests/test_init_command.py`, `tests/test_init_templates.py`
+
+- [ ] **Step 1: Update the failing registry expectations first (TDD red)**
+
+In `tests/test_init_command.py`, update the exact-help assertion to include the new id (appended last):
+
+```python
+    assert default.help == (
+        "Template to scaffold: audio-transcription, live-captions, voice-agent, "
+        "agent-framework (omit to pick interactively)"
+    )
+```
+
+- [ ] **Step 2: Run it to confirm it now fails (registry not updated yet)**
+
+Run: `uv run pytest tests/test_init_command.py::test_init_template_arg_help_is_derived_from_registry tests/test_init_templates.py -q`
+Expected: FAIL — `test_order_matches_registry`/`test_every_shipped_directory_is_registered` and the help-string test disagree with the registry.
+
+- [ ] **Step 3: Register the template**
+
+In `aai_cli/init/templates.py`, add the entry and order (append after `voice-agent`):
+
+```python
+TEMPLATES: dict[str, str] = {
+    "audio-transcription": "Audio Transcription",
+    "live-captions": "Live Captions",
+    "voice-agent": "Voice Agent",
+    "agent-framework": "Agent Framework",
+}
+
+# Display order for the picker and `--help`.
+TEMPLATE_ORDER: tuple[str, ...] = (
+    "audio-transcription",
+    "live-captions",
+    "voice-agent",
+    "agent-framework",
+)
+```
+
+- [ ] **Step 4: Inject the TTS host into scaffolded `.env`**
+
+In `aai_cli/app/init_exec.py`, add the TTS host to `_active_env_vars()` (the cascade template reads it; empty in prod, which the template treats as "sandbox required"):
+
+```python
+    return {
+        "ASSEMBLYAI_BASE_URL": env.api_base,
+        "ASSEMBLYAI_LLM_GATEWAY_URL": env.llm_gateway_base,
+        "ASSEMBLYAI_STREAMING_HOST": env.streaming_host,
+        # Voice Agent host mirrors the streaming host's naming across environments.
+        "ASSEMBLYAI_AGENTS_HOST": env.streaming_host.replace("streaming", "agents", 1),
+        # Streaming-TTS host for the cascade (agent-framework) template. Empty in
+        # production, where streaming TTS has no host; that template then refuses to
+        # run and points at --sandbox.
+        "ASSEMBLYAI_TTS_HOST": env.streaming_tts_host,
+    }
+```
+
+- [ ] **Step 5: Extend the contract test's stdlib set**
+
+In `tests/test_init_template_contract.py`, widen `_STDLIB` so the cascade's stdlib imports aren't mistaken for third-party packages:
+
+```python
+_STDLIB = {
+    "os",
+    "tempfile",
+    "uuid",
+    "pathlib",
+    "__future__",
+    "json",
+    "typing",
+    "asyncio",
+    "base64",
+    "contextlib",
+    "dataclasses",
+    "collections",
+    "urllib",
+}
+```
+
+- [ ] **Step 6: Add an assertion pinning the new env var (mutation coverage for Step 4)**
+
+In `tests/test_init_command.py`, beside the existing `_active_env_vars` test (~line 312), add:
+
+```python
+def test_active_env_vars_includes_streaming_tts_host(monkeypatch):
+    fake = SimpleNamespace(
+        api_base="https://api.x",
+        llm_gateway_base="https://llm.x/v1",
+        streaming_host="streaming.x",
+        streaming_tts_host="streaming-tts.x",
+    )
+    monkeypatch.setattr(init_exec.environments, "active", lambda: fake)
+    assert init_exec._active_env_vars()["ASSEMBLYAI_TTS_HOST"] == "streaming-tts.x"
+```
+
+(Use the same `SimpleNamespace`/`monkeypatch` shape as the neighboring test; import `SimpleNamespace` from `types` if not already imported.)
+
+- [ ] **Step 7: Run the registry + command tests (they pass except for the missing dir)**
+
+Run: `uv run pytest tests/test_init_templates.py tests/test_init_command.py -q`
+Expected: `test_every_registered_template_has_a_directory` FAILS (dir not created yet); everything else PASSES. This failure is resolved in Task 6 when `api/index.py` lands. Proceed.
+
+- [ ] **Step 8: Commit**
+
+```bash
+git add aai_cli/init/templates.py aai_cli/app/init_exec.py tests/test_init_template_contract.py tests/test_init_command.py
+git commit -m "feat(init): register agent-framework template + inject TTS host"
+```
+
+---
+
+## Task 2: Template skeleton + verbatim static assets
+
+**Files:**
+- Create: `aai_cli/init/templates/agent-framework/api/__init__.py`
+- Create (copy): `aai_cli/init/templates/agent-framework/static/styles.css`
+- Create (copy): `aai_cli/init/templates/agent-framework/static/audio.js`
+
+- [ ] **Step 1: Create the directory and copy the verbatim assets**
+
+Run:
+
+```bash
+SRC=aai_cli/init/templates/voice-agent
+DST=aai_cli/init/templates/agent-framework
+mkdir -p "$DST/api" "$DST/static"
+: > "$DST/api/__init__.py"
+cp "$SRC/static/styles.css" "$DST/static/styles.css"
+cp "$SRC/static/audio.js" "$DST/static/audio.js"
+```
+
+`styles.css` and `audio.js` are reused unchanged — the UI and the mic-pipeline/PCM-player/barge-in helpers are identical to `voice-agent`.
+
+- [ ] **Step 2: Verify the copies are byte-identical**
+
+Run: `diff aai_cli/init/templates/voice-agent/static/styles.css aai_cli/init/templates/agent-framework/static/styles.css && diff aai_cli/init/templates/voice-agent/static/audio.js aai_cli/init/templates/agent-framework/static/audio.js && echo OK`
+Expected: `OK`
+
+- [ ] **Step 3: Commit**
+
+```bash
+git add aai_cli/init/templates/agent-framework/api/__init__.py aai_cli/init/templates/agent-framework/static/styles.css aai_cli/init/templates/agent-framework/static/audio.js
+git commit -m "feat(agent-framework): skeleton + shared static assets"
+```
+
+---
+
+## Task 3: `settings.py` + availability guard
+
+**Files:**
+- Create: `aai_cli/init/templates/agent-framework/api/settings.py`
+- Test: `tests/test_init_template_agent_framework.py`
+
+- [ ] **Step 1: Write the failing test**
+
+Create `tests/test_init_template_agent_framework.py`:
+
+```python
+"""Hermetic tests for the agent-framework (cascaded voice agent) template.
+
+The template ships a standalone FastAPI app under api/; load it by path with its
+own `api` package, evicting any other template's cached `api` modules so imports
+stay collision-free under pytest-xdist / pytest-randomly.
+"""
+
+from __future__ import annotations
+
+import asyncio
+import base64
+import importlib
+import json
+import sys
+from pathlib import Path
+from types import ModuleType
+
+import pytest
+
+TEMPLATE_DIR = Path("aai_cli/init/templates/agent-framework")
+
+
+def _load(module: str, monkeypatch: pytest.MonkeyPatch, **env: str) -> ModuleType:
+    for key, value in env.items():
+        monkeypatch.setenv(key, value)
+    for name in ("api.index", "api.cascade", "api.settings", "api"):
+        sys.modules.pop(name, None)
+    monkeypatch.syspath_prepend(str(TEMPLATE_DIR))
+    return importlib.import_module(module)
+
+
+def test_settings_imports_without_key_or_tts_host(monkeypatch):
+    # isolate_env strips ambient vars; with nothing set the module must still import
+    # (the empty-host guard lives in the WS handler, not at import).
+    monkeypatch.delenv("ASSEMBLYAI_API_KEY", raising=False)
+    monkeypatch.delenv("ASSEMBLYAI_TTS_HOST", raising=False)
+    settings = _load("api.settings", monkeypatch)
+    assert settings.API_KEY == ""
+    assert settings.MODEL == "claude-haiku-4-5-20251001"
+    assert settings.VOICE == "ivy"
+    assert settings.INPUT_SAMPLE_RATE == 16000
+    assert settings.OUTPUT_SAMPLE_RATE == 24000
+
+
+def test_settings_reads_env(monkeypatch):
+    settings = _load(
+        "api.settings",
+        monkeypatch,
+        ASSEMBLYAI_API_KEY="sk-test",
+        ASSEMBLYAI_STREAMING_HOST="streaming.example",
+        ASSEMBLYAI_TTS_HOST="tts.example",
+        ASSEMBLYAI_LLM_GATEWAY_URL="https://llm.example/v1",
+    )
+    assert settings.API_KEY == "sk-test"
+    assert settings.STREAMING_HOST == "streaming.example"
+    assert settings.TTS_HOST == "tts.example"
+    assert settings.LLM_GATEWAY_URL == "https://llm.example/v1"
+```
+
+- [ ] **Step 2: Run to verify it fails**
+
+Run: `uv run pytest tests/test_init_template_agent_framework.py -q`
+Expected: FAIL with `ModuleNotFoundError: No module named 'api.settings'`.
+
+- [ ] **Step 3: Write `settings.py`**
+
+Create `aai_cli/init/templates/agent-framework/api/settings.py`:
+
+```python
+from __future__ import annotations
+
+import os
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+API_KEY = os.environ.get("ASSEMBLYAI_API_KEY", "")
+
+# Hosts. `assembly init` pins these to the active environment. Streaming TTS only
+# exists in the sandbox, so this whole cascade is sandbox-only (see README); the
+# defaults point at the sandbox so a bare clone works with a sandbox key.
+STREAMING_HOST = os.environ.get(
+    "ASSEMBLYAI_STREAMING_HOST", "streaming.sandbox000.assemblyai-labs.com"
+)
+TTS_HOST = os.environ.get("ASSEMBLYAI_TTS_HOST", "streaming-tts.sandbox000.assemblyai-labs.com")
+LLM_GATEWAY_URL = os.environ.get(
+    "ASSEMBLYAI_LLM_GATEWAY_URL", "https://llm-gateway.sandbox000.assemblyai-labs.com/v1"
+)
+
+# The cascade's three knobs — edit these to change behavior.
+MODEL = "claude-haiku-4-5-20251001"
+VOICE = "ivy"
+SYSTEM_PROMPT = (
+    "You are a friendly, concise voice assistant. Keep replies short and conversational."
+)
+GREETING = "Hi! I'm your AssemblyAI voice agent. What can I help you with?"
+
+# 16 kHz PCM in (Streaming v3); 24 kHz PCM out (streaming TTS).
+INPUT_SAMPLE_RATE = 16000
+OUTPUT_SAMPLE_RATE = 24000
+```
+
+- [ ] **Step 4: Run to verify it passes**
+
+Run: `uv run pytest tests/test_init_template_agent_framework.py -q`
+Expected: PASS (2 tests).
+
+- [ ] **Step 5: Commit**
+
+```bash
+git add aai_cli/init/templates/agent-framework/api/settings.py tests/test_init_template_agent_framework.py
+git commit -m "feat(agent-framework): settings module"
+```
+
+---
+
+## Task 4: `cascade.py` pure helpers
+
+**Files:**
+- Create: `aai_cli/init/templates/agent-framework/api/cascade.py`
+- Test: `tests/test_init_template_agent_framework.py` (append)
+
+- [ ] **Step 1: Write the failing tests**
+
+Append to `tests/test_init_template_agent_framework.py`:
+
+```python
+def _cascade(monkeypatch) -> ModuleType:
+    return _load("api.cascade", monkeypatch, ASSEMBLYAI_API_KEY="sk-test")
+
+
+def test_unavailable_reason_missing_key(monkeypatch):
+    cascade = _cascade(monkeypatch)
+    settings = importlib.import_module("api.settings")
+    settings.API_KEY = ""
+    settings.TTS_HOST = "tts.example"
+    assert "ASSEMBLYAI_API_KEY" in cascade.unavailable_reason(settings)
+
+
+def test_unavailable_reason_missing_tts_host(monkeypatch):
+    cascade = _cascade(monkeypatch)
+    settings = importlib.import_module("api.settings")
+    settings.API_KEY = "sk-test"
+    settings.TTS_HOST = ""
+    reason = cascade.unavailable_reason(settings)
+    assert "sandbox" in reason and "assembly --sandbox init agent-framework" in reason
+
+
+def test_unavailable_reason_ok(monkeypatch):
+    cascade = _cascade(monkeypatch)
+    settings = importlib.import_module("api.settings")
+    settings.API_KEY = "sk-test"
+    settings.TTS_HOST = "tts.example"
+    assert cascade.unavailable_reason(settings) is None
+
+
+def test_stt_url_carries_streaming_params(monkeypatch):
+    cascade = _cascade(monkeypatch)
+    settings = importlib.import_module("api.settings")
+    settings.STREAMING_HOST = "streaming.example"
+    settings.INPUT_SAMPLE_RATE = 16000
+    url = cascade.stt_url(settings)
+    assert url.startswith("wss://streaming.example/v3/ws?")
+    assert "sample_rate=16000" in url
+    assert "encoding=pcm_s16le" in url
+    assert "format_turns=true" in url
+
+
+def test_tts_url_carries_voice_and_rate(monkeypatch):
+    cascade = _cascade(monkeypatch)
+    settings = importlib.import_module("api.settings")
+    settings.TTS_HOST = "tts.example"
+    settings.VOICE = "ivy"
+    settings.OUTPUT_SAMPLE_RATE = 24000
+    url = cascade.tts_url(settings)
+    assert url.startswith("wss://tts.example/v1/ws/?")
+    assert "voice=ivy" in url
+    assert "sample_rate=24000" in url
+
+
+def test_is_final_user_turn(monkeypatch):
+    cascade = _cascade(monkeypatch)
+    assert cascade.is_final_user_turn({"end_of_turn": True, "turn_is_formatted": True}) is True
+    assert cascade.is_final_user_turn({"end_of_turn": True, "turn_is_formatted": False}) is False
+    assert cascade.is_final_user_turn({"end_of_turn": False, "turn_is_formatted": True}) is False
+    assert cascade.is_final_user_turn({}) is False
+
+
+def test_build_messages(monkeypatch):
+    cascade = _cascade(monkeypatch)
+    messages = cascade.build_messages("be brief", "hello there")
+    assert messages == [
+        {"role": "system", "content": "be brief"},
+        {"role": "user", "content": "hello there"},
+    ]
+```
+
+- [ ] **Step 2: Run to verify it fails**
+
+Run: `uv run pytest tests/test_init_template_agent_framework.py -q -k "unavailable or url or final or build"`
+Expected: FAIL with `ModuleNotFoundError: No module named 'api.cascade'`.
+
+- [ ] **Step 3: Write the helper section of `cascade.py`**
+
+Create `aai_cli/init/templates/agent-framework/api/cascade.py` with the imports and pure helpers (the orchestrator is added in Task 5, the adapter in Task 6 — write them as one growing file):
+
+```python
+"""Server-side cascade orchestrator for the agent-framework template.
+
+The browser opens one WebSocket to FastAPI and the backend wires three AssemblyAI
+primitives together — Streaming STT, the LLM Gateway, and streaming TTS — so every
+credential stays on the server. The orchestrator takes injected connect-factories and
+an LLM callable (`Deps`) so it runs hermetically against fakes in tests, the same
+seam `aai_cli/tts/session.py` uses.
+
+Browser protocol (identical to the voice-agent template):
+  in : {"type": "input.audio", "audio": <base64 PCM>}
+  out: transcript.user / transcript.agent / reply.audio (base64 in `data`) /
+       input.speech.started / reply.done / session.error
+"""
+
+from __future__ import annotations
+
+import asyncio
+import base64
+import contextlib
+import json
+from collections.abc import AsyncIterator, Awaitable, Callable
+from dataclasses import dataclass
+from typing import Any
+from urllib.parse import urlencode
+
+
+def unavailable_reason(settings: Any) -> str | None:
+    """Why the cascade can't run, or None when it can.
+
+    Streaming TTS has no production host, so an empty TTS host means the user must
+    re-scaffold against the sandbox.
+    """
+    if not settings.API_KEY:
+        return "ASSEMBLYAI_API_KEY is not set — configure it in your deployment's environment."
+    if not settings.TTS_HOST:
+        return (
+            "Streaming TTS has no production host, so this cascade is sandbox-only. "
+            "Re-scaffold against the sandbox: assembly --sandbox init agent-framework."
+        )
+    return None
+
+
+def stt_url(settings: Any) -> str:
+    """The Streaming v3 WebSocket URL with PCM + turn-formatting params."""
+    params = urlencode(
+        {
+            "sample_rate": settings.INPUT_SAMPLE_RATE,
+            "encoding": "pcm_s16le",
+            "speech_model": "u3-rt-pro",
+            "format_turns": "true",
+        }
+    )
+    return f"wss://{settings.STREAMING_HOST}/v3/ws?{params}"
+
+
+def tts_url(settings: Any) -> str:
+    """The streaming-TTS WebSocket URL for the configured voice and sample rate."""
+    params = urlencode({"voice": settings.VOICE, "sample_rate": settings.OUTPUT_SAMPLE_RATE})
+    return f"wss://{settings.TTS_HOST}/v1/ws/?{params}"
+
+
+def is_final_user_turn(msg: dict[str, Any]) -> bool:
+    """True for a finalized, formatted end-of-turn (the cue to reply)."""
+    return bool(msg.get("end_of_turn")) and bool(msg.get("turn_is_formatted"))
+
+
+def build_messages(system_prompt: str, user_text: str) -> list[dict[str, str]]:
+    """The chat `messages` array for one user turn."""
+    return [
+        {"role": "system", "content": system_prompt},
+        {"role": "user", "content": user_text},
+    ]
+```
+
+- [ ] **Step 4: Run to verify it passes**
+
+Run: `uv run pytest tests/test_init_template_agent_framework.py -q -k "unavailable or url or final or build"`
+Expected: PASS (7 tests).
+
+- [ ] **Step 5: Commit**
+
+```bash
+git add aai_cli/init/templates/agent-framework/api/cascade.py tests/test_init_template_agent_framework.py
+git commit -m "feat(agent-framework): cascade pure helpers"
+```
+
+---
+
+## Task 5: `cascade.py` orchestrator (the cascade itself)
+
+**Files:**
+- Modify: `aai_cli/init/templates/agent-framework/api/cascade.py` (append)
+- Test: `tests/test_init_template_agent_framework.py` (append)
+
+- [ ] **Step 1: Write the failing tests (fakes + each stage + happy path)**
+
+Append to `tests/test_init_template_agent_framework.py`:
+
+```python
+class FakeBrowser:
+    """A browser side: hands out queued inbound messages, then blocks forever so the
+    mic pump stays alive until the test cancels it (mirrors a still-connected client)."""
+
+    def __init__(self, inbound: list[dict] | None = None):
+        self._inbound = list(inbound or [])
+        self.sent: list[dict] = []
+        self._idle = asyncio.Event()  # never set -> recv() blocks after the queue drains
+
+    async def send(self, event: dict) -> None:
+        self.sent.append(event)
+
+    async def recv(self) -> dict | None:
+        if self._inbound:
+            return self._inbound.pop(0)
+        await self._idle.wait()
+        return None
+
+    def types(self) -> list[str]:
+        return [event["type"] for event in self.sent]
+
+
+class FakeWS:
+    """A fake STT/TTS socket: yields the given frames as JSON strings, records sends."""
+
+    def __init__(self, frames: list[dict] | None = None):
+        self._frames = [json.dumps(f) for f in (frames or [])]
+        self.sent: list[Any] = []
+        self.closed = False
+
+    def __aiter__(self) -> "FakeWS":
+        return self
+
+    async def __anext__(self) -> str:
+        if not self._frames:
+            raise StopAsyncIteration
+        return self._frames.pop(0)
+
+    async def recv(self) -> str:
+        if not self._frames:
+            raise AssertionError("recv() past end of fake frames")
+        return self._frames.pop(0)
+
+    async def send(self, data: Any) -> None:
+        self.sent.append(data)
+
+    async def close(self) -> None:
+        self.closed = True
+
+
+def _deps(monkeypatch, *, stt, tts_frames, llm_text):
+    cascade = _cascade(monkeypatch)
+    settings = importlib.import_module("api.settings")
+    settings.API_KEY = "sk-test"
+    settings.TTS_HOST = "tts.example"
+    settings.GREETING = "hello!"
+    settings.SYSTEM_PROMPT = "be brief"
+
+    async def llm_stream(_messages):
+        for piece in llm_text:
+            yield piece
+
+    deps = cascade.Deps(
+        connect_stt=_async_return(stt),
+        connect_tts=_async_return(FakeWS(tts_frames)),
+        llm_stream=llm_stream,
+        settings=settings,
+    )
+    return cascade, deps
+
+
+def _async_return(value):
+    async def factory():
+        return value
+
+    return factory
+
+
+def test_pump_mic_forwards_decoded_audio(monkeypatch):
+    cascade = _cascade(monkeypatch)
+    pcm = b"\x01\x02\x03\x04"
+    browser = FakeBrowser([{"type": "input.audio", "audio": base64.b64encode(pcm).decode()}])
+    stt = FakeWS()
+
+    async def drive():
+        # Queue a disconnect (None) behind the one audio message so the pump returns.
+        browser._inbound.append(None)  # type: ignore[arg-type]
+        await cascade._pump_mic(browser, stt)
+
+    asyncio.run(drive())
+    assert stt.sent == [pcm]
+
+
+def test_pump_mic_ignores_non_audio_and_stops_on_disconnect(monkeypatch):
+    cascade = _cascade(monkeypatch)
+    browser = FakeBrowser([{"type": "noise"}, None])  # type: ignore[list-item]
+    stt = FakeWS()
+    asyncio.run(cascade._pump_mic(browser, stt))
+    assert stt.sent == []
+
+
+def test_synthesize_streams_audio_frames(monkeypatch):
+    """_synthesize forwards each Audio frame and sends Generate/Flush/Terminate."""
+    # Only the cascade module is needed here; _synthesize takes the socket directly,
+    # so there is no reason to build a full Deps bundle.
+    cascade = _cascade(monkeypatch)
+    browser = FakeBrowser()
+    tts = FakeWS(
+        [
+            {"type": "Begin", "configuration": {"sample_rate": 24000}},
+            {"type": "Audio", "audio": "AAA="},
+            {"type": "Audio", "audio": "BBB=", "is_final": True},
+        ]
+    )
+    asyncio.run(cascade._synthesize(browser, tts, "hi"))
+    assert browser.sent == [
+        {"type": "reply.audio", "data": "AAA="},
+        {"type": "reply.audio", "data": "BBB="},
+    ]
+    # Generate + ForceFlushTextBuffer + Terminate were sent.
+    kinds = [json.loads(s)["type"] for s in tts.sent]
+    assert kinds == ["Generate", "ForceFlushTextBuffer", "Terminate"]
+    # _synthesize only drives the protocol on an already-open socket; closing it is
+    # the caller's job (_speak does so in its `finally`), so it is still open here.
+    assert tts.closed is False
+
+
+def test_synthesize_raises_on_error_frame(monkeypatch):
+    cascade = _cascade(monkeypatch)
+    browser = FakeBrowser()
+    tts = FakeWS(
+        [{"type": "Begin", "configuration": {}}, {"type": "Error", "error": "bad voice"}]
+    )
+    with pytest.raises(RuntimeError, match="bad voice"):
+        asyncio.run(cascade._synthesize(browser, tts, "hi"))
+
+
+def test_synthesize_raises_when_no_begin(monkeypatch):
+    cascade = _cascade(monkeypatch)
+    browser = FakeBrowser()
+    tts = FakeWS([{"type": "Audio", "audio": "AAA=", "is_final": True}])
+    with pytest.raises(RuntimeError, match="did not begin"):
+        asyncio.run(cascade._synthesize(browser, tts, "hi"))
+
+
+def test_generate_reply_speaks_llm_text(monkeypatch):
+    cascade, deps = _deps(
+        monkeypatch,
+        stt=FakeWS(),
+        tts_frames=[
+            {"type": "Begin", "configuration": {}},
+            {"type": "Audio", "audio": "AAA=", "is_final": True},
+        ],
+        llm_text=["Hello", " world"],
+    )
+    browser = FakeBrowser()
+    asyncio.run(cascade._generate_reply(browser, deps, cascade.build_messages("be brief", "hi")))
+    assert {"type": "transcript.agent", "text": "Hello world"} in browser.sent
+    assert {"type": "reply.audio", "data": "AAA="} in browser.sent
+    assert browser.sent[-1] == {"type": "reply.done", "status": "completed"}
+
+
+def test_generate_reply_empty_llm_emits_done(monkeypatch):
+    cascade, deps = _deps(monkeypatch, stt=FakeWS(), tts_frames=[], llm_text=["  "])
+    browser = FakeBrowser()
+    asyncio.run(cascade._generate_reply(browser, deps, []))
+    assert browser.sent == [{"type": "reply.done", "status": "empty"}]
+
+
+def test_maybe_barge_in_cancels_active_reply(monkeypatch):
+    cascade = _cascade(monkeypatch)
+    browser = FakeBrowser()
+
+    async def drive():
+        session = cascade.Session()
+        started = asyncio.Event()
+
+        async def never_ending():
+            started.set()
+            await asyncio.Event().wait()
+
+        session.reply_task = asyncio.create_task(never_ending())
+        await started.wait()
+        await cascade.maybe_barge_in(browser, session)
+        return session
+
+    session = asyncio.run(drive())
+    assert browser.sent == [{"type": "input.speech.started"}]
+    assert session.reply_task is None
+
+
+def test_maybe_barge_in_noop_without_reply(monkeypatch):
+    cascade = _cascade(monkeypatch)
+    browser = FakeBrowser()
+    asyncio.run(cascade.maybe_barge_in(browser, cascade.Session()))
+    assert browser.sent == []
+
+
+def test_run_session_unavailable_emits_error(monkeypatch):
+    cascade = _cascade(monkeypatch)
+    settings = importlib.import_module("api.settings")
+    settings.API_KEY = ""
+    browser = FakeBrowser()
+    deps = cascade.Deps(
+        connect_stt=_async_return(FakeWS()),
+        connect_tts=_async_return(FakeWS()),
+        llm_stream=lambda _m: iter(()),
+        settings=settings,
+    )
+    asyncio.run(cascade.run_session(browser, deps))
+    assert browser.types() == ["session.error"]
+
+
+def test_run_session_happy_path(monkeypatch):
+    # STT yields one finalized user turn, then closes -> the reply drains, then the
+    # session tears down. The greeting speaks first.
+    stt = FakeWS(
+        [{"type": "Turn", "transcript": "what time is it", "end_of_turn": True, "turn_is_formatted": True}]
+    )
+
+    # Each connect_tts call returns a fresh socket (greeting + reply).
+    tts_sockets = [
+        FakeWS([{"type": "Begin", "configuration": {}}, {"type": "Audio", "audio": "G=", "is_final": True}]),
+        FakeWS([{"type": "Begin", "configuration": {}}, {"type": "Audio", "audio": "R=", "is_final": True}]),
+    ]
+    cascade = _cascade(monkeypatch)
+    settings = importlib.import_module("api.settings")
+    settings.API_KEY = "sk-test"
+    settings.TTS_HOST = "tts.example"
+    settings.GREETING = "hello!"
+    settings.SYSTEM_PROMPT = "be brief"
+
+    async def llm_stream(_messages):
+        yield "It is noon."
+
+    def connect_tts():
+        async def factory():
+            return tts_sockets.pop(0)
+
+        return factory()
+
+    deps = cascade.Deps(
+        connect_stt=_async_return(stt),
+        connect_tts=connect_tts,
+        llm_stream=llm_stream,
+        settings=settings,
+    )
+    browser = FakeBrowser()
+    asyncio.run(asyncio.wait_for(cascade.run_session(browser, deps), timeout=5))
+
+    types = browser.types()
+    # Greeting (agent transcript + audio + done), then the user turn, then the reply.
+    assert types[0] == "transcript.agent"  # greeting text
+    assert {"type": "transcript.user", "text": "what time is it"} in browser.sent
+    assert {"type": "transcript.agent", "text": "It is noon."} in browser.sent
+    assert {"type": "reply.audio", "data": "R="} in browser.sent
+    assert browser.sent[-1] == {"type": "reply.done", "status": "completed"}
+    assert stt.closed is True
+```
+
+- [ ] **Step 2: Run to verify it fails**
+
+Run: `uv run pytest tests/test_init_template_agent_framework.py -q -k "pump_mic or synthesize or generate_reply or barge or run_session"`
+Expected: FAIL — `Deps`, `Session`, `run_session`, `_synthesize`, etc. don't exist yet.
+
+- [ ] **Step 3: Append the orchestrator to `cascade.py`**
+
+Add to `aai_cli/init/templates/agent-framework/api/cascade.py` (after the helpers):
+
+```python
+@dataclass
+class Deps:
+    """Injected cascade dependencies. `Deps.real(settings)` wires the live clients;
+    tests pass fakes with the same shapes."""
+
+    connect_stt: Callable[[], Awaitable[Any]]
+    connect_tts: Callable[[], Awaitable[Any]]
+    llm_stream: Callable[[list[dict[str, str]]], AsyncIterator[str]]
+    settings: Any
+
+    @classmethod
+    def real(cls, settings: Any) -> "Deps":
+        return cls(
+            connect_stt=lambda: _connect_stt(settings),
+            connect_tts=lambda: _connect_tts(settings),
+            llm_stream=lambda messages: _llm_stream(settings, messages),
+            settings=settings,
+        )
+
+
+class Session:
+    """Tracks the in-flight reply so a new user turn can barge in and cancel it."""
+
+    def __init__(self) -> None:
+        self.reply_task: asyncio.Task[None] | None = None
+
+    async def cancel_reply(self) -> None:
+        task, self.reply_task = self.reply_task, None
+        if task is not None and not task.done():
+            task.cancel()
+            with contextlib.suppress(asyncio.CancelledError, Exception):
+                await task
+
+    async def drain(self) -> None:
+        """Await the in-flight reply to natural completion (used when STT closes)."""
+        task = self.reply_task
+        if task is not None:
+            with contextlib.suppress(Exception):
+                await task
+
+
+async def _connect_stt(settings: Any) -> Any:
+    import websockets
+
+    return await websockets.connect(
+        stt_url(settings), additional_headers={"Authorization": settings.API_KEY}
+    )
+
+
+async def _connect_tts(settings: Any) -> Any:
+    import websockets
+
+    # max_size=None: a synthesis's Audio frames can exceed the 1 MiB default.
+    return await websockets.connect(
+        tts_url(settings),
+        additional_headers={"Authorization": settings.API_KEY},
+        max_size=None,
+    )
+
+
+async def _llm_stream(settings: Any, messages: list[dict[str, str]]) -> AsyncIterator[str]:
+    """Stream a chat completion for `messages`, yielding each non-empty text delta.
+
+    The `openai` client is pointed at `settings.LLM_GATEWAY_URL` and authenticated
+    with `settings.API_KEY`; the model is `settings.MODEL`.
+    """
+    from openai import AsyncOpenAI
+
+    client = AsyncOpenAI(base_url=settings.LLM_GATEWAY_URL, api_key=settings.API_KEY)
+    stream = await client.chat.completions.create(
+        model=settings.MODEL, messages=messages, stream=True
+    )
+    async for chunk in stream:
+        # A streamed chunk can arrive with an empty `choices` list (e.g. a usage-only
+        # chunk); indexing [0] on it would raise IndexError and abort the reply.
+        if not chunk.choices:
+            continue
+        delta = chunk.choices[0].delta.content
+        if delta:
+            yield delta
+
+
+async def _safe_close(conn: Any) -> None:
+    with contextlib.suppress(Exception):
+        await conn.close()
+
+
+async def _pump_mic(browser: Any, stt: Any) -> None:
+    """Forward each base64 mic frame from the browser to the STT socket."""
+    while True:
+        msg = await browser.recv()
+        if msg is None:
+            return
+        audio = msg.get("audio") if msg.get("type") == "input.audio" else None
+        if isinstance(audio, str):
+            await stt.send(base64.b64decode(audio))
+
+
+async def _synthesize(browser: Any, tts: Any, text: str) -> None:
+    """Drive the TTS protocol on an open socket, forwarding Audio as reply.audio."""
+    begin = json.loads(await tts.recv())
+    if begin.get("type") != "Begin":
+        raise RuntimeError(f"TTS did not begin (got {begin.get('type')!r}).")
+    await tts.send(json.dumps({"type": "Generate", "text": text}))
+    await tts.send(json.dumps({"type": "ForceFlushTextBuffer"}))
+    while True:
+        frame = json.loads(await tts.recv())
+        kind = frame.get("type")
+        if kind == "Audio":
+            await browser.send({"type": "reply.audio", "data": frame.get("audio", "")})
+            if frame.get("is_final"):
+                break
+        elif kind == "Error":
+            raise RuntimeError(frame.get("error") or "TTS error")
+    with contextlib.suppress(Exception):
+        await tts.send(json.dumps({"type": "Terminate"}))
+
+
+async def _speak(browser: Any, deps: Deps, text: str) -> None:
+    """Emit agent text, synthesize it, and mark the reply done."""
+    await browser.send({"type": "transcript.agent", "text": text})
+    tts = await deps.connect_tts()
+    try:
+        await _synthesize(browser, tts, text)
+    finally:
+        await _safe_close(tts)
+    await browser.send({"type": "reply.done", "status": "completed"})
+
+
+async def _generate_reply(browser: Any, deps: Deps, messages: list[dict[str, str]]) -> None:
+    """Stream the LLM reply, then speak it. Errors surface as session.error."""
+    try:
+        text = "".join([delta async for delta in deps.llm_stream(messages)]).strip()
+        if not text:
+            await browser.send({"type": "reply.done", "status": "empty"})
+            return
+        await _speak(browser, deps, text)
+    except asyncio.CancelledError:
+        raise
+    except Exception as exc:  # noqa: BLE001 — any leg failure becomes one clean event
+        await browser.send({"type": "session.error", "message": str(exc)})
+
+
+async def maybe_barge_in(browser: Any, session: Session) -> None:
+    """If a reply is playing, tell the browser to stop and cancel it."""
+    if session.reply_task is not None and not session.reply_task.done():
+        await browser.send({"type": "input.speech.started"})
+        await session.cancel_reply()
+
+
+async def _pump_stt(browser: Any, stt: Any, deps: Deps, session: Session) -> None:
+    """Read STT turns: emit user transcripts, reply on finalized turns, barge in on
+    interim speech, and drain the last reply when the socket closes."""
+    async for raw in stt:
+        msg = json.loads(raw)
+        if msg.get("type") != "Turn":
+            continue
+        text = msg.get("transcript", "")
+        if not text:
+            continue
+        await browser.send({"type": "transcript.user", "text": text})
+        if is_final_user_turn(msg):
+            await session.cancel_reply()
+            session.reply_task = asyncio.create_task(
+                _generate_reply(browser, deps, build_messages(deps.settings.SYSTEM_PROMPT, text))
+            )
+        else:
+            await maybe_barge_in(browser, session)
+    await session.drain()
+
+
+async def _greet(browser: Any, deps: Deps) -> None:
+    """Speak the configured greeting, reporting any failure as session.error.
+
+    The greeting runs as a background task whose result nobody awaits, so without
+    this wrapper a TTS failure would be dropped and the browser would never hear of it.
+    """
+    try:
+        await _speak(browser, deps, deps.settings.GREETING)
+    except asyncio.CancelledError:
+        raise
+    except Exception as exc:  # noqa: BLE001 — same one-clean-event contract as replies
+        await browser.send({"type": "session.error", "message": str(exc)})
+
+
+async def run_session(browser: Any, deps: Deps) -> None:
+    """Run one browser session: greet, then cascade STT -> LLM -> TTS until either
+    side closes. All credentials stay server-side.
+
+    Sends a single session.error and returns early when the settings report the
+    cascade as unavailable or the STT socket cannot be opened.
+    """
+    reason = unavailable_reason(deps.settings)
+    if reason is not None:
+        await browser.send({"type": "session.error", "message": reason})
+        return
+    try:
+        stt = await deps.connect_stt()
+    except Exception as exc:  # noqa: BLE001
+        await browser.send(
+            {"type": "session.error", "message": f"Could not start the session: {exc}"}
+        )
+        return
+
+    session = Session()
+    # The greeting is tracked as the in-flight reply so a user turn can barge in on it.
+    session.reply_task = asyncio.create_task(_greet(browser, deps))
+    mic = asyncio.create_task(_pump_mic(browser, stt))
+    listen = asyncio.create_task(_pump_stt(browser, stt, deps, session))
+    try:
+        # Whichever pump ends first (browser hung up or STT closed) ends the session.
+        await asyncio.wait({mic, listen}, return_when=asyncio.FIRST_COMPLETED)
+    finally:
+        mic.cancel()
+        listen.cancel()
+        await asyncio.gather(mic, listen, return_exceptions=True)
+        await session.cancel_reply()
+        await _safe_close(stt)
+```
+
+- [ ] **Step 4: Run to verify it passes**
+
+Run: `uv run pytest tests/test_init_template_agent_framework.py -q -k "pump_mic or synthesize or generate_reply or barge or run_session"`
+Expected: PASS. `test_run_session_happy_path` should not be flaky on task scheduling: the greeting reply is set before the pumps start, `_pump_stt` drains the reply before returning, and the mic pump blocks on `FakeBrowser`'s idle event. If a hang does occur, the `asyncio.wait_for(..., timeout=5)` fails loudly rather than wedging.
+
+- [ ] **Step 5: Format + lint the template module**
+
+Run: `uv run ruff format aai_cli/init/templates/agent-framework/api/cascade.py && uv run ruff check aai_cli/init/templates/agent-framework/api/cascade.py`
+Expected: clean (S105/TID251 are ignored for templates; the `# noqa: BLE001` keeps the broad-except lines clean).
+
+- [ ] **Step 6: Commit**
+
+```bash
+git add aai_cli/init/templates/agent-framework/api/cascade.py tests/test_init_template_agent_framework.py
+git commit -m "feat(agent-framework): cascade orchestrator"
+```
+
+---
+
+## Task 6: `api/index.py` — FastAPI app + WebSocket adapter
+
+**Files:**
+- Create: `aai_cli/init/templates/agent-framework/api/index.py`
+- Modify: `aai_cli/init/templates/agent-framework/api/cascade.py` (add `FastAPIBrowser`)
+- Test: `tests/test_init_template_agent_framework.py` (append)
+
+- [ ] **Step 1: Write the failing tests**
+
+Append to `tests/test_init_template_agent_framework.py`:
+
+```python
+def test_index_serves_page(monkeypatch):
+    index = _load("api.index", monkeypatch, ASSEMBLYAI_API_KEY="sk-test")
+    from fastapi.testclient import TestClient
+
+    resp = TestClient(index.app).get("/")
+    assert resp.status_code == 200
+    assert "<html" in resp.text.lower()
+
+
+def test_ws_route_runs_cascade(monkeypatch):
+    # Drive the real /ws adapter with TestClient's WebSocket, but stub run_session so
+    # the route's accept + adapter wiring is exercised without real upstreams.
+    index = _load("api.index", monkeypatch, ASSEMBLYAI_API_KEY="sk-test")
+    cascade = importlib.import_module("api.cascade")
+
+    async def fake_run_session(browser, _deps):
+        msg = await browser.recv()
+        await browser.send({"type": "echo", "got": msg})
+
+    monkeypatch.setattr(cascade, "run_session", fake_run_session)
+    from fastapi.testclient import TestClient
+
+    with TestClient(index.app).websocket_connect("/ws") as ws:
+        ws.send_json({"type": "input.audio", "audio": "AAA="})
+        assert ws.receive_json() == {"type": "echo", "got": {"type": "input.audio", "audio": "AAA="}}
+
+
+def test_fastapi_browser_recv_returns_none_on_disconnect(monkeypatch):
+    cascade = _cascade(monkeypatch)
+    from fastapi import WebSocketDisconnect
+
+    class FakeWSStarlette:
+        def __init__(self):
+            self.sent: list[dict] = []
+
+        async def send_json(self, event):
+            self.sent.append(event)
+
+        async def receive_json(self):
+            raise WebSocketDisconnect(code=1000)
+
+    ws = FakeWSStarlette()
+    browser = cascade.FastAPIBrowser(ws)
+
+    async def drive():
+        await browser.send({"type": "x"})
+        return await browser.recv()
+
+    assert asyncio.run(drive()) is None
+    assert ws.sent == [{"type": "x"}]
+```
+
+- [ ] **Step 2: Run to verify it fails**
+
+Run: `uv run pytest tests/test_init_template_agent_framework.py -q -k "index_serves or ws_route or fastapi_browser"`
+Expected: FAIL — `api.index` and `cascade.FastAPIBrowser` don't exist.
+
+- [ ] **Step 3: Add `FastAPIBrowser` to `cascade.py`**
+
+Append to `aai_cli/init/templates/agent-framework/api/cascade.py`:
+
+```python
+class FastAPIBrowser:
+    """Adapts a Starlette WebSocket to the (send, recv) shape run_session expects.
+    recv() returns None when the client disconnects, so the pumps exit cleanly."""
+
+    def __init__(self, websocket: Any) -> None:
+        self._ws = websocket
+
+    async def send(self, event: dict[str, Any]) -> None:
+        await self._ws.send_json(event)
+
+    async def recv(self) -> dict[str, Any] | None:
+        from fastapi import WebSocketDisconnect
+
+        try:
+            return await self._ws.receive_json()
+        except WebSocketDisconnect:
+            return None
+```
+
+- [ ] **Step 4: Write `api/index.py`**
+
+Create `aai_cli/init/templates/agent-framework/api/index.py`:
+
+```python
+"""Talk to a cascaded voice agent — AssemblyAI agent-framework starter (FastAPI).
+
+The browser opens one WebSocket to this backend, which runs the cascade itself —
+Streaming STT -> LLM Gateway -> streaming TTS — so your API key never reaches the
+client. Streaming TTS is sandbox-only, so scaffold with `assembly --sandbox init
+agent-framework` and use a sandbox key.
+
+  WS /ws  <- {"type":"input.audio","audio":<b64 pcm>} ; -> transcripts + reply.audio
+"""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+from fastapi import FastAPI, WebSocket
+from fastapi.responses import FileResponse
+from fastapi.staticfiles import StaticFiles
+
+from api import cascade, settings
+
+ROOT = Path(__file__).resolve().parent.parent
+STATIC = ROOT / "static"
+app = FastAPI()
+app.mount("/static", StaticFiles(directory=STATIC), name="static")
+
+
+@app.get("/")
+def index() -> FileResponse:
+    return FileResponse(STATIC / "index.html")
+
+
+@app.websocket("/ws")
+async def ws(websocket: WebSocket) -> None:
+    """Accept the browser socket and run one cascade session over it."""
+    await websocket.accept()
+    browser = cascade.FastAPIBrowser(websocket)
+    await cascade.run_session(browser, cascade.Deps.real(settings))
+```
+
+- [ ] **Step 5: Run to verify it passes**
+
+Run: `uv run pytest tests/test_init_template_agent_framework.py -q`
+Expected: PASS (all tests). Also confirm the registry directory test now passes:
+Run: `uv run pytest tests/test_init_templates.py -q`
+Expected: PASS.
+
+- [ ] **Step 6: Format + lint**
+
+Run: `uv run ruff format aai_cli/init/templates/agent-framework/api/ && uv run ruff check aai_cli/init/templates/agent-framework/api/`
+Expected: clean.
+
+- [ ] **Step 7: Commit**
+
+```bash
+git add aai_cli/init/templates/agent-framework/api/index.py aai_cli/init/templates/agent-framework/api/cascade.py tests/test_init_template_agent_framework.py
+git commit -m "feat(agent-framework): FastAPI app + websocket adapter"
+```
+
+---
+
+## Task 7: Frontend — `index.html` + `app.js`
+
+**Files:**
+- Create: `aai_cli/init/templates/agent-framework/static/index.html`
+- Create: `aai_cli/init/templates/agent-framework/static/app.js`
+
+- [ ] **Step 1: Write `static/index.html`**
+
+Create `aai_cli/init/templates/agent-framework/static/index.html` (same structure/IDs/classes as voice-agent, cascade-worded copy):
+
+```html
+<!DOCTYPE html>
+<html lang="en">
+<head>
+  <meta charset="utf-8" />
+  <meta name="viewport" content="width=device-width, initial-scale=1" />
+  <title>Talk to a cascaded voice agent · AssemblyAI</title>
+  <link rel="stylesheet" href="/static/styles.css" />
+</head>
+<body class="template-page voice-agent-template">
+  <main class="app-shell">
+    <a class="brand" href="https://www.assemblyai.com" target="_blank" rel="noopener">
+      <img class="brand-logo" src="https://www.assemblyai.com/_aai/images/logos/assemblyai-logo-primary.svg" alt="AssemblyAI" />
+    </a>
+
+    <header class="page-header">
+      <span class="eyebrow">Streaming STT · LLM Gateway · TTS</span>
+      <h1 class="page-title">Talk to a cascaded voice agent</h1>
+      <p class="page-subtitle">Connect and just talk. This agent is a cascade your backend wires together — Streaming STT transcribes you, the LLM Gateway replies, and streaming TTS speaks it back, with turn detection and barge-in handled server-side. Your API key stays on the server. Headphones give the cleanest result.</p>
+    </header>
+
+    <div class="control-bar">
+      <button id="conn" class="button connection-button" data-state="idle">● Connect</button>
+      <span id="status" class="status-pill" aria-live="polite"></span>
+    </div>
+
+    <div id="log" class="conversation-log"></div>
+
+    <footer class="page-footer">
+      <span>Built with AssemblyAI</span>
+      <a class="footer-link" href="https://www.assemblyai.com" target="_blank" rel="noopener">assemblyai.com →</a>
+    </footer>
+  </main>
+
+  <script src="/static/audio.js"></script>
+  <script src="/static/app.js"></script>
+</body>
+</html>
+```
+
+- [ ] **Step 2: Write `static/app.js`**
+
+Create `aai_cli/init/templates/agent-framework/static/app.js`. Same event handling as voice-agent's `onEvent`/`addTurn`/`bargeIn` (so `audio.js` and the UI carry over), but it opens the same-origin `/ws` directly — no token fetch, no `session.update`:
+
+```javascript
+const SESSION_CONFIG = {
+  inputSampleRate: 16000,
+  outputSampleRate: 24000,
+  processorBufferSize: 4096,
+  microphone: { audio: { echoCancellation: true, noiseSuppression: false } },
+};
+
+const connBtn = document.getElementById("conn");
+const statusEl = document.getElementById("status");
+const logEl = document.getElementById("log");
+
+let ws = null;
+let micPipeline = null;
+let player = null;
+let connected = false;
+
+connBtn.addEventListener("click", () =>
+  connected ? hangup() : connect().catch(fail),
+);
+
+function setStatus(message, state) {
+  statusEl.textContent = message;
+  statusEl.dataset.state = state;
+}
+
+function wsUrl() {
+  const scheme = location.protocol === "https:" ? "wss" : "ws";
+  return `${scheme}://${location.host}/ws`;
+}
+
+async function connect() {
+  setStatus("Connecting...", "idle");
+  ws = new WebSocket(wsUrl());
+  ws.onopen = () => startMic().catch(fail);
+  ws.onmessage = (event) => onEvent(JSON.parse(event.data));
+  ws.onerror = () => fail("WebSocket error");
+  ws.onclose = () => {
+    if (connected) hangup();
+  };
+}
+
+async function startMic() {
+  const stream = await navigator.mediaDevices.getUserMedia(
+    SESSION_CONFIG.microphone,
+  );
+  micPipeline = AudioHelpers.createMicrophonePipeline(stream, {
+    bufferSize: SESSION_CONFIG.processorBufferSize,
+  });
+  player = AudioHelpers.createPcmPlayer({
+    sampleRate: SESSION_CONFIG.outputSampleRate,
+  });
+  await player.resume();
+  await micPipeline.start((frame, sampleRate) => {
+    if (!ws || ws.readyState !== WebSocket.OPEN) return;
+    const pcm = AudioHelpers.downsampleToPCM(
+      frame,
+      sampleRate,
+      SESSION_CONFIG.inputSampleRate,
+    );
+    ws.send(
+      JSON.stringify({
+        type: "input.audio",
+        audio: AudioHelpers.bytesToB64(pcm),
+      }),
+    );
+  });
+
+  connected = true;
+  connBtn.textContent = "■ Hang up";
+  connBtn.dataset.state = "connected";
+  setStatus("● Connected - just talk", "live");
+}
+
+// Dispatch one backend event to the UI / audio player.
+function onEvent(event) {
+  switch (event.type) {
+    case "transcript.user":
+      return addTurn("you", "You", event.text);
+    case "transcript.agent":
+      return addTurn("agent", "Agent", event.text);
+    case "reply.audio":
+      // The backend starts its greeting as soon as the socket is accepted, which
+      // can be before startMic() has created the player (e.g. while the mic
+      // permission prompt is still up). Skip the chunk instead of throwing.
+      if (player) player.playBase64Chunk(event.data);
+      return;
+    case "input.speech.started":
+      return bargeIn();
+    case "reply.done":
+      if (event.status === "interrupted") bargeIn();
+      return;
+    case "session.error":
+      return fail(event.message || "session error");
+  }
+}
+
+function bargeIn() {
+  if (player) player.stopQueuedAudio();
+}
+
+function addTurn(speakerKind, speaker, text) {
+  if (!text) return;
+  const turn = document.createElement("div");
+  turn.className = "conversation-turn";
+  turn.dataset.speaker = speakerKind;
+  const who = document.createElement("span");
+  who.className = "turn-speaker";
+  who.textContent = speaker + ": ";
+  turn.append(who, document.createTextNode(text));
+  logEl.appendChild(turn);
+  turn.scrollIntoView({ block: "end" });
+}
+
+function hangup() {
+  connected = false;
+  connBtn.textContent = "● Connect";
+  connBtn.dataset.state = "idle";
+  setStatus("Disconnected", "idle");
+  bargeIn();
+  if (ws && ws.readyState === WebSocket.OPEN) ws.close();
+  if (micPipeline) micPipeline.close();
+  if (player) player.close();
+  ws = null;
+  micPipeline = null;
+  player = null;
+}
+
+// Tear the session down and show why. Accepts a string or an Error object.
+function fail(message) {
+  // connect()/startMic() rejections arrive as Error objects; show their text.
+  const detail = message instanceof Error ? message.message : String(message);
+  // Tear down first: hangup() sets the status to "Disconnected", so the error
+  // must be written afterwards or it is overwritten immediately. Hanging up
+  // unconditionally also closes a socket that opened before the mic failed.
+  hangup();
+  setStatus("Error: " + detail, "error");
+}
+```
+
+- [ ] **Step 3: Prettier-format the JS/CSS (the gate runs `prettier --check`)**
+
+Run: `prettier --write "aai_cli/init/templates/agent-framework/static/*.js" "aai_cli/init/templates/agent-framework/static/*.css"`
+Then verify: `prettier --check "aai_cli/init/templates/agent-framework/static/*.{js,css}"`
+Expected: "All matched files use Prettier code style!"
+
+- [ ] **Step 4: Verify the frontend↔backend route contract + static refs**
+
+Run: `uv run pytest "tests/test_init_template_contract.py::test_static_assets_referenced_by_html_exist[agent-framework]" "tests/test_init_template_contract.py::test_frontend_routes_exist_in_backend[agent-framework]" -q`
+Expected: PASS (the page references `styles.css`/`audio.js`/`app.js`, all present; it fetches no `/api/*` path — it uses a WebSocket — so the route check is satisfied trivially).
+
+- [ ] **Step 5: Commit**
+
+```bash
+git add aai_cli/init/templates/agent-framework/static/index.html aai_cli/init/templates/agent-framework/static/app.js
+git commit -m "feat(agent-framework): frontend (cascade UI + /ws client)"
+```
+
+---
+
+## Task 8: Scaffold parity files (deploy + docs + deps)
+
+**Files (all create under `aai_cli/init/templates/agent-framework/`):**
+- `requirements.txt`, `env.example`, `gitignore`, `runtime.txt`, `vercel.json`, `Procfile`, `Dockerfile`, `dockerignore`, `README.md`, `AGENTS.md`
+
+- [ ] **Step 1: `requirements.txt`**
+
+```text
+fastapi>=0.136.3
+uvicorn>=0.30.0
+websockets>=14.1
+openai>=1.54.0
+python-dotenv>=1.2.2
+# Pin starlette directly: FastAPI's own floor still admits versions with known CVEs,
+# so raise the transitive floor above them.
+starlette>=1.2.1
+```
+
+(`websockets` uses `additional_headers`, supported from 14.x; if the `install` test reports it unsupported, bump the floor. `openai>=1.54.0` provides `AsyncOpenAI` + streamed `chat.completions`.)
+
+- [ ] **Step 2: `env.example`**
+
+```text
+ASSEMBLYAI_API_KEY=your_assemblyai_api_key_here
+# This cascade uses streaming TTS, which is sandbox-only — use a sandbox key and the
+# sandbox hosts (assembly --sandbox init agent-framework fills these in for you):
+# ASSEMBLYAI_STREAMING_HOST=streaming.sandbox000.assemblyai-labs.com
+# ASSEMBLYAI_TTS_HOST=streaming-tts.sandbox000.assemblyai-labs.com
+# ASSEMBLYAI_LLM_GATEWAY_URL=https://llm-gateway.sandbox000.assemblyai-labs.com/v1
+```
+
+- [ ] **Step 3: `gitignore`, `runtime.txt`, `vercel.json`, `dockerignore` (copy voice-agent's shapes)**
+
+Run:
+
+```bash
+SRC=aai_cli/init/templates/voice-agent
+DST=aai_cli/init/templates/agent-framework
+cp "$SRC/gitignore" "$DST/gitignore"
+cp "$SRC/runtime.txt" "$DST/runtime.txt"
+cp "$SRC/vercel.json" "$DST/vercel.json"
+cp "$SRC/dockerignore" "$DST/dockerignore"
+```
+
+- [ ] **Step 4: `Procfile`**
+
+```text
+web: python -m uvicorn api.index:app --host 0.0.0.0 --port ${PORT:-3000}
+```
+
+- [ ] **Step 5: `Dockerfile` (copy voice-agent's — it already satisfies the contract: EXPOSE 8080, `${PORT:-8080}`, non-root USER)**
+
+Run: `cp aai_cli/init/templates/voice-agent/Dockerfile aai_cli/init/templates/agent-framework/Dockerfile`
+
+- [ ] **Step 6: `README.md`**
+
+```markdown
+# Talk to a cascaded voice agent — AssemblyAI agent-framework starter
+
+Click connect and talk. Unlike the `voice-agent` template (which uses AssemblyAI's
+all-in-one Voice Agent API), this app is a **cascade your own backend orchestrates**:
+Streaming STT transcribes you, the LLM Gateway generates a reply, and streaming TTS
+speaks it back — with turn detection and barge-in handled server-side. The browser
+holds one WebSocket to your backend, so your API key never reaches the client.
+
+## Sandbox-only
+
+Streaming TTS has no production host, so the whole cascade runs against the AssemblyAI
+sandbox with a sandbox key. Scaffold it that way:
+
+```sh
+assembly --sandbox init agent-framework
+```
+
+That pins the sandbox hosts in `.env`. Running against production exits with a hint.
+
+## Run locally
+
+```sh
+assembly dev   # opens http://localhost:3000 (allow microphone access; headphones recommended)
+```
+
+`ASSEMBLYAI_API_KEY` is read from `.env` (created for you by `assembly init`).
+
+## Deploy
+
+This app keeps a **long-running WebSocket**, so it needs a persistent process — not
+Vercel's serverless functions. Use the shipped `Procfile`/`Dockerfile` on Render,
+Railway, Fly.io, or Google Cloud Run (`gcloud run deploy --source .`):
+
+```sh
+uvicorn api.index:app --host 0.0.0.0 --port $PORT
+```
+
+Set `ASSEMBLYAI_API_KEY` and the three sandbox host vars (`ASSEMBLYAI_STREAMING_HOST`,
+`ASSEMBLYAI_TTS_HOST`, `ASSEMBLYAI_LLM_GATEWAY_URL`) in the platform's environment.
+
+## Ideas to extend
+
+- Change the `MODEL`, `VOICE`, `SYSTEM_PROMPT`, or `GREETING` in `api/settings.py`.
+- Stream each LLM sentence into TTS as it completes (lower latency) instead of
+  synthesizing the whole reply at once — see `_generate_reply` in `api/cascade.py`.
+- Add tools (function calling) on the LLM leg so the agent can look things up.
+```
+
+- [ ] **Step 7: `AGENTS.md` (must contain `ASSEMBLYAI_API_KEY`, `buildless`, `static/app.js` for the contract)**
+
+```markdown
+# Agent Notes
+
+This is a buildless FastAPI + browser starter for a **cascaded** voice agent
+(Streaming STT -> LLM Gateway -> streaming TTS), orchestrated server-side. Run it with:
+
+```sh
+assembly dev
+```
+
+## Map
+
+- `api/settings.py`: API key, hosts, model, voice, system prompt, greeting, sample rates.
+- `api/cascade.py`: the orchestrator — STT/TTS socket helpers, the LLM stream, turn
+  detection, barge-in, and the `/ws` browser adapter. Built with injected `Deps` so it
+  is tested against fakes.
+- `api/index.py`: FastAPI app — serves the page/assets and the `/ws` WebSocket.
+- `static/app.js`: WebSocket lifecycle, mic capture, UI state, and event handling
+  (`_CONFIG` block at the top is the primary edit point).
+- `static/audio.js`: microphone pipeline, PCM conversion, playback queue, barge-in.
+- `static/styles.css`: visual styling only; the top `:root` block is the theme edit point.
+- `static/index.html`: page structure and static asset links.
+
+## Change Points
+
+- Model, voice, prompt, greeting, sample rates: edit `api/settings.py`.
+- Cascade behavior (turn detection, barge-in, LLM->TTS piping): edit `api/cascade.py`.
+- Transcript log rendering: edit `addTurn` in `static/app.js`.
+- Playback, barge-in, or PCM conversion: edit `static/audio.js`.
+
+## Invariants
+
+- Never expose `ASSEMBLYAI_API_KEY` or any server secret in `static/`.
+- Streaming TTS is sandbox-only; keep this app pointed at the sandbox hosts.
+- `reply.audio` carries base64 PCM on the `data` field.
+- The browser ↔ backend event protocol matches the `voice-agent` template — keep it
+  stable so `static/audio.js` and the UI stay reusable.
+- Keep the app buildless unless the user explicitly asks for a frontend toolchain.
+```
+
+- [ ] **Step 8: Run the full parametrized contract suite for this template**
+
+Run: `uv run pytest tests/test_init_template_contract.py tests/test_init_template_serve.py -q -k agent-framework`
+Expected: PASS for every parametrized case (`agent-framework`): required files, vercel framework pin, Dockerfile shape, dockerignore `.env`, no `public/`, Procfile, runtime pin, static refs, AGENTS edit points, no committed dotenv, requirements cover imports + pinned, root + static assets served.
+
+- [ ] **Step 9: Commit**
+
+```bash
+git add aai_cli/init/templates/agent-framework/requirements.txt aai_cli/init/templates/agent-framework/env.example aai_cli/init/templates/agent-framework/gitignore aai_cli/init/templates/agent-framework/runtime.txt aai_cli/init/templates/agent-framework/vercel.json aai_cli/init/templates/agent-framework/Procfile aai_cli/init/templates/agent-framework/Dockerfile aai_cli/init/templates/agent-framework/dockerignore aai_cli/init/templates/agent-framework/README.md aai_cli/init/templates/agent-framework/AGENTS.md
+git commit -m "feat(agent-framework): deploy, docs, and dependency scaffold"
+```
+
+---
+
+## Task 9: Regenerate snapshots + full gate
+
+**Files:**
+- Modify: `tests/__snapshots__/test_snapshots_help_build.ambr` (regenerated)
+
+- [ ] **Step 1: Regenerate the `--help` snapshots (the init arg help now lists the new template)**
+
+Run: `uv run pytest tests/test_snapshots_help_build.py --snapshot-update -q`
+Then review: `git diff tests/__snapshots__/test_snapshots_help_build.ambr`
+Expected: the only change is `agent-framework` appended to the `init` template enumeration. If other help snapshot files changed, regenerate them too (`uv run pytest -k snapshots_help --snapshot-update`).
+
+- [ ] **Step 2: Run the targeted suites green**
+
+Run: `uv run pytest tests/test_init_template_agent_framework.py tests/test_init_templates.py tests/test_init_command.py tests/test_init_template_contract.py tests/test_init_template_serve.py -q`
+Expected: all PASS.
+
+- [ ] **Step 3: Run the install smoke test for this template (network + uv required)**
+
+Run: `uv run pytest -m install -q -k agent-framework`
+Expected: PASS (requirements install into a clean venv and `api.index` imports). If `websockets`/`openai` floors are wrong, bump them in `requirements.txt` and re-run.
+
+- [ ] **Step 4: Run the full gate**
+
+Run: `./scripts/check.sh`
+Expected: ends with `All checks passed.` Watch specifically for:
+- `prettier` (template JS/CSS) — clean.
+- `ruff`/`ruff format` over `api/*.py` — clean.
+- `diff-cover` 100% patch coverage — every new `cascade.py`/`index.py`/`settings.py` line is covered by Task 3–6 tests. If a line is reported uncovered, add a direct assertion (do not add `pragma: no cover` for reachable orchestration lines).
+- mutation gate — a surviving mutant means a changed line lacks a *failing-on-break* assertion; strengthen the relevant test.
+- the init template contract gate + unused snapshot/fixture gate.
+
+- [ ] **Step 5: Commit the regenerated snapshot (only if not already committed) and finalize**
+
+```bash
+git add tests/__snapshots__/test_snapshots_help_build.ambr
+git commit -m "test(init): refresh --help snapshot for agent-framework template"
+```
+
+---
+
+## Self-review notes (resolved)
+
+- **Spec coverage:** every spec section maps to a task — architecture/orchestrator (T4–T6), components (T2–T8), CLI wiring (T1), deploy/sandbox caveats (T8 README/AGENTS/settings guard), error handling (T5 `session.error` paths), testing (T3–T9).
+- **Import-time safety:** `settings.py` never raises (T3 test `test_settings_imports_without_key_or_tts_host`); the availability guard is in `run_session` (T5).
+- **Coverage/mutation burden:** orchestrator is decomposed into directly-testable units (`unavailable_reason`, `stt_url`, `tts_url`, `is_final_user_turn`, `build_messages`, `_pump_mic`, `_synthesize`, `_speak`, `_generate_reply`, `maybe_barge_in`, `_pump_stt`, `run_session`, `FastAPIBrowser`), each with an asserting test.
+- **Naming consistency:** `Deps`, `Session`, `run_session`, `_synthesize`, `_generate_reply`, `_speak`, `_pump_mic`, `_pump_stt`, `maybe_barge_in`, `FastAPIBrowser` used identically across tasks and tests.
+- **Gated assertions:** the exact help-string test (T1) and the `--help` snapshot (T9) are both updated for the new registry entry.
+```
diff --git a/docs/superpowers/specs/2026-06-15-agent-framework-template-design.md b/docs/superpowers/specs/2026-06-15-agent-framework-template-design.md
new file mode 100644
index 00000000..c12ba079
--- /dev/null
+++ b/docs/superpowers/specs/2026-06-15-agent-framework-template-design.md
@@ -0,0 +1,134 @@
+# `agent-framework` init template — design
+
+**Date:** 2026-06-15
+**Status:** Approved (design); pending implementation plan
+
+## Goal
+
+Add a fourth `assembly init` starter template, `agent-framework`, that delivers the
+same browser UI/UX as the existing `voice-agent` template but is built on a
+**cascaded** architecture instead of AssemblyAI's all-in-one Voice Agent endpoint.
+The cascade wires three primitives together server-side:
+
+1. **Streaming STT** (v3 realtime WebSocket) — speech in, turn detection.
+2. **LLM Gateway** (OpenAI-compatible HTTP) — reply generation.
+3. **Streaming TTS** (sandbox WebSocket) — speech out.
+
+This is the "framework" you would build yourself if the managed Voice Agent did not
+exist, so it is a useful, instructive starter for users who want control over each leg.
+
+## Architecture
+
+```
+Browser ──mic PCM (16k)──►  FastAPI /ws  ──audio bytes──►  STT WS (v3)
+        ◄──transcripts────         │       ◄──Turn/end_of_turn──┘
+        ◄──reply.audio (24k)──     ├──finalized turn──►  LLM Gateway (OpenAI-compatible, streamed)
+                                   └──reply text──►  TTS WS (sandbox) ──Audio──► back to browser
+```
+
+The browser opens **one** same-origin WebSocket to our FastAPI backend. The backend
+runs the full cascade and keeps the API key used by all three legs server-side. No
+token mint is needed (unlike `voice-agent`/`live-captions`, which mint short-lived
+tokens because the browser connects directly to AssemblyAI).
+
+### Browser ↔ backend protocol (identical to `voice-agent`)
+
+Reusing the existing event vocabulary keeps `static/audio.js` unchanged and reduces
+`static/app.js` to a connection-setup change.
+
+- Browser → server:
+  - `{type: "input.audio", audio: <base64 PCM>}` — one mic frame.
+  - `{type: "session.update", session: {...}}` — optional; the backend may apply
+    `system_prompt`/`greeting`/`voice` overrides or ignore it. Kept for parity.
+- Server → browser:
+  - `{type: "transcript.user", text}` — STT transcript (partial and final).
+  - `{type: "transcript.agent", text}` — the LLM reply text.
+  - `{type: "reply.audio", data: <base64 PCM>}` — a TTS audio chunk.
+  - `{type: "input.speech.started"}` — barge-in: user started talking; browser stops
+    queued audio.
+  - `{type: "reply.done", status}` — reply finished (or `interrupted`).
+  - `{type: "session.error", message}` — any leg failed; surfaced in the UI.
+
+## Components (template files)
+
+- `api/index.py` — FastAPI app. Serves `index.html` + `/static`, and exposes
+  `@app.websocket("/ws")` which hands each accepted connection to the orchestrator.
+- `api/settings.py` — config from env: `ASSEMBLYAI_API_KEY`, `ASSEMBLYAI_STREAMING_HOST`,
+  `ASSEMBLYAI_TTS_HOST`, `ASSEMBLYAI_LLM_GATEWAY_URL`, model (`claude-haiku-4-5-20251001`),
+  voice (`jane`), system prompt, greeting, sample rates (16 kHz in, 24 kHz out). Importing
+  it never raises; the session instead fails fast with an actionable message when
+  `ASSEMBLYAI_TTS_HOST` is empty (production has no streaming-TTS host).
+- `api/cascade.py` — per-session async orchestrator:
+  - Opens the STT WS (API key auth) and forwards mic bytes from the browser.
+  - Reads STT `Turn` events: emits `transcript.user` for partials; on `end_of_turn`
+    (formatted final) triggers the reply pipeline.
+  - Reply pipeline: streams the LLM completion (emitting `transcript.agent`), pipes the
+    reply text into a TTS WS (Begin → Generate → ForceFlushTextBuffer → Audio frames →
+    Terminate, mirroring `aai_cli/tts/session.py`), and forwards each Audio frame as
+    `reply.audio`.
+  - Barge-in: a new non-empty user partial while a reply is in flight emits
+    `input.speech.started` and cancels the in-flight LLM/TTS task.
+  - Speaks the configured greeting on connect (greeting text → TTS → `reply.audio`).
+  - Tears down cleanly on browser disconnect / socket close / LLM error, cancelling
+    sibling tasks.
+- `static/index.html` — copy of `voice-agent`'s page with the eyebrow/title/subtitle
+  reworded to describe the cascade; IDs/classes unchanged.
+- `static/styles.css` — identical to `voice-agent`.
+- `static/audio.js` — identical to `voice-agent` (mic pipeline, PCM player, downsample,
+  base64 helpers).
+- `static/app.js` — same event handling as `voice-agent`; `connect()` opens a same-origin
+  `/ws` directly (no `/api/token` fetch).
+- Scaffold parity files: `README.md`, `AGENTS.md`, `env.example`, `gitignore`,
+  `requirements.txt` (adds `websockets` + `openai` to the FastAPI/uvicorn base),
+  `Procfile`, `Dockerfile`, `dockerignore`, `runtime.txt`, `vercel.json`.
+
+## Stack
+
+Async throughout: the `websockets` async client (STT + TTS), `openai.AsyncOpenAI`
+pointed at the gateway base (streamed completion), and FastAPI/Starlette WebSockets for
+the browser side. Served as a long-lived process by `uvicorn`.
+
+## CLI wiring (shared edits — unavoidable for a new template)
+
+- `aai_cli/init/templates.py` — add `"agent-framework": "Agent Framework"` to `TEMPLATES`
+  and to `TEMPLATE_ORDER`.
+- `aai_cli/app/init_exec.py` — add `"ASSEMBLYAI_TTS_HOST": env.streaming_tts_host` to
+  `_active_env_vars()`. This appends one extra (unused, empty-in-prod) var to every
+  template's `.env`; harmless to the others and required by `agent-framework`.
+
+These are the standard registration touch-points for a template; the "a new command
+edits no shared file" rule applies to commands, not templates.
+
+## Deploy / operational caveats
+
+- **Sandbox-only.** Streaming TTS has no production host (`streaming_tts_host` is empty
+  in `production`). A credential is valid only against the environment that minted it,
+  so the *entire* cascade must point at `sandbox000` with a sandbox key. The README
+  leads with `assembly --sandbox init agent-framework`, which pins all three hosts to
+  sandbox via `_active_env_vars()`. Running against production exits fast with a
+  `--sandbox` hint.
+- **Not Vercel-serverless.** The persistent browser WebSocket needs a long-lived
+  process, so the primary deploy path is the shipped `Procfile`/`Dockerfile` (Render,
+  Railway, Fly, Cloud Run). `vercel.json` is retained for static parity, but the README
+  is explicit that the WebSocket requires a long-running host.
+
+## Error handling
+
+Every leg maps a failure to a single `session.error` event to the browser (mirroring
+`voice-agent`). The orchestrator cancels sibling tasks on browser disconnect, STT/TTS
+socket close, or LLM error, so a session never leaks tasks or sockets.
+
+## Testing
+
+The parametrized init-template contract tests (`tests/test_init_template_*.py`) cover
+the new template automatically once it is in `TEMPLATE_ORDER`: required files present,
+renamed dotfiles (`gitignore` → `.gitignore`, `env.example`), wheel packaging, and
+ruff/prettier cleanliness. The plan will confirm exactly what those contracts assert and
+add template-specific coverage where needed (notably the new `ASSEMBLYAI_TTS_HOST` env
+var and the prod fail-fast path).
+
+## Out of scope (YAGNI)
+
+- Function calling / tools on the LLM leg (left as an "ideas to extend" note).
+- Sentence-level TTS streaming tuning beyond what is needed for acceptable latency.
+- A production TTS path (does not exist yet).
diff --git a/pyproject.toml b/pyproject.toml
index 73047a80..d614d42d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -161,8 +161,9 @@ markers = [
 [tool.mypy]
 python_version = "3.12"
 files = ["aai_cli", "tests"]
-# Init templates are packaged scaffold assets, not importable package modules.
-exclude = ["^aai_cli/init/templates/"]
+# Init templates ARE type-checked: they're importable packages
+# (aai_cli.init.templates.<name>.api.*) whose api/ code must stay strict-clean against
+# the real SDK types, not just shipped as scaffold text.
 # Third-party deps (assemblyai, sounddevice) ship no type stubs.
 ignore_missing_imports = true
 disallow_untyped_defs = true
@@ -213,7 +214,12 @@ disable_error_code = ["annotation-unchecked"]
 # pyrightconfig.tests.json from scripts/check.sh so pytest fixtures/mocks don't
 # create thousands of low-value strict diagnostics.
 include = ["aai_cli"]
-exclude = ["aai_cli/init/templates/**"]
+# Re-list pyright's built-in excludes explicitly (defining any `exclude` drops the
+# defaults, which pyright warns about). NOTE: init templates are deliberately NOT here —
+# their api/ code is strict-clean and type-checked in-tree
+# (aai_cli.init.templates.<name>.api.*), the same bar as the rest of the package; only
+# generated/hidden dirs are skipped.
+exclude = ["**/node_modules", "**/__pycache__", "**/.*"]
 pythonVersion = "3.12"
 typeCheckingMode = "strict"
 # Third-party deps (assemblyai, sounddevice) ship no type stubs.
@@ -224,6 +230,28 @@ reportMissingTypeStubs = false
 venvPath = "."
 venv = ".venv"
 
+# Editors (Pylance) read this config and also analyze whatever test file you have
+# open. The `include` above scopes the *gate's* `pyright` run to aai_cli (strict), but
+# the editor checks open tests too — and applies the strict mode above, surfacing
+# thousands of low-value pytest fixture/mock diagnostics (untyped `monkeypatch`,
+# unknown member/parameter types, …). `executionEnvironments` can't set
+# `typeCheckingMode`, so we turn off the strict-only rules below for tests/ (the
+# "unknown type" family plus a few others) — matching the standard mode the gate uses
+# for tests (pyrightconfig.tests.json in scripts/check.sh). Editor-facing only: this
+# config's gate run never analyzes tests/ (not in `include`), so the gate is unchanged.
+[[tool.pyright.executionEnvironments]]
+root = "tests"
+reportUnknownParameterType = "none"
+reportMissingParameterType = "none"
+reportUnknownMemberType = "none"
+reportUnknownVariableType = "none"
+reportUnknownArgumentType = "none"
+reportUnknownLambdaType = "none"
+reportPrivateUsage = "none"
+reportUnusedFunction = "none"
+reportMissingTypeArgument = "none"
+reportUntypedNamedTuple = "none"
+
 [tool.ruff]
 line-length = 100
 target-version = "py312"
@@ -336,7 +364,14 @@ max-statements = 40
 # Template constants include URL path names such as TOKEN_PATH, not credentials.
 # TID251: the scaffolds are end-user example apps that read their own config straight
 # from os.environ — that's correct, idiomatic code to ship, not a CLI-internal env read.
-"aai_cli/init/templates/**" = ["S105", "TID251"]
+# BLE001: starter apps funnel any leg failure into one user-facing error event/response
+# (a broad `except Exception` is the right shape to ship), so the blind-except lint
+# doesn't apply to scaffolds.
+# TID252: scaffolds ship as a self-contained top-level `api/` package, so their inner
+# imports must be relative (`from . import settings`) — that's the one form that resolves
+# both in the shipped app (`uvicorn api.index:app`) and when type-checked in-tree as
+# aai_cli.init.templates.<name>.api. Absolute `from api import …` can't satisfy both.
+"aai_cli/init/templates/**" = ["S105", "TID251", "BLE001", "TID252"]
 # ENV_CLIENT_TOKEN holds an env-var *name*; the shipped token constant is empty in
 # source (release builds inject the write-only client token).
 "aai_cli/core/telemetry.py" = ["S105"]
diff --git a/scripts/check.sh b/scripts/check.sh
index cbd8e511..1dffd1f4 100755
--- a/scripts/check.sh
+++ b/scripts/check.sh
@@ -21,6 +21,13 @@ cleanup_generated_code_dir() {
 echo "==> uv lock freshness"
 uv lock --check
 
+echo "==> validate-pyproject (pyproject.toml schema)"
+# Validate pyproject's standardized tables ([build-system]/[project]) against the PyPA
+# JSON schemas. Run via uvx (like twine/codespell below) so it needs no dev-dep/uv.lock
+# entry; --with packaging enables full requirement/license-expression checks. Unknown
+# [tool.*] tables (ruff/mypy/pyright/…) are intentionally left to those tools.
+uvx --with "packaging>=24.2" validate-pyproject pyproject.toml
+
 echo "==> ruff check (src + tests)"
 uv run ruff check .
 
diff --git a/scripts/docs_consistency_gate.py b/scripts/docs_consistency_gate.py
index 6d495e37..623c0a61 100644
--- a/scripts/docs_consistency_gate.py
+++ b/scripts/docs_consistency_gate.py
@@ -34,6 +34,7 @@
     "ASSEMBLYAI_LLM_GATEWAY_URL",
     "ASSEMBLYAI_STREAMING_HOST",
     "ASSEMBLYAI_AGENTS_HOST",
+    "ASSEMBLYAI_TTS_HOST",
 }
 
 _VAR_RE = re.compile(r"\b((?:AAI|ASSEMBLYAI)_[A-Z0-9_]+)\b")
diff --git a/scripts/template_contract_gate.py b/scripts/template_contract_gate.py
index d821802a..fe084dc9 100644
--- a/scripts/template_contract_gate.py
+++ b/scripts/template_contract_gate.py
@@ -44,7 +44,15 @@ def _fail(message: str) -> NoReturn:
 
 
 def _template_dirs() -> dict[str, Path]:
-    dirs = {path.name: path for path in _ROOT.iterdir() if path.is_dir()}
+    # On-disk dirs are underscore package names; registry ids are kebab. Map each
+    # shipped dir back to its kebab id so both sets compare in the id namespace.
+    # Templates are now importable packages, so importing them creates __pycache__
+    # alongside the template dirs — skip dunder dirs (matches the registry tests).
+    dirs = {
+        path.name.replace("_", "-"): path
+        for path in _ROOT.iterdir()
+        if path.is_dir() and not path.name.startswith("__")
+    }
     registered = set(templates.TEMPLATES)
     shipped = set(dirs)
     missing = registered - shipped
diff --git a/tests/__snapshots__/test_snapshots_help_build.ambr b/tests/__snapshots__/test_snapshots_help_build.ambr
index 3437689b..14447ca7 100644
--- a/tests/__snapshots__/test_snapshots_help_build.ambr
+++ b/tests/__snapshots__/test_snapshots_help_build.ambr
@@ -89,8 +89,8 @@
   
   ╭─ Arguments ──────────────────────────────────────────────────────────────────╮
   │   template       [TEMPLATE]   Template to scaffold: audio-transcription,     │
-  │                               live-captions, voice-agent (omit to pick       │
-  │                               interactively)                                 │
+  │                               live-captions, voice-agent, agent-framework    │
+  │                               (omit to pick interactively)                   │
   │   directory      [DIRECTORY]  Target directory (default: <template>)         │
   ╰──────────────────────────────────────────────────────────────────────────────╯
   ╭─ Options ────────────────────────────────────────────────────────────────────╮
diff --git a/tests/_agent_framework.py b/tests/_agent_framework.py
new file mode 100644
index 00000000..a110f42b
--- /dev/null
+++ b/tests/_agent_framework.py
@@ -0,0 +1,173 @@
+"""Shared loaders and fakes for the agent-framework template tests."""
+
+from __future__ import annotations
+
+import asyncio
+import importlib
+import json
+import sys
+from pathlib import Path
+
+import pytest
+
+TEMPLATE_DIR = Path("aai_cli/init/templates/agent_framework")
+
+
+def _load(module: str, monkeypatch: pytest.MonkeyPatch, **env: str):
+    for key, value in env.items():
+        monkeypatch.setenv(key, value)  # applied before the import below runs
+    for name in ("api.index", "api.cascade", "api.settings", "api"):
+        sys.modules.pop(name, None)  # drop cached `api` modules so the import is fresh
+    monkeypatch.syspath_prepend(str(TEMPLATE_DIR))  # `api.*` now resolves under TEMPLATE_DIR
+    return importlib.import_module(module)
+
+
+def _cascade(monkeypatch: pytest.MonkeyPatch):
+    return _load("api.cascade", monkeypatch, ASSEMBLYAI_API_KEY="sk-test")  # placeholder key
+
+
+def reimport(name: str):
+    """Re-fetch an already-loaded template module as an untyped handle.
+
+    Tests mutate `settings.API_KEY = …` etc.; a `ModuleType` return would reject
+    those attribute writes, so the module is laundered through its own dynamically
+    typed `__dict__` to recover the open attribute handle the fakes need.
+    """
+    module = importlib.import_module(name)
+    return module.__dict__.get("__aai_self__", module)  # module itself if no `__aai_self__`
+
+
+def untyped_bag():
+    """A fresh empty dict used as a dynamic capture bag in the adapter tests.
+
+    `json.loads` has a dynamic return type, so the bag accepts the mixed scalar /
+    nested-dict values the fakes record without an explicit annotation.
+    """
+    return json.loads("{}")  # parses to a new, independent empty dict on every call
+
+
+class FakeBrowser:
+    """A browser side: hands out queued inbound messages, then blocks forever so the
+    mic pump stays alive until the test cancels it (mirrors a still-connected client)."""
+
+    def __init__(self, inbound: list[dict[str, object] | None] | None = None) -> None:
+        self._inbound: list[dict[str, object] | None] = list(inbound or [])
+        self.sent: list[dict[str, object]] = []  # every event passed to send(), in order
+        self._idle = asyncio.Event()  # never set -> recv() blocks after the queue drains
+
+    async def send(self, event: dict[str, object]) -> None:
+        self.sent.append(event)
+
+    async def recv(self) -> dict[str, object] | None:
+        if self._inbound:
+            return self._inbound.pop(0)  # FIFO: oldest queued message first
+        await self._idle.wait()
+        return None  # only reachable if something sets _idle
+
+    def types(self) -> list[str]:
+        return [str(event["type"]) for event in self.sent]  # "type" of each sent event
+
+
+class FakeWS:
+    """A fake STT/TTS socket: yields the given frames as JSON strings, records sends."""
+
+    def __init__(self, frames: list[dict[str, object]] | None = None) -> None:
+        self._frames: list[str] = [json.dumps(f) for f in (frames or [])]
+        self.sent: list[str | bytes] = []  # everything passed to send(), in order
+        self.closed = False
+
+    def __aiter__(self) -> FakeWS:
+        return self
+
+    async def __anext__(self) -> str:
+        if not self._frames:
+            raise StopAsyncIteration  # frames exhausted -> `async for` ends
+        return self._frames.pop(0)
+
+    async def recv(self) -> str:
+        if not self._frames:
+            raise AssertionError("recv() past end of fake frames")  # over-read is a test bug
+        return self._frames.pop(0)  # shares the frame queue with iteration
+
+    async def send(self, data: str | bytes) -> None:
+        self.sent.append(data)
+
+    async def close(self) -> None:
+        self.closed = True  # lets tests assert the socket was closed
+
+
+def _async_return(value):
+    async def factory():
+        return value
+
+    return factory  # zero-arg coroutine function; awaiting a call yields `value`
+
+
+def _deps(monkeypatch, *, stt, tts_frames, llm_text, captured_messages=None):
+    """Build a cascade + Deps wired to fakes.
+
+    ``connect_tts`` hands out a FRESH FakeWS (cloned from ``tts_frames``) on every
+    call, because a streamed reply opens one TTS socket per sentence. When
+    ``captured_messages`` is a list, the fake ``llm_stream`` records the ``messages``
+    it was called with into it so memory threading can be asserted.
+    """
+    cascade = _cascade(monkeypatch)  # fresh import of the template's `api.cascade`
+    settings = reimport("api.settings")  # module handle whose attributes are set below
+    settings.API_KEY = "sk-test"
+    settings.TTS_HOST = "tts.example"
+    settings.GREETING = "hello!"
+    settings.SYSTEM_PROMPT = "be brief"
+
+    async def llm_stream(messages):
+        if captured_messages is not None:
+            captured_messages.append(messages)
+        for piece in llm_text:
+            yield piece  # replay `llm_text` one piece at a time
+
+    async def connect_tts():
+        return FakeWS(tts_frames)  # new socket (and frame queue) on every call
+
+    deps = cascade.Deps(
+        connect_stt=_async_return(stt),  # every call returns this same `stt` object
+        connect_tts=connect_tts,
+        llm_stream=llm_stream,
+        settings=settings,
+    )
+    return cascade, deps
+
+
+class _LLMChunk:
+    """Mimics one OpenAI streaming chunk: `chunk.choices[0].delta.content`."""
+
+    def __init__(self, content: str | None) -> None:
+        self.choices = [type("Choice", (), {"delta": type("Delta", (), {"content": content})()})()]
+
+
+class _FakeLLMStream:
+    """An async-iterable over `_LLMChunk`s, the shape `client.chat.completions.create` returns."""
+
+    def __init__(self, contents: list[str | None]) -> None:
+        self._contents = contents
+
+    def __aiter__(self):
+        return self._gen()  # a new generator each time, so the stream can be re-iterated
+
+    async def _gen(self):
+        for content in self._contents:
+            yield _LLMChunk(content)  # one chunk per entry; None passes through
+
+
+def _fake_openai_client(captured, contents: list[str | None]):
+    """A fake `AsyncOpenAI` class recording its kwargs and the create() kwargs into `captured`."""
+
+    class _FakeCompletions:
+        async def create(self, **kwargs):
+            captured.update(kwargs)  # create() kwargs are merged into `captured` directly
+            return _FakeLLMStream(contents)
+
+    class _FakeClient:
+        def __init__(self, **kwargs):
+            captured["client_kwargs"] = kwargs  # constructor kwargs get their own key
+            self.chat = type("Chat", (), {"completions": _FakeCompletions()})()
+
+    return _FakeClient  # the class itself, not an instance
diff --git a/tests/test_init_command.py b/tests/test_init_command.py
index 493008d1..1f5c4524 100644
--- a/tests/test_init_command.py
+++ b/tests/test_init_command.py
@@ -211,8 +211,8 @@ def test_init_template_arg_help_is_derived_from_registry():
     default = inspect.signature(init_cmd.init).parameters["template"].default
     assert isinstance(default, ArgumentInfo)
     assert default.help == (
-        "Template to scaffold: audio-transcription, live-captions, voice-agent "
-        "(omit to pick interactively)"
+        "Template to scaffold: audio-transcription, live-captions, voice-agent, "
+        "agent-framework (omit to pick interactively)"
     )
 
 
@@ -253,17 +253,30 @@ def test_init_unregistered_template_errors_cleanly(tmp_path, monkeypatch):
 
 
 def _fake_questionary(choice):
-    """A minimal stand-in for the questionary module's select(...).ask() chain."""
+    """A minimal stand-in for the questionary module's select(...).ask() chain.
+
+    The returned namespace records the choices select(...) was called with on its
+    ``choices`` attribute (read it back in a test to inspect titles/descriptions)."""
+
+    ns = types.SimpleNamespace(Choice=None, select=None, choices=None)
 
     class _Choice:
-        def __init__(self, title, value):
+        def __init__(self, title, value, description=None):
+            self.title = title
             self.value = value
+            self.description = description
 
     class _Select:
         def ask(self):
             return choice
 
-    return types.SimpleNamespace(Choice=_Choice, select=lambda *a, **k: _Select())
+    def _select(*_a, choices=None, **_k):
+        ns.choices = choices
+        return _Select()
+
+    ns.Choice = _Choice
+    ns.select = _select
+    return ns
 
 
 def test_pick_template_interactive_returns_choice(monkeypatch):
@@ -273,6 +286,21 @@ def test_pick_template_interactive_returns_choice(monkeypatch):
     assert init_exec._pick_template() == TEMPLATE
 
 
+def test_pick_template_choices_carry_descriptions(monkeypatch):
+    # Each picker choice wires the registry's title + description for that template.
+    from aai_cli.init import templates
+
+    monkeypatch.setattr("sys.stdin", _Tty())
+    monkeypatch.setattr("sys.stdout", _Tty())
+    fake = _fake_questionary(TEMPLATE)
+    monkeypatch.setitem(sys.modules, "questionary", fake)
+    init_exec._pick_template()
+    choices = fake.choices
+    assert [c.value for c in choices] == list(templates.TEMPLATE_ORDER)
+    assert all(c.description == templates.description_for(c.value) for c in choices)
+    assert all(c.description for c in choices)  # every template advertises a description
+
+
 def test_pick_template_ctrl_c_exits_130(monkeypatch):
     # questionary returns None when the user presses Ctrl-C at the prompt.
     monkeypatch.setattr("sys.stdin", _Tty())
@@ -316,11 +344,23 @@ def test_active_env_vars_agents_host_replaces_only_first_streaming(monkeypatch):
         api_base="https://api.x",
         llm_gateway_base="https://llm.x",
         streaming_host="streaming.streaming.example.com",
+        streaming_tts_host="",
     )
     monkeypatch.setattr(init_exec.environments, "active", lambda: fake_env)
     assert init_exec._active_env_vars()["ASSEMBLYAI_AGENTS_HOST"] == "agents.streaming.example.com"
 
 
+def test_active_env_vars_includes_streaming_tts_host(monkeypatch):
+    fake_env = types.SimpleNamespace(
+        api_base="https://api.x",
+        llm_gateway_base="https://llm.x/v1",
+        streaming_host="streaming.x",
+        streaming_tts_host="streaming-tts.x",
+    )
+    monkeypatch.setattr(init_exec.environments, "active", lambda: fake_env)
+    assert init_exec._active_env_vars()["ASSEMBLYAI_TTS_HOST"] == "streaming-tts.x"
+
+
 def test_init_install_failure_reports_and_exits(tmp_path, monkeypatch):
     # A failing dependency install is reported and exits non-zero (no launch).
     monkeypatch.chdir(tmp_path)
diff --git a/tests/test_init_scaffold.py b/tests/test_init_scaffold.py
index 0e9bc38c..f1ac7444 100644
--- a/tests/test_init_scaffold.py
+++ b/tests/test_init_scaffold.py
@@ -37,6 +37,17 @@ def test_scaffold_tightens_existing_env_on_overwrite(tmp_path):
         assert stat.S_IMODE(stale.stat().st_mode) == 0o600
 
 
+def test_scaffold_omits_template_root_init_but_keeps_api_init(tmp_path):
+    # The in-repo template dir is an importable package (templates/<name>/__init__.py),
+    # but that root marker is repo-only and must NOT ship into the scaffolded project —
+    # while api/'s own __init__.py must, since the shipped app's `from . import settings`
+    # needs `api` to be a package.
+    target = tmp_path / "app"
+    scaffold.scaffold("agent-framework", target, api_key="sk-real-key")
+    assert not (target / "__init__.py").exists()  # root marker skipped
+    assert (target / "api" / "__init__.py").is_file()  # api package kept
+
+
 def test_scaffold_copies_files_and_renames_dotfiles(tmp_path):
     target = tmp_path / "app"
     scaffold.scaffold("audio-transcription", target, api_key="sk-real-key")
diff --git a/tests/test_init_template_agent.py b/tests/test_init_template_agent.py
index be945a31..1a33ca46 100644
--- a/tests/test_init_template_agent.py
+++ b/tests/test_init_template_agent.py
@@ -5,7 +5,7 @@
 
 from fastapi.testclient import TestClient
 
-TEMPLATE_DIR = Path("aai_cli/init/templates/voice-agent")
+TEMPLATE_DIR = Path("aai_cli/init/templates/voice_agent")
 
 
 def _load_app(monkeypatch):
diff --git a/tests/test_init_template_agent_framework.py b/tests/test_init_template_agent_framework.py
new file mode 100644
index 00000000..f2f654c8
--- /dev/null
+++ b/tests/test_init_template_agent_framework.py
@@ -0,0 +1,426 @@
+"""Hermetic tests for the agent-framework (cascaded voice agent) template.
+
+The template ships a standalone FastAPI app under api/; load it by path with its
+own `api` package, evicting any other template's cached `api` modules so imports
+stay collision-free under pytest-xdist / pytest-randomly.
+"""
+
+from __future__ import annotations
+
+import asyncio
+import base64
+
+import pytest
+
+from tests._agent_framework import (
+    FakeBrowser,
+    FakeWS,
+    _async_return,
+    _cascade,
+    _deps,
+    _load,
+    reimport,
+)
+
+
+def test_settings_imports_without_key_or_tts_host(monkeypatch):
+    # load_dotenv() defaults to override=False, so pre-setting both vars to "" stops an
+    # ambient .env further up the tree from filling them in. Import must still succeed:
+    # the empty-host guard lives in the WS handler, not at module import time.
+    for name in ("ASSEMBLYAI_API_KEY", "ASSEMBLYAI_TTS_HOST"):
+        monkeypatch.setenv(name, "")
+    cfg = _load("api.settings", monkeypatch)
+    assert cfg.API_KEY == ""
+    assert cfg.MODEL == "claude-haiku-4-5-20251001"
+    assert cfg.VOICE == "jane"
+    assert cfg.INPUT_SAMPLE_RATE == 16000
+    assert cfg.OUTPUT_SAMPLE_RATE == 24000
+
+
+def test_settings_reads_env(monkeypatch):
+    # Each value handed to _load must surface on the loaded settings module.
+    env = {
+        "ASSEMBLYAI_API_KEY": "sk-test",
+        "ASSEMBLYAI_STREAMING_HOST": "streaming.example",
+        "ASSEMBLYAI_TTS_HOST": "tts.example",
+        "ASSEMBLYAI_LLM_GATEWAY_URL": "https://llm.example/v1",
+    }
+    cfg = _load("api.settings", monkeypatch, **env)
+    assert cfg.API_KEY == "sk-test"
+    assert cfg.STREAMING_HOST == "streaming.example"
+    assert cfg.TTS_HOST == "tts.example"
+    assert cfg.LLM_GATEWAY_URL == "https://llm.example/v1"
+
+
+def test_settings_sandbox_defaults(monkeypatch):
+    # TTS is sandbox-only, so with the host env vars unset the whole cascade is
+    # expected to fall back to the sandbox defaults. Pinning the exact default
+    # strings (hosts, prompt, greeting, history cap) keeps the mutation gate honest
+    # about settings.py's literals.
+    for suffix in ("STREAMING_HOST", "TTS_HOST", "LLM_GATEWAY_URL"):
+        monkeypatch.delenv(f"ASSEMBLYAI_{suffix}", raising=False)
+    cfg = _load("api.settings", monkeypatch)
+    assert cfg.STREAMING_HOST == "streaming.sandbox000.assemblyai-labs.com"
+    assert cfg.TTS_HOST == "streaming-tts.sandbox000.assemblyai-labs.com"
+    assert cfg.LLM_GATEWAY_URL == "https://llm-gateway.sandbox000.assemblyai-labs.com/v1"
+    assert cfg.SYSTEM_PROMPT == (
+        "You are a friendly, concise voice assistant. Keep replies short and conversational. "
+        "Your reply is read aloud by a text-to-speech engine, so write plain spoken prose — "
+        "no markdown, emoji, bullet lists, or code."
+    )
+    assert cfg.GREETING == "Hi! I'm your AssemblyAI voice agent. What can I help you with?"
+    assert cfg.MAX_HISTORY == 40
+
+
+def test_unavailable_reason_missing_key(monkeypatch: pytest.MonkeyPatch) -> None:
+    mod = _cascade(monkeypatch)
+    cfg = reimport("api.settings")
+    cfg.TTS_HOST = "tts.example"
+    cfg.API_KEY = ""  # the missing piece the reason must name
+    assert "ASSEMBLYAI_API_KEY" in mod.unavailable_reason(cfg)
+
+
+def test_unavailable_reason_missing_tts_host(monkeypatch: pytest.MonkeyPatch) -> None:
+    mod = _cascade(monkeypatch)
+    cfg = reimport("api.settings")
+    cfg.TTS_HOST = ""  # no streaming-TTS host configured
+    cfg.API_KEY = "sk-test"
+    message = mod.unavailable_reason(cfg)
+    assert "sandbox" in message and "assembly --sandbox init agent-framework" in message
+
+
+def test_unavailable_reason_ok(monkeypatch: pytest.MonkeyPatch) -> None:
+    mod = _cascade(monkeypatch)
+    cfg = reimport("api.settings")
+    cfg.TTS_HOST = "tts.example"
+    cfg.API_KEY = "sk-test"
+    assert mod.unavailable_reason(cfg) is None  # key and TTS host both set
+
+
+def test_stt_url_carries_streaming_params(monkeypatch: pytest.MonkeyPatch) -> None:
+    mod = _cascade(monkeypatch)
+    cfg = reimport("api.settings")
+    cfg.INPUT_SAMPLE_RATE = 16000
+    cfg.STREAMING_HOST = "streaming.example"
+    built = mod.stt_url(cfg)
+    assert built.startswith("wss://streaming.example/v3/ws?")
+    assert "format_turns=true" in built
+    assert "speech_model=u3-rt-pro" in built
+    assert "encoding=pcm_s16le" in built
+    assert "sample_rate=16000" in built
+
+
+def test_tts_url_carries_voice_and_rate(monkeypatch: pytest.MonkeyPatch) -> None:
+    mod = _cascade(monkeypatch)
+    cfg = reimport("api.settings")
+    cfg.OUTPUT_SAMPLE_RATE = 24000
+    cfg.VOICE = "jane"
+    cfg.TTS_HOST = "tts.example"
+    built = mod.tts_url(cfg)
+    assert built.startswith("wss://tts.example/v1/ws/?")
+    assert "sample_rate=24000" in built
+    assert "voice=jane" in built
+
+
+def test_is_final_user_turn(monkeypatch: pytest.MonkeyPatch) -> None:
+    is_final = _cascade(monkeypatch).is_final_user_turn
+    assert is_final({"end_of_turn": True, "turn_is_formatted": True}) is True
+    assert is_final({"end_of_turn": True, "turn_is_formatted": False}) is False
+    assert is_final({"end_of_turn": False, "turn_is_formatted": True}) is False
+    assert is_final({}) is False  # missing keys count as not final
+
+
+def test_build_messages(monkeypatch: pytest.MonkeyPatch) -> None:
+    user_turn = {"role": "user", "content": "hello there"}
+    built = _cascade(monkeypatch).build_messages("be brief", [user_turn])
+    assert built == [
+        {"role": "system", "content": "be brief"},
+        {"role": "user", "content": "hello there"},
+    ]
+
+
+def test_build_messages_prepends_system_to_history(monkeypatch: pytest.MonkeyPatch) -> None:
+    mod = _cascade(monkeypatch)
+    turns = [{"role": "user", "content": "hi"}, {"role": "assistant", "content": "hello"}]
+    built = mod.build_messages("be brief", turns)
+    # The system prompt must come first, with the earlier turns following it in
+    # their original order.
+    assert built == [
+        {"role": "system", "content": "be brief"},
+        {"role": "user", "content": "hi"},
+        {"role": "assistant", "content": "hello"},
+    ]
+
+
+def test_trim_history_keeps_last_n(monkeypatch: pytest.MonkeyPatch) -> None:
+    mod = _cascade(monkeypatch)
+    turns = [{"role": "user", "content": str(index)} for index in range(5)]
+    mod._trim_history(turns, 3)  # trims the list in place; no return value is used
+    assert turns == [
+        {"role": "user", "content": "2"},
+        {"role": "user", "content": "3"},
+        {"role": "user", "content": "4"},
+    ]
+    # Under the cap: untouched (the other branch).
+    lone = [{"role": "user", "content": "only"}]
+    mod._trim_history(lone, 3)
+    assert lone == [{"role": "user", "content": "only"}]
+
+
+def test_split_sentences(monkeypatch: pytest.MonkeyPatch) -> None:
+    split = _cascade(monkeypatch)._split_sentences
+    # Finished sentences come back as a list, followed by the unterminated remainder.
+    assert split("One. Two! Three? rest") == (["One.", "Two!", "Three?"], " rest")
+    # With no sentence terminator, everything is remainder.
+    assert split("whole") == ([], "whole")
+    # Empty input yields no sentences and an empty remainder.
+    assert split("") == ([], "")
+
+
+def test_pump_mic_forwards_decoded_audio(monkeypatch):
+    mod = _cascade(monkeypatch)
+    raw = b"\x01\x02\x03\x04"
+    frame = {"type": "input.audio", "audio": base64.b64encode(raw).decode()}
+    stt = FakeWS()
+    asyncio.run(mod._pump_mic(FakeBrowser([frame, None]), stt))
+    assert stt.sent == [raw]  # base64 payload reaches STT as the original bytes
+
+
+def test_pump_mic_ignores_non_audio_and_stops_on_disconnect(monkeypatch):
+    mod = _cascade(monkeypatch)
+    stt = FakeWS()
+    client = FakeBrowser([{"type": "noise"}, None])  # None presumably = disconnect
+    asyncio.run(mod._pump_mic(client, stt))
+    assert stt.sent == []
+
+
+def test_pump_mic_skips_malformed_base64(monkeypatch):
+    # An undecodable audio frame is dropped rather than ending the session, so the
+    # well-formed frame queued behind it must still be forwarded.
+    mod = _cascade(monkeypatch)
+    raw = b"\x01\x02\x03\x04"
+    good = base64.b64encode(raw).decode()
+    frames = [
+        {"type": "input.audio", "audio": "abc"},  # invalid padding -> ValueError
+        {"type": "input.audio", "audio": good},
+        None,
+    ]
+    client = FakeBrowser(frames)
+    stt = FakeWS()
+    asyncio.run(mod._pump_mic(client, stt))
+    assert stt.sent == [raw]  # only the valid frame survived; the bad one was skipped
+
+
+def test_maybe_barge_in_cancels_active_reply(monkeypatch):
+    cascade = _cascade(monkeypatch)
+    browser = FakeBrowser()
+
+    async def drive():
+        session = cascade.Session()
+        started = asyncio.Event()
+
+        async def never_ending():
+            started.set()
+            await asyncio.Event().wait()  # never set: blocks until cancelled
+
+        session.reply_task = asyncio.create_task(never_ending())
+        await started.wait()  # ensure the stand-in reply is running before barging in
+        await cascade.maybe_barge_in(browser, session)
+        return session
+
+    session = asyncio.run(drive())
+    assert browser.sent == [{"type": "input.speech.started"}]  # browser told to stop
+    assert session.reply_task is None  # the active reply slot was cleared
+
+
+def test_maybe_barge_in_noop_without_reply(monkeypatch):
+    mod = _cascade(monkeypatch)
+    client = FakeBrowser()
+    asyncio.run(mod.maybe_barge_in(client, mod.Session()))  # fresh Session, no reply set
+    assert client.sent == []
+
+
+def test_run_session_unavailable_emits_error(monkeypatch):
+    mod = _cascade(monkeypatch)
+    cfg = reimport("api.settings")
+    cfg.API_KEY = ""  # blank key: the session must report itself unavailable
+    client = FakeBrowser()
+    wiring = mod.Deps(
+        connect_stt=_async_return(FakeWS()),
+        connect_tts=_async_return(FakeWS()),
+        llm_stream=lambda _m: iter(()),
+        settings=cfg,
+    )
+    asyncio.run(mod.run_session(client, wiring))
+    assert client.types() == ["session.error"]
+
+
+def test_run_session_connect_failure_emits_error(monkeypatch):
+    mod = _cascade(monkeypatch)
+    cfg = reimport("api.settings")
+    cfg.TTS_HOST = "tts.example"
+    cfg.API_KEY = "sk-test"
+
+    async def refuse():
+        raise RuntimeError("no route to host")
+
+    client = FakeBrowser()
+    wiring = mod.Deps(
+        connect_stt=refuse,
+        connect_tts=_async_return(FakeWS()),
+        llm_stream=lambda _m: iter(()),
+        settings=cfg,
+    )
+    asyncio.run(mod.run_session(client, wiring))
+    assert client.types() == ["session.error"]
+    assert "no route to host" in str(client.sent[0]["message"])
+
+
+def test_run_session_happy_path(monkeypatch):
+    # End-to-end session: STT yields one finalized user turn, then closes -> the reply
+    # drains, then the session tears down. The greeting speaks first. The mic pump
+    # blocks on FakeBrowser's idle event until run_session cancels it.
+    stt = FakeWS(
+        [
+            {
+                "type": "Turn",
+                "transcript": "what time is it",
+                "end_of_turn": True,
+                "turn_is_formatted": True,
+            }
+        ]
+    )
+    tts_sockets = [  # handed out front-to-back by connect_tts() below
+        FakeWS(
+            [
+                {"type": "Begin", "configuration": {}},
+                {"type": "Audio", "audio": "G=", "is_final": True},
+            ]
+        ),
+        FakeWS(
+            [
+                {"type": "Begin", "configuration": {}},
+                {"type": "Audio", "audio": "R=", "is_final": True},
+            ]
+        ),
+    ]
+    cascade = _cascade(monkeypatch)
+    settings = reimport("api.settings")
+    settings.API_KEY = "sk-test"
+    settings.TTS_HOST = "tts.example"
+    settings.GREETING = "hello!"
+    settings.SYSTEM_PROMPT = "be brief"
+
+    async def llm_stream(_messages):
+        yield "It is noon."  # the whole reply arrives as a single delta
+
+    def connect_tts():  # sync callable; each call returns a fresh coroutine
+        async def factory():
+            return tts_sockets.pop(0)
+
+        return factory()
+
+    captured_session = {}  # the spy stores the Session here for the history assert
+    real_generate_reply = cascade._generate_reply
+
+    async def spy_generate_reply(browser, deps, session):
+        captured_session["session"] = session
+        await real_generate_reply(browser, deps, session)  # delegate to the real one
+
+    monkeypatch.setattr(cascade, "_generate_reply", spy_generate_reply)
+
+    deps = cascade.Deps(
+        connect_stt=_async_return(stt),
+        connect_tts=connect_tts,
+        llm_stream=llm_stream,
+        settings=settings,
+    )
+    browser = FakeBrowser()
+    asyncio.run(asyncio.wait_for(cascade.run_session(browser, deps), timeout=5))
+
+    types = browser.types()
+    assert types[0] == "transcript.agent"  # greeting spoken first
+    assert {"type": "transcript.user", "text": "what time is it"} in browser.sent
+    assert {"type": "transcript.agent", "text": "It is noon."} in browser.sent
+    assert {"type": "reply.audio", "data": "R="} in browser.sent
+    assert browser.sent[-1] == {"type": "reply.done", "status": "completed"}
+    assert stt.closed is True
+    # Memory: the greeting is seeded, then the user turn and assistant reply append.
+    assert captured_session["session"].history == [
+        {"role": "assistant", "content": "hello!"},
+        {"role": "user", "content": "what time is it"},
+        {"role": "assistant", "content": "It is noon."},
+    ]
+
+
+def test_pump_stt_interim_turn_barges_in_without_displaying(monkeypatch):
+    # An interim (non-final) turn is NOT shown to the user; it only barges in on an
+    # active reply. Only the finalized turn gets a transcript.user (see happy-path test).
+    cascade = _cascade(monkeypatch)
+    settings = reimport("api.settings")
+    settings.API_KEY = "sk-test"
+    settings.TTS_HOST = "tts.example"
+    browser = FakeBrowser()
+
+    async def drive():
+        session = cascade.Session()
+
+        async def never_ending():  # stand-in for a reply still in flight
+            await asyncio.Event().wait()
+
+        session.reply_task = asyncio.create_task(never_ending())
+        stt = FakeWS(
+            [
+                {
+                    "type": "Turn",
+                    "transcript": "wait",
+                    "end_of_turn": False,
+                    "turn_is_formatted": False,
+                }
+            ]
+        )
+        # Deps is assembled by hand here (the shared _deps helper is not used).
+        deps = cascade.Deps(
+            connect_stt=_async_return(stt),
+            connect_tts=_async_return(FakeWS()),
+            llm_stream=lambda _m: iter(()),
+            settings=settings,
+        )
+        await cascade._pump_stt(browser, stt, deps, session)
+
+    asyncio.run(asyncio.wait_for(drive(), timeout=5))
+    # Interim turn: barge-in fires, but no transcript.user (partial or otherwise) is shown.
+    assert {"type": "input.speech.started"} in browser.sent
+    assert {"type": "transcript.user", "text": "wait"} not in browser.sent
+    assert not any(event.get("type") == "transcript.user" for event in browser.sent)
+
+
+def test_pump_stt_final_turn_barges_in_active_reply(monkeypatch):
+    # A finalized turn arriving while a reply is still playing must tell the browser to
+    # stop (input.speech.started), not just cancel server-side — otherwise the old TTS
+    # keeps playing in the browser.
+    stt = FakeWS(
+        [
+            {
+                "type": "Turn",
+                "transcript": "actually never mind",
+                "end_of_turn": True,
+                "turn_is_formatted": True,
+            }
+        ]
+    )
+    cascade, deps = _deps(monkeypatch, stt=stt, tts_frames=[], llm_text=[])
+    browser = FakeBrowser()
+
+    async def drive():
+        session = cascade.Session()
+
+        async def never_ending():  # stand-in for a reply still in flight
+            await asyncio.Event().wait()
+
+        session.reply_task = asyncio.create_task(never_ending())
+        await cascade._pump_stt(browser, stt, deps, session)
+
+    asyncio.run(asyncio.wait_for(drive(), timeout=5))  # 5s cap guards against a hang
+    assert {"type": "input.speech.started"} in browser.sent  # browser told to stop
+    assert {"type": "transcript.user", "text": "actually never mind"} in browser.sent
diff --git a/tests/test_init_template_agent_framework_api.py b/tests/test_init_template_agent_framework_api.py
new file mode 100644
index 00000000..c2ba5442
--- /dev/null
+++ b/tests/test_init_template_agent_framework_api.py
@@ -0,0 +1,284 @@
+"""Hermetic tests for the agent-framework template's live-client adapters and FastAPI routes."""
+
+from __future__ import annotations
+
+import asyncio
+import importlib
+from types import SimpleNamespace
+
+import pytest
+
+from tests._agent_framework import (
+    FakeBrowser,
+    FakeWS,
+    _async_return,
+    _cascade,
+    _fake_openai_client,
+    _load,
+    reimport,
+    untyped_bag,
+)
+
+
+def test_connect_stt_uses_auth_header_and_url(monkeypatch: pytest.MonkeyPatch) -> None:
+    import websockets
+
+    mod = _cascade(monkeypatch)
+    cfg = reimport("api.settings")
+    cfg.INPUT_SAMPLE_RATE = 16000
+    cfg.STREAMING_HOST = "streaming.example"
+    cfg.API_KEY = "sk-test"
+    seen = untyped_bag()
+
+    async def recording_connect(url, **options):
+        # Stand-in for websockets.connect: note the call, hand back a fake socket.
+        seen["url"], seen["kwargs"] = url, options
+        return FakeWS()
+
+    monkeypatch.setattr(websockets, "connect", recording_connect)
+    socket = asyncio.run(mod._connect_stt(cfg))
+    assert isinstance(socket, FakeWS)
+    assert seen["url"] == mod.stt_url(cfg)
+    assert seen["kwargs"]["additional_headers"] == {"Authorization": "sk-test"}
+
+
+def test_connect_tts_passes_max_size_none(monkeypatch: pytest.MonkeyPatch) -> None:
+    import websockets
+
+    mod = _cascade(monkeypatch)
+    cfg = reimport("api.settings")
+    cfg.OUTPUT_SAMPLE_RATE = 24000
+    cfg.VOICE = "jane"
+    cfg.TTS_HOST = "tts.example"
+    cfg.API_KEY = "sk-test"
+    seen = untyped_bag()
+
+    async def recording_connect(url, **options):
+        # Stand-in for websockets.connect: note the call, hand back a fake socket.
+        seen["url"], seen["kwargs"] = url, options
+        return FakeWS()
+
+    monkeypatch.setattr(websockets, "connect", recording_connect)
+    socket = asyncio.run(mod._connect_tts(cfg))
+    assert isinstance(socket, FakeWS)
+    assert seen["url"] == mod.tts_url(cfg)
+    assert seen["kwargs"]["additional_headers"] == {"Authorization": "sk-test"}
+    assert seen["kwargs"]["max_size"] is None
+
+
+def test_llm_stream_yields_nonempty_deltas(monkeypatch: pytest.MonkeyPatch) -> None:
+    import openai
+
+    mod = _cascade(monkeypatch)
+    cfg = reimport("api.settings")
+    cfg.MODEL = "test-model"
+    cfg.LLM_GATEWAY_URL = "https://llm.example/v1"
+    cfg.API_KEY = "sk-test"
+    seen = untyped_bag()
+
+    # The fake client is given four deltas; "" and None must not come out of _llm_stream.
+    fake_client = _fake_openai_client(seen, ["Hi", "", " there", None])
+    monkeypatch.setattr(openai, "AsyncOpenAI", fake_client)
+
+    async def gather():
+        return [d async for d in mod._llm_stream(cfg, [{"role": "user", "content": "hi"}])]
+
+    pieces = asyncio.run(gather())
+    assert pieces == ["Hi", " there"]  # empty + None filtered by `if delta`
+    assert seen["model"] == "test-model"
+    assert seen["stream"] is True
+    assert seen["client_kwargs"]["base_url"] == "https://llm.example/v1"
+    assert seen["client_kwargs"]["api_key"] == "sk-test"
+
+
+def test_llm_stream_skips_chunk_without_choices(monkeypatch: pytest.MonkeyPatch) -> None:
+    # The Anthropic-backed gateway ends the stream with a usage/final chunk carrying an
+    # empty `choices` list; _llm_stream must skip it, not IndexError on chunk.choices[0].
+    import openai
+
+    cascade = _cascade(monkeypatch)
+    settings = reimport("api.settings")
+    settings.API_KEY = "sk-test"
+    settings.LLM_GATEWAY_URL = "https://llm.example/v1"
+    settings.MODEL = "test-model"
+
+    def _chunk(content: str) -> SimpleNamespace:  # shape: .choices[0].delta.content
+        return SimpleNamespace(choices=[SimpleNamespace(delta=SimpleNamespace(content=content))])
+
+    class _Stream:  # async-iterable stand-in for the streaming response
+        def __aiter__(self):
+            return self._gen()
+
+        async def _gen(self):
+            yield _chunk("Hi")
+            yield SimpleNamespace(choices=[])  # gateway usage/final chunk — no choices
+            yield _chunk(" there")
+
+    class _Completions:  # provides the awaited .create(...) that returns the stream
+        async def create(self, **kwargs):
+            return _Stream()
+
+    class _Client:  # replaces openai.AsyncOpenAI; exposes .chat.completions
+        def __init__(self, **kwargs):
+            self.chat = SimpleNamespace(completions=_Completions())
+
+    monkeypatch.setattr(openai, "AsyncOpenAI", _Client)
+
+    async def collect():
+        return [d async for d in cascade._llm_stream(settings, [{"role": "user", "content": "hi"}])]
+
+    assert asyncio.run(collect()) == ["Hi", " there"]  # the choices-less chunk is skipped
+
+
+def test_deps_real_factories_invoke_adapters(monkeypatch: pytest.MonkeyPatch) -> None:
+    import openai
+    import websockets
+
+    mod = _cascade(monkeypatch)
+    cfg = reimport("api.settings")
+    cfg.LLM_GATEWAY_URL = "https://llm.example/v1"
+    cfg.STREAMING_HOST = "streaming.example"
+    cfg.TTS_HOST = "tts.example"
+    cfg.API_KEY = "sk-test"
+
+    async def stub_connect(url, **kwargs):
+        return FakeWS()
+
+    monkeypatch.setattr(openai, "AsyncOpenAI", _fake_openai_client({}, []))
+    monkeypatch.setattr(websockets, "connect", stub_connect)
+
+    wiring = mod.Deps.real(cfg)
+    assert wiring.settings is cfg
+
+    async def exercise():
+        assert isinstance(await wiring.connect_stt(), FakeWS)
+        assert isinstance(await wiring.connect_tts(), FakeWS)
+        return [piece async for piece in wiring.llm_stream([{"role": "user", "content": "hi"}])]
+
+    assert asyncio.run(exercise()) == []
+
+
+def test_generate_reply_propagates_cancellation(monkeypatch: pytest.MonkeyPatch) -> None:
+    cascade = _cascade(monkeypatch)
+    settings = reimport("api.settings")
+    settings.API_KEY = "sk-test"
+    settings.TTS_HOST = "tts.example"
+    browser = FakeBrowser()
+
+    async def llm_stream(_messages):
+        yield "partial"  # one delta, then block until the task is cancelled
+        await asyncio.Event().wait()  # never set, so only cancellation ends it
+
+    deps = cascade.Deps(
+        connect_stt=_async_return(FakeWS()),
+        connect_tts=_async_return(FakeWS()),
+        llm_stream=llm_stream,
+        settings=settings,
+    )
+
+    async def drive():
+        task = asyncio.create_task(cascade._generate_reply(browser, deps, cascade.Session()))
+        await asyncio.sleep(0)  # let it start and block on the LLM
+        task.cancel()
+        with pytest.raises(asyncio.CancelledError):  # must re-raise, not swallow
+            await task
+
+    asyncio.run(drive())
+    # Cancellation must NOT be turned into a session.error.
+    assert browser.sent == []
+
+
+def test_pump_stt_skips_non_turn_and_empty_transcript(monkeypatch: pytest.MonkeyPatch) -> None:
+    mod = _cascade(monkeypatch)
+    cfg = reimport("api.settings")
+    cfg.SYSTEM_PROMPT = "be brief"
+    cfg.TTS_HOST = "tts.example"
+    cfg.API_KEY = "sk-test"
+    client = FakeBrowser()
+    # Neither a non-Turn frame nor a Turn with an empty transcript may produce a
+    # transcript.user event; after these two frames the stream closes.
+    stt = FakeWS(
+        [
+            {"type": "Begin"},
+            {"type": "Turn", "transcript": "", "end_of_turn": False, "turn_is_formatted": False},
+        ]
+    )
+    wiring = mod.Deps(
+        connect_stt=_async_return(stt),
+        connect_tts=_async_return(FakeWS()),
+        llm_stream=lambda _m: iter(()),
+        settings=cfg,
+    )
+    state = mod.Session()
+    asyncio.run(asyncio.wait_for(mod._pump_stt(client, stt, wiring, state), timeout=5))
+    assert client.sent == []  # nothing emitted for non-Turn or empty-transcript frames
+
+
+def test_synthesize_audio_frame_missing_payload_defaults_empty(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    mod = _cascade(monkeypatch)
+    # The Audio frame below has no `audio` key, so reply.audio must carry data == "".
+    frames = [{"type": "Begin", "configuration": {}}, {"type": "Audio", "is_final": True}]
+    client = FakeBrowser()
+    asyncio.run(mod._synthesize(client, FakeWS(frames), "hi"))
+    assert {"type": "reply.audio", "data": ""} in client.sent
+
+
+def test_index_serves_page(monkeypatch: pytest.MonkeyPatch) -> None:
+    app_module = _load("api.index", monkeypatch, ASSEMBLYAI_API_KEY="sk-test")
+    from fastapi.testclient import TestClient
+
+    page = TestClient(app_module.app).get("/")
+    assert page.status_code == 200
+    assert "<html" in page.text.lower()  # case-insensitive check for an HTML document
+
+
+def test_ws_route_runs_cascade(monkeypatch: pytest.MonkeyPatch) -> None:
+    # Drive the real /ws adapter with TestClient's WebSocket, but stub run_session so
+    # the route's accept + adapter wiring is exercised without real upstreams.
+    index = _load("api.index", monkeypatch, ASSEMBLYAI_API_KEY="sk-test")
+    cascade = reimport("api.cascade")
+    captured = untyped_bag()
+
+    async def fake_run_session(browser, deps):
+        captured["deps"] = deps  # kept for the settings-identity check at the end
+        msg = await browser.recv()
+        await browser.send({"type": "echo", "got": msg})  # round-trip through the adapter
+
+    monkeypatch.setattr(cascade, "run_session", fake_run_session)
+    from fastapi.testclient import TestClient
+
+    with TestClient(index.app).websocket_connect("/ws") as ws:
+        ws.send_json({"type": "input.audio", "audio": "AAA="})
+        assert ws.receive_json() == {
+            "type": "echo",
+            "got": {"type": "input.audio", "audio": "AAA="},
+        }
+    # The handler must wire the real settings module into Deps.real (not None / wrong arg).
+    assert captured["deps"].settings is importlib.import_module("api.settings")
+
+
+def test_fastapi_browser_recv_returns_none_on_disconnect(monkeypatch: pytest.MonkeyPatch) -> None:
+    mod = _cascade(monkeypatch)
+    from fastapi import WebSocketDisconnect
+
+    class DroppedSocket:
+        def __init__(self):
+            self.sent: list[dict[str, object]] = []
+
+        async def send_json(self, event):
+            self.sent.append(event)
+
+        async def receive_json(self):
+            raise WebSocketDisconnect(code=1000)
+
+    sock = DroppedSocket()
+    adapter = mod.FastAPIBrowser(sock)
+
+    async def exercise():
+        await adapter.send({"type": "x"})
+        return await adapter.recv()
+
+    assert asyncio.run(exercise()) is None
+    assert sock.sent == [{"type": "x"}]
diff --git a/tests/test_init_template_agent_framework_reply.py b/tests/test_init_template_agent_framework_reply.py
new file mode 100644
index 00000000..b17d92bc
--- /dev/null
+++ b/tests/test_init_template_agent_framework_reply.py
@@ -0,0 +1,252 @@
+"""Reply-synthesis-path tests for the agent-framework template.
+
+Covers _synthesize (the TTS protocol), _speak (the greeting/single-shot reply), and
+_generate_reply (the streamed, sentence-by-sentence reply with conversation memory).
+Split out of test_init_template_agent_framework.py to keep each file under the
+500-line gate.
+"""
+
+from __future__ import annotations
+
+import asyncio
+import json
+
+import pytest
+
+from tests._agent_framework import (
+    FakeBrowser,
+    FakeWS,
+    _async_return,
+    _cascade,
+    _deps,
+    reimport,
+)
+
+
+def test_synthesize_streams_audio_frames(monkeypatch):
+    mod = _cascade(monkeypatch)
+    tts = FakeWS(
+        [
+            {"type": "Begin", "configuration": {"sample_rate": 24000}},
+            {"type": "Audio", "audio": "AAA="},
+            {"type": "Audio", "audio": "BBB=", "is_final": True},
+        ]
+    )
+    client = FakeBrowser()
+    asyncio.run(mod._synthesize(client, tts, "hi"))
+    assert client.sent == [
+        {"type": "reply.audio", "data": "AAA="},
+        {"type": "reply.audio", "data": "BBB="},
+    ]
+    outbound = [json.loads(raw) for raw in tts.sent]
+    assert [frame["type"] for frame in outbound] == ["Generate", "Flush", "Terminate"]
+    # The Generate frame carries the text.
+    assert outbound[0]["text"] == "hi"
+    assert tts.closed is True
+
+
+def test_synthesize_raises_on_error_frame(monkeypatch):
+    mod = _cascade(monkeypatch)
+    frames = [{"type": "Begin", "configuration": {}}, {"type": "Error", "error": "bad voice"}]
+    tts, client = FakeWS(frames), FakeBrowser()
+    with pytest.raises(RuntimeError, match="bad voice"):  # Error frame text surfaces
+        asyncio.run(mod._synthesize(client, tts, "hi"))
+
+
+def test_synthesize_raises_when_no_begin(monkeypatch):
+    mod = _cascade(monkeypatch)
+    # The first (and only) frame is Audio rather than Begin.
+    tts = FakeWS([{"type": "Audio", "audio": "AAA=", "is_final": True}])
+    with pytest.raises(RuntimeError, match="did not begin"):
+        asyncio.run(mod._synthesize(FakeBrowser(), tts, "hi"))
+
+
+def test_synthesize_handles_close_without_final_frame(monkeypatch):
+    # If the TTS socket closes after some audio but before any is_final frame, the
+    # loop must finish cleanly: forward what arrived, then Terminate, without raising.
+    mod = _cascade(monkeypatch)
+    tts = FakeWS([{"type": "Begin", "configuration": {}}, {"type": "Audio", "audio": "AAA="}])
+    client = FakeBrowser()
+    asyncio.run(mod._synthesize(client, tts, "hi"))
+    assert {"type": "reply.audio", "data": "AAA="} in client.sent
+    assert json.loads(tts.sent[-1])["type"] == "Terminate"  # graceful teardown still ran
+    assert tts.closed is True
+
+
+def test_speak_emits_done_on_success(monkeypatch):
+    client = FakeBrowser()
+    mod, wiring = _deps(
+        monkeypatch,
+        stt=FakeWS(),
+        tts_frames=[{"type": "Begin", "configuration": {}}, {"type": "Audio", "is_final": True}],
+        llm_text=[],
+    )
+    asyncio.run(mod._speak(client, wiring, "hello!"))
+    assert {"type": "transcript.agent", "text": "hello!"} in client.sent
+    assert client.sent[-1] == {"type": "reply.done", "status": "completed"}
+
+
+def test_speak_surfaces_error_instead_of_silent_failure(monkeypatch):
+    # When the greeting's TTS connection fails, the browser must get a clean
+    # session.error rather than silence caused by a swallowed task exception.
+    mod = _cascade(monkeypatch)
+    cfg = reimport("api.settings")
+    cfg.TTS_HOST = "tts.example"
+    cfg.API_KEY = "sk-test"
+
+    async def unreachable_tts():
+        raise RuntimeError("tts unreachable")
+
+    client = FakeBrowser()
+    wiring = mod.Deps(
+        connect_stt=_async_return(FakeWS()),
+        connect_tts=unreachable_tts,
+        llm_stream=lambda _m: iter(()),
+        settings=cfg,
+    )
+    asyncio.run(mod._speak(client, wiring, "hello!"))
+    assert client.sent == [
+        {"type": "transcript.agent", "text": "hello!"},
+        {"type": "session.error", "message": "tts unreachable"},
+    ]
+
+
+def test_speak_propagates_cancellation(monkeypatch):
+    # Barge-in on the greeting must cancel cleanly (re-raise), not become a session.error.
+    cascade = _cascade(monkeypatch)
+    settings = reimport("api.settings")
+    settings.API_KEY = "sk-test"
+    settings.TTS_HOST = "tts.example"
+
+    async def blocking_connect_tts():
+        await asyncio.Event().wait()  # never resolves -> task blocks until cancelled
+        return FakeWS()  # unreachable: the wait above never completes
+
+    deps = cascade.Deps(
+        connect_stt=_async_return(FakeWS()),
+        connect_tts=blocking_connect_tts,
+        llm_stream=lambda _m: iter(()),
+        settings=settings,
+    )
+    browser = FakeBrowser()
+
+    async def drive():
+        task = asyncio.create_task(cascade._speak(browser, deps, "hello!"))
+        await asyncio.sleep(0)  # yield once so the task gets a chance to start
+        task.cancel()
+        with pytest.raises(asyncio.CancelledError):  # must re-raise, not swallow
+            await task
+
+    asyncio.run(drive())
+    assert not any(e["type"] == "session.error" for e in browser.sent)
+
+
+def test_generate_reply_speaks_llm_text(monkeypatch):
+    # Two deltas form one sentence: one transcript.agent + reply.audio, then one done.
+    mod, wiring = _deps(
+        monkeypatch,
+        stt=FakeWS(),
+        tts_frames=[
+            {"type": "Begin", "configuration": {}},
+            {"type": "Audio", "audio": "AAA=", "is_final": True},
+        ],
+        llm_text=["Hello", " world."],
+    )
+    state = mod.Session()
+    client = FakeBrowser()
+    asyncio.run(mod._generate_reply(client, wiring, state))
+    assert {"type": "transcript.agent", "text": "Hello world."} in client.sent
+    assert {"type": "reply.audio", "data": "AAA="} in client.sent
+    assert client.sent[-1] == {"type": "reply.done", "status": "completed"}
+    assert state.history[-1] == {"role": "assistant", "content": "Hello world."}
+
+
+def test_generate_reply_streams_each_sentence(monkeypatch):
+    # Deltas forming TWO sentences: each gets its own transcript.agent + reply.audio.
+    tts_frames = [
+        {"type": "Begin", "configuration": {}},
+        {"type": "Audio", "audio": "AAA=", "is_final": True},
+    ]
+    deltas = ["Hello there. ", "How are you?"]
+    cascade, deps = _deps(monkeypatch, stt=FakeWS(), tts_frames=tts_frames, llm_text=deltas)
+    client = FakeBrowser()
+    convo = cascade.Session()
+    asyncio.run(cascade._generate_reply(client, deps, convo))
+
+    def of_type(kind):
+        return [event for event in client.sent if event["type"] == kind]
+
+    spoken = [event["text"] for event in of_type("transcript.agent")]
+    assert spoken == ["Hello there.", "How are you?"]
+    assert of_type("reply.audio") == [{"type": "reply.audio", "data": "AAA="}] * 2
+    assert of_type("reply.done") == [{"type": "reply.done", "status": "completed"}]
+    full_reply = "Hello there. How are you?"
+    assert convo.history[-1] == {"role": "assistant", "content": full_reply}
+
+
+def test_generate_reply_empty_llm_emits_done(monkeypatch):
+    # A whitespace-only LLM reply: the only event sent is an "empty" reply.done.
+    cascade, deps = _deps(monkeypatch, stt=FakeWS(), tts_frames=[], llm_text=["  "])
+    client, convo = FakeBrowser(), cascade.Session()
+    asyncio.run(cascade._generate_reply(client, deps, convo))
+    assert client.sent == [{"type": "reply.done", "status": "empty"}]
+    assert convo.history == []  # an empty reply leaves no assistant turn behind
+
+
+def test_generate_reply_surfaces_error(monkeypatch):
+    cascade = _cascade(monkeypatch)
+    cfg = reimport("api.settings")
+    cfg.API_KEY, cfg.TTS_HOST = "sk-test", "tts.example"
+
+    async def failing_llm(_messages):
+        # One boundary-less delta arrives, then the LLM leg fails mid-stream.
+        yield "partial"
+        raise RuntimeError("llm down")
+
+    deps = cascade.Deps(
+        connect_stt=_async_return(FakeWS()),
+        connect_tts=_async_return(FakeWS()),
+        llm_stream=failing_llm,
+        settings=cfg,
+    )
+    client = FakeBrowser()
+    asyncio.run(cascade._generate_reply(client, deps, cascade.Session()))
+    assert client.sent == [{"type": "session.error", "message": "llm down"}]
+
+
+def test_generate_reply_records_spoken_partial_on_cancel(monkeypatch):
+    # Barge-in after the first sentence has been spoken: that sentence must still be
+    # recorded in history so the conversation keeps user/assistant alternation, even
+    # though the reply was cancelled before reaching its normal completion.
+    cascade = _cascade(monkeypatch)
+    settings = reimport("api.settings")
+    settings.API_KEY = "sk-test"
+    settings.TTS_HOST = "tts.example"
+    settings.SYSTEM_PROMPT = "be brief"
+
+    async def llm_stream(_messages):
+        yield "First sentence. "  # one complete sentence -> spoken
+        await asyncio.Event().wait()  # never set: parks here until the barge-in cancels us
+
+    async def connect_tts():
+        return FakeWS([{"type": "Begin", "configuration": {}}, {"type": "Audio", "is_final": True}])
+
+    deps = cascade.Deps(
+        connect_stt=_async_return(FakeWS()),
+        connect_tts=connect_tts,
+        llm_stream=llm_stream,
+        settings=settings,
+    )
+    browser = FakeBrowser()
+    session = cascade.Session()
+
+    async def drive():
+        task = asyncio.create_task(cascade._generate_reply(browser, deps, session))
+        for _ in range(5):  # NOTE(review): 5 yields is assumed to be enough — confirm
+            await asyncio.sleep(0)  # let it stream + synthesize the sentence, then block
+        task.cancel()  # simulate the barge-in
+        with pytest.raises(asyncio.CancelledError):
+            await task  # the cancellation must propagate out of _generate_reply
+
+    asyncio.run(drive())
+    assert session.history == [{"role": "assistant", "content": "First sentence."}]
diff --git a/tests/test_init_template_agent_framework_stt.py b/tests/test_init_template_agent_framework_stt.py
new file mode 100644
index 00000000..40e77358
--- /dev/null
+++ b/tests/test_init_template_agent_framework_stt.py
@@ -0,0 +1,141 @@
+"""Hermetic tests for the agent-framework template's conversation memory + multi-turn STT.
+
+Covers the sliding-window history threaded through `_generate_reply` and `_pump_stt`:
+the user turn is recorded before the reply, the assistant turn after it completes, so a
+multi-turn session accumulates an alternating user/assistant transcript.
+"""
+
+from __future__ import annotations
+
+import asyncio
+from collections.abc import Awaitable, Callable
+
+import pytest
+
+from tests._agent_framework import FakeBrowser, FakeWS, _async_return, _cascade, _deps, reimport
+
+
+def test_generate_reply_records_history(monkeypatch):
+    # With a prior exchange seeded, the first captured message list must be the system
+    # prompt followed by that exchange, AND the new assistant turn must end the history.
+    seen_by_llm: list[list[dict[str, str]]] = []
+    tts_frames = [
+        {"type": "Begin", "configuration": {}},
+        {"type": "Audio", "audio": "AAA=", "is_final": True},
+    ]
+    cascade, deps = _deps(
+        monkeypatch,
+        stt=FakeWS(),
+        tts_frames=tts_frames,
+        llm_text=["Sure."],
+        captured_messages=seen_by_llm,
+    )
+    client, convo = FakeBrowser(), cascade.Session()
+    convo.history = [
+        {"role": "user", "content": "earlier"},
+        {"role": "assistant", "content": "earlier reply"},
+    ]
+    asyncio.run(cascade._generate_reply(client, deps, convo))
+    assert seen_by_llm[0] == [
+        {"role": "system", "content": "be brief"},
+        {"role": "user", "content": "earlier"},
+        {"role": "assistant", "content": "earlier reply"},
+    ]
+    assert convo.history[-1] == {"role": "assistant", "content": "Sure."}
+
+
+def test_generate_reply_speaks_unpunctuated_tail(monkeypatch):
+    # A reply with no terminal punctuation never hits a sentence boundary; the leftover
+    # tail must still be spoken and recorded once the LLM stream ends.
+    tts_frames = [
+        {"type": "Begin", "configuration": {}},
+        {"type": "Audio", "audio": "AAA=", "is_final": True},
+    ]
+    cascade, deps = _deps(
+        monkeypatch, stt=FakeWS(), tts_frames=tts_frames, llm_text=["no period here"]
+    )
+    client = FakeBrowser()
+    convo = cascade.Session()
+    asyncio.run(cascade._generate_reply(client, deps, convo))
+
+    events = client.sent
+    spoken = [event["text"] for event in events if event["type"] == "transcript.agent"]
+    assert spoken == ["no period here"]
+    assert {"type": "reply.audio", "data": "AAA="} in events
+    assert events[-1] == {"type": "reply.done", "status": "completed"}
+    assert convo.history[-1] == {"role": "assistant", "content": "no period here"}
+
+
+def _turn(text: str) -> dict[str, object]:
+    """Build a finalized, formatted STT `Turn` frame carrying `text` as its transcript."""
+    frame: dict[str, object] = {"type": "Turn", "transcript": text}
+    # Both flags are set: the turn is ended and its transcript is already formatted.
+    frame["end_of_turn"] = True
+    frame["turn_is_formatted"] = True
+    return frame
+
+
+class _DrainingSTT(FakeWS):
+    """An STT fake that lets the prior reply task finish before emitting the next frame.
+
+    `_pump_stt` schedules each reply as a background task; a second turn arriving while
+    the first reply is mid-flight would cancel (barge in on) it. To exercise the
+    *uninterrupted* multi-turn path — where each reply completes and appends its
+    assistant turn — every `__anext__` first awaits the injected `drain` callback (the
+    caller makes it await the session's in-flight reply) and only then yields a frame.
+    """
+
+    def __init__(
+        self, drain: Callable[[], Awaitable[None]], frames: list[dict[str, object]]
+    ) -> None:
+        super().__init__(frames)  # frames are handed to FakeWS unchanged
+        self._drain = drain  # awaited before every frame (see __anext__)
+
+    async def __anext__(self) -> str:
+        await self._drain()  # let the caller-supplied hook finish first
+        return await super().__anext__()  # then defer to FakeWS for the actual frame
+
+
+def test_pump_stt_accumulates_multi_turn_history(monkeypatch: pytest.MonkeyPatch) -> None:
+    # Two finalized turns, each reply drained before the next, must leave history as
+    # [user1, assistant1, user2, assistant2]; wait_for(timeout=5) fails the test on a hang.
+    cascade = _cascade(monkeypatch)
+    settings = reimport("api.settings")
+    settings.API_KEY = "sk-test"
+    settings.TTS_HOST = "tts.example"
+    settings.SYSTEM_PROMPT = "be brief"
+    browser = FakeBrowser()
+
+    replies = iter(["First reply.", "Second reply."])  # consumed one per llm_stream call
+
+    async def llm_stream(_messages):
+        yield next(replies)  # a single delta carrying the whole canned reply
+
+    async def connect_tts():
+        return FakeWS(  # a fresh fake TTS socket on every call
+            [
+                {"type": "Begin", "configuration": {}},
+                {"type": "Audio", "audio": "AAA=", "is_final": True},
+            ]
+        )
+
+    session = cascade.Session()
+
+    async def drain_reply() -> None:
+        if session.reply_task is not None:  # nothing to await until a reply was scheduled
+            await session.reply_task
+
+    stt = _DrainingSTT(drain_reply, [_turn("hello"), _turn("again")])
+    deps = cascade.Deps(
+        connect_stt=_async_return(stt),
+        connect_tts=connect_tts,
+        llm_stream=llm_stream,
+        settings=settings,
+    )
+    asyncio.run(asyncio.wait_for(cascade._pump_stt(browser, stt, deps, session), timeout=5))
+    assert session.history == [
+        {"role": "user", "content": "hello"},
+        {"role": "assistant", "content": "First reply."},
+        {"role": "user", "content": "again"},
+        {"role": "assistant", "content": "Second reply."},
+    ]
diff --git a/tests/test_init_template_contract.py b/tests/test_init_template_contract.py
index 3d4f824a..9d0bf965 100644
--- a/tests/test_init_template_contract.py
+++ b/tests/test_init_template_contract.py
@@ -11,7 +11,21 @@
 )
 # Map an import name to its PyPI distribution where they differ.
 _PKG_MAP = {"dotenv": "python-dotenv", "multipart": "python-multipart"}
-_STDLIB = {"os", "tempfile", "uuid", "pathlib", "__future__", "json", "typing"}
+_STDLIB = {
+    "__future__",
+    "asyncio",
+    "base64",
+    "collections",
+    "contextlib",
+    "dataclasses",
+    "json",
+    "os",
+    "pathlib",
+    "tempfile",
+    "typing",
+    "urllib",
+    "uuid",
+}
 _LOCAL_IMPORTS = {"api"}
 
 
@@ -139,7 +153,7 @@ def test_runtime_pins_supported_python(template_dir):
 
 
 def test_realtime_templates_have_audio_helpers(template_dir):
-    if template_dir.name in {"live-captions", "voice-agent"}:
+    if template_dir.name in {"live_captions", "voice_agent"}:
         assert (template_dir / "static" / "audio.js").exists()
 
 
diff --git a/tests/test_init_template_serve.py b/tests/test_init_template_serve.py
index 00bd120f..b402abb1 100644
--- a/tests/test_init_template_serve.py
+++ b/tests/test_init_template_serve.py
@@ -33,7 +33,9 @@
 TEMPLATE_NAMES = sorted(
     p.name for p in TEMPLATES_ROOT.iterdir() if p.is_dir() and not p.name.startswith("__")
 )
-TOKEN_TEMPLATES = ["live-captions", "voice-agent"]
+# live-captions now mints via the AssemblyAI SDK (covered in test_init_template_stream.py),
+# so it no longer has the httpx2 GET these parametrized tests mock.
+TOKEN_TEMPLATES = ["voice_agent"]
 HTTP_OK = 200
 HTTP_BAD_GATEWAY = 502
 
@@ -102,7 +104,7 @@ def test_app_applies_custom_base_url(monkeypatch: pytest.MonkeyPatch) -> None:
     # environment; the app must point the SDK at it. isolate_env strips it by default,
     # so set it here to exercise the import-time branch that applies it.
     monkeypatch.setenv("ASSEMBLYAI_BASE_URL", "https://api.example.test")
-    with serve("audio-transcription") as (module, _client):
+    with serve("audio_transcription") as (module, _client):
         assert module.aai.settings.base_url == "https://api.example.test"
 
 
@@ -118,7 +120,7 @@ def submit(_audio: object, config: object = None) -> object:
 
 
 def test_transcribe_url_returns_submitted_id(monkeypatch: pytest.MonkeyPatch) -> None:
-    with serve("audio-transcription") as (module, client):
+    with serve("audio_transcription") as (module, client):
         _fake_transcriber(monkeypatch, module, SimpleNamespace(id="t-1"))
         resp = client.post("/api/transcribe-url", json={"url": "https://example.com/a.mp3"})
         assert resp.status_code == HTTP_OK
@@ -126,7 +128,7 @@ def test_transcribe_url_returns_submitted_id(monkeypatch: pytest.MonkeyPatch) ->
 
 
 def test_transcribe_file_upload_returns_id(monkeypatch: pytest.MonkeyPatch) -> None:
-    with serve("audio-transcription") as (module, client):
+    with serve("audio_transcription") as (module, client):
         _fake_transcriber(monkeypatch, module, SimpleNamespace(id="t-2"))
         resp = client.post("/api/transcribe", files={"file": ("a.wav", b"RIFFfake", "audio/wav")})
         assert resp.status_code == HTTP_OK
@@ -134,7 +136,7 @@ def test_transcribe_file_upload_returns_id(monkeypatch: pytest.MonkeyPatch) -> N
 
 
 def test_transcribe_without_id_is_handled(monkeypatch: pytest.MonkeyPatch) -> None:
-    with serve("audio-transcription") as (module, client):
+    with serve("audio_transcription") as (module, client):
         _fake_transcriber(monkeypatch, module, SimpleNamespace(id=None))
         resp = client.post("/api/transcribe-url", json={"url": "https://example.com/a.mp3"})
         assert resp.status_code == HTTP_BAD_GATEWAY
@@ -142,7 +144,7 @@ def test_transcribe_without_id_is_handled(monkeypatch: pytest.MonkeyPatch) -> No
 
 
 def test_transcribe_failure_is_graceful(monkeypatch: pytest.MonkeyPatch) -> None:
-    with serve("audio-transcription") as (module, client):
+    with serve("audio_transcription") as (module, client):
         _fake_transcriber(monkeypatch, module, None, raises=True)
         resp = client.post("/api/transcribe-url", json={"url": "https://example.com/a.mp3"})
         assert resp.status_code == HTTP_BAD_GATEWAY
@@ -167,7 +169,7 @@ def fetch(_http: object, _tid: object) -> object:
 
 
 def test_status_completed_returns_transcript(monkeypatch: pytest.MonkeyPatch) -> None:
-    with serve("audio-transcription") as (module, client):
+    with serve("audio_transcription") as (module, client):
         status = module.aai.TranscriptStatus.completed
         done = SimpleNamespace(status=status, dict=lambda: {"id": "t", "text": "hi"})
         _fake_get_transcript(monkeypatch, module, done)
@@ -177,7 +179,7 @@ def test_status_completed_returns_transcript(monkeypatch: pytest.MonkeyPatch) ->
 
 
 def test_status_still_processing_reports_state(monkeypatch: pytest.MonkeyPatch) -> None:
-    with serve("audio-transcription") as (module, client):
+    with serve("audio_transcription") as (module, client):
         status = module.aai.TranscriptStatus.processing
         _fake_get_transcript(monkeypatch, module, SimpleNamespace(status=status))
         resp = client.get("/api/status/t")
@@ -186,7 +188,7 @@ def test_status_still_processing_reports_state(monkeypatch: pytest.MonkeyPatch)
 
 
 def test_status_error_is_handled(monkeypatch: pytest.MonkeyPatch) -> None:
-    with serve("audio-transcription") as (module, client):
+    with serve("audio_transcription") as (module, client):
         status = module.aai.TranscriptStatus.error
         _fake_get_transcript(monkeypatch, module, SimpleNamespace(status=status, error="boom"))
         resp = client.get("/api/status/t")
@@ -195,7 +197,7 @@ def test_status_error_is_handled(monkeypatch: pytest.MonkeyPatch) -> None:
 
 
 def test_status_fetch_failure_is_graceful(monkeypatch: pytest.MonkeyPatch) -> None:
-    with serve("audio-transcription") as (module, client):
+    with serve("audio_transcription") as (module, client):
         _fake_get_transcript(monkeypatch, module, None, raises=True)
         resp = client.get("/api/status/t")
         assert resp.status_code == HTTP_BAD_GATEWAY
@@ -219,7 +221,7 @@ def make_client(**_kwargs: object) -> object:
 
 
 def test_ask_returns_answer(monkeypatch: pytest.MonkeyPatch) -> None:
-    with serve("audio-transcription") as (module, client):
+    with serve("audio_transcription") as (module, client):
         reply = SimpleNamespace(choices=[SimpleNamespace(message=SimpleNamespace(content="yes"))])
         _fake_openai(monkeypatch, module, reply)
         resp = client.post("/api/ask", json={"transcript_id": "t", "question": "q?"})
@@ -228,7 +230,7 @@ def test_ask_returns_answer(monkeypatch: pytest.MonkeyPatch) -> None:
 
 
 def test_ask_failure_is_graceful(monkeypatch: pytest.MonkeyPatch) -> None:
-    with serve("audio-transcription") as (module, client):
+    with serve("audio_transcription") as (module, client):
         _fake_openai(monkeypatch, module, None, raises=True)
         resp = client.post("/api/ask", json={"transcript_id": "t", "question": "q?"})
         assert resp.status_code == HTTP_BAD_GATEWAY
@@ -267,9 +269,9 @@ def test_token_failure_is_graceful(template: str, monkeypatch: pytest.MonkeyPatc
 
 # (template, method, path, json body) for one representative key-using endpoint each.
 MISSING_KEY_CASES = [
-    ("voice-agent", "post", "/api/token", None),
-    ("live-captions", "post", "/api/token", None),
-    ("audio-transcription", "post", "/api/transcribe-url", {"url": "https://example.com/a.mp3"}),
+    ("voice_agent", "post", "/api/token", None),
+    ("live_captions", "post", "/api/token", None),
+    ("audio_transcription", "post", "/api/transcribe-url", {"url": "https://example.com/a.mp3"}),
 ]
 
 
diff --git a/tests/test_init_template_stream.py b/tests/test_init_template_stream.py
index 60694936..90b785dd 100644
--- a/tests/test_init_template_stream.py
+++ b/tests/test_init_template_stream.py
@@ -5,12 +5,12 @@
 
 from fastapi.testclient import TestClient
 
-TEMPLATE_DIR = Path("aai_cli/init/templates/live-captions")
+TEMPLATE_DIR = Path("aai_cli/init/templates/live_captions")
 
 
 def _load_app(monkeypatch):
     # Boot with a key so the endpoint's missing-key guard passes and reaches the mock;
-    # preserve a key a test set before calling us (e.g. the raw-auth-header test).
+    # preserve a key a test set before calling us (e.g. the SDK-client test).
     monkeypatch.setenv("ASSEMBLYAI_API_KEY", os.environ.get("ASSEMBLYAI_API_KEY") or "test-key")
     for name in ("api.index", "api.settings", "api"):
         sys.modules.pop(name, None)
@@ -18,16 +18,17 @@ def _load_app(monkeypatch):
     return importlib.import_module("api.index")
 
 
-def _ok_response(mocker, token="tok-123"):
-    resp = mocker.MagicMock()
-    resp.json.return_value = {"token": token}
-    resp.raise_for_status.return_value = None
-    return resp
+def test_token_returns_token_and_streaming_ws_url(monkeypatch):
+    mod = _load_app(monkeypatch)
 
+    class FakeClient:
+        def __init__(self, options):
+            self.options = options
 
-def test_token_returns_token_and_streaming_ws_url(monkeypatch, mocker):
-    mod = _load_app(monkeypatch)
-    monkeypatch.setattr(mod.httpx2, "get", lambda *a, **k: _ok_response(mocker))
+        def create_temporary_token(self, expires_in_seconds):
+            return "tok-123"
+
+    monkeypatch.setattr(mod, "StreamingClient", FakeClient)
     resp = TestClient(mod.app).post("/api/token")
     assert resp.status_code == 200
     body = resp.json()
@@ -35,29 +36,43 @@ def test_token_returns_token_and_streaming_ws_url(monkeypatch, mocker):
     assert body["ws_url"].startswith("wss://") and body["ws_url"].endswith("/v3/ws")
 
 
-def test_token_uses_raw_authorization_header(monkeypatch, mocker):
-    # The streaming token uses the raw key as Authorization (NOT Bearer).
+def test_token_builds_sdk_client_with_key_and_host(monkeypatch):
     monkeypatch.setenv("ASSEMBLYAI_API_KEY", "sk-test")
     mod = _load_app(monkeypatch)
     captured = {}
 
-    def fake_get(url, params=None, headers=None):
-        captured.update(url=url, headers=headers)
-        return _ok_response(mocker)
+    class FakeOptions:
+        def __init__(self, *, api_key, api_host):
+            captured["api_key"] = api_key
+            captured["api_host"] = api_host
 
-    monkeypatch.setattr(mod.httpx2, "get", fake_get)
+    class FakeClient:
+        def __init__(self, options):
+            pass
+
+        def create_temporary_token(self, expires_in_seconds):
+            captured["expires_in_seconds"] = expires_in_seconds
+            return "tok-123"
+
+    monkeypatch.setattr(mod, "StreamingClientOptions", FakeOptions)
+    monkeypatch.setattr(mod, "StreamingClient", FakeClient)
     TestClient(mod.app).post("/api/token")
-    assert captured["headers"]["Authorization"] == "sk-test"
-    assert "/v3/token" in captured["url"]
+    assert captured["api_key"] == "sk-test"
+    assert captured["api_host"] == mod.settings.STREAMING_HOST
+    assert captured["expires_in_seconds"] == mod.settings.TOKEN_EXPIRES_IN_SECONDS
 
 
 def test_token_surfaces_error_as_502(monkeypatch):
     mod = _load_app(monkeypatch)
 
-    def boom(*a, **k):
-        raise RuntimeError("network down")
+    class FakeClient:
+        def __init__(self, options):
+            pass
+
+        def create_temporary_token(self, expires_in_seconds):
+            raise RuntimeError("network down")
 
-    monkeypatch.setattr(mod.httpx2, "get", boom)
+    monkeypatch.setattr(mod, "StreamingClient", FakeClient)
     resp = TestClient(mod.app).post("/api/token")
     assert resp.status_code == 502
 
diff --git a/tests/test_init_template_transcribe.py b/tests/test_init_template_transcribe.py
index 5eabc2d4..2993ece0 100644
--- a/tests/test_init_template_transcribe.py
+++ b/tests/test_init_template_transcribe.py
@@ -4,7 +4,7 @@
 
 from fastapi.testclient import TestClient
 
-TEMPLATE_DIR = Path("aai_cli/init/templates/audio-transcription")
+TEMPLATE_DIR = Path("aai_cli/init/templates/audio_transcription")
 
 
 def _load_app(monkeypatch, mocker):
diff --git a/tests/test_init_templates.py b/tests/test_init_templates.py
index f0c42a7e..8ae0d974 100644
--- a/tests/test_init_templates.py
+++ b/tests/test_init_templates.py
@@ -20,7 +20,7 @@ def test_every_registered_template_has_a_directory():
     # The registry must never advertise a template whose files don't ship — that
     # would crash `assembly init <id>` with a FileNotFoundError. This guards the picker.
     for tid in templates.TEMPLATES:
-        assert (_TEMPLATES_ROOT / tid / "api" / "index.py").exists(), (
-            f"template {tid!r} is registered but aai_cli/init/templates/{tid}/ is missing"
+        assert (_TEMPLATES_ROOT / templates.dir_for(tid) / "api" / "index.py").exists(), (
+            f"template {tid!r} is registered but templates/{templates.dir_for(tid)}/ is missing"
         )
 
@@ -31,11 +31,26 @@ def test_every_shipped_directory_is_registered():
     # this enforces registry == shipped directories.
     for path in _TEMPLATES_ROOT.iterdir():
         if path.is_dir() and not path.name.startswith("__"):
-            assert path.name in templates.TEMPLATES, (
+            assert path.name.replace("_", "-") in templates.TEMPLATES, (
                 f"aai_cli/init/templates/{path.name}/ ships but isn't registered in TEMPLATES"
             )
 
 
+def test_descriptions_cover_every_template():
+    # DESCRIPTIONS and TEMPLATES must hold exactly the same ids: none missing, none stray.
+    assert templates.DESCRIPTIONS.keys() == set(templates.TEMPLATES)
+    assert all(templates.DESCRIPTIONS.values())  # and no description may be blank
+
+
+def test_description_for_each_template_has_distinctive_text():
+    # One keyword per description, so a mutated literal cannot slip past the mutation gate.
+    keywords = {"audio-transcription": "Transcribe", "live-captions": "captions"}
+    keywords.update({"voice-agent": "Voice Agent", "agent-framework": "Cascaded"})
+    for template_id, keyword in keywords.items():
+        assert keyword in templates.description_for(template_id)
+    assert templates.description_for("nope") == ""  # an unknown id has no description
+
+
 def test_title_for_known_and_unknown():
     assert "Audio Transcription" in templates.title_for("audio-transcription")
     assert templates.title_for("nope") == "nope"  # falls back to the raw id
@@ -44,3 +59,15 @@ def test_title_for_known_and_unknown():
 def test_is_template():
     assert templates.is_template("audio-transcription") is True
     assert templates.is_template("nope") is False
+
+
+def test_every_template_is_an_importable_package():
+    # Templates are real in-tree packages (templates/<dir>/api/...): importable and
+    # type-checkable here, rather than only text that gets copied out when scaffolding.
+    from importlib import import_module
+
+    for tid in templates.TEMPLATES:
+        # The registry id maps to its package directory via dir_for.
+        index = import_module(f"aai_cli.init.templates.{templates.dir_for(tid)}.api.index")
+        exported = hasattr(index, "app")
+        assert exported, f"{tid}: api.index must export `app`"