diff --git a/docs/ai-chat/backend.mdx b/docs/ai-chat/backend.mdx
new file mode 100644
index 00000000000..50a338cd2c1
--- /dev/null
+++ b/docs/ai-chat/backend.mdx
@@ -0,0 +1,1064 @@
+---
+title: "Backend"
+sidebarTitle: "Backend"
+description: "Three approaches to building your chat backend — chat.task(), session iterator, or raw task primitives."
+---
+
+## chat.task()
+
+The highest-level approach. Handles message accumulation, stop signals, turn lifecycle, and auto-piping automatically.
+
+
+ To use a **custom** `UIMessage` subtype (typed custom data parts, tool map, etc.), use [`chat.withUIMessage<...>().task({...})`](/ai-chat/types) instead of `chat.task({...})`. Options are the same; defaults for `toUIMessageStream()` can be set on `withUIMessage`.
+
+
+### Simple: return a StreamTextResult
+
+Return the `streamText` result from `run` and it's automatically piped to the frontend:
+
+```ts
+import { chat } from "@trigger.dev/sdk/ai";
+import { streamText } from "ai";
+import { openai } from "@ai-sdk/openai";
+
+export const simpleChat = chat.task({
+ id: "simple-chat",
+ run: async ({ messages, signal }) => {
+ return streamText({
+ model: openai("gpt-4o"),
+ system: "You are a helpful assistant.",
+ messages,
+ abortSignal: signal,
+ });
+ },
+});
+```
+
+### Using chat.pipe() for complex flows
+
+For complex agent flows where `streamText` is called deep inside your code, use `chat.pipe()`. It works from **anywhere inside a task** — even nested function calls.
+
+```ts trigger/agent-chat.ts
+import { chat } from "@trigger.dev/sdk/ai";
+import { streamText } from "ai";
+import { openai } from "@ai-sdk/openai";
+import type { ModelMessage } from "ai";
+
+export const agentChat = chat.task({
+ id: "agent-chat",
+ run: async ({ messages }) => {
+ // Don't return anything — chat.pipe is called inside
+ await runAgentLoop(messages);
+ },
+});
+
+async function runAgentLoop(messages: ModelMessage[]) {
+ // ... agent logic, tool calls, etc.
+
+ const result = streamText({
+ model: openai("gpt-4o"),
+ messages,
+ });
+
+ // Pipe from anywhere — no need to return it
+ await chat.pipe(result);
+}
+```
+
+### Lifecycle hooks
+
+#### onPreload
+
+Fires when a preloaded run starts — before any messages arrive. Use it to eagerly initialize state (DB records, user context) while the user is still typing.
+
+Preloaded runs are triggered by calling `transport.preload(chatId)` on the frontend. See [Preload](/ai-chat/features#preload) for details.
+
+```ts
+export const myChat = chat.task({
+ id: "my-chat",
+ clientDataSchema: z.object({ userId: z.string() }),
+ onPreload: async ({ chatId, clientData, runId, chatAccessToken }) => {
+ // Initialize early — before the first message arrives
+ const user = await db.user.findUnique({ where: { id: clientData.userId } });
+ userContext.init({ name: user.name, plan: user.plan });
+
+ await db.chat.create({ data: { id: chatId, userId: clientData.userId } });
+ await db.chatSession.upsert({
+ where: { id: chatId },
+ create: { id: chatId, runId, publicAccessToken: chatAccessToken },
+ update: { runId, publicAccessToken: chatAccessToken },
+ });
+ },
+ onChatStart: async ({ preloaded }) => {
+ if (preloaded) return; // Already initialized in onPreload
+ // ... non-preloaded initialization
+ },
+ run: async ({ messages, signal }) => {
+ return streamText({ model: openai("gpt-4o"), messages, abortSignal: signal });
+ },
+});
+```
+
+| Field | Type | Description |
+| ----------------- | --------------------------------------------- | -------------------------------- |
+| `chatId` | `string` | Chat session ID |
+| `runId` | `string` | The Trigger.dev run ID |
+| `chatAccessToken` | `string` | Scoped access token for this run |
+| `clientData` | Typed by `clientDataSchema` | Custom data from the frontend |
+| `writer` | [`ChatWriter`](/ai-chat/reference#chatwriter) | Stream writer for custom chunks |
+
+Every lifecycle callback receives a `writer` — a lazy stream writer that lets you send custom `UIMessageChunk` parts (like `data-*` parts) to the frontend without the ceremony of `chat.stream.writer()`. See [ChatWriter](/ai-chat/reference#chatwriter).
+
+#### onChatStart
+
+Fires once on the first turn (turn 0) before `run()` executes. Use it to create a chat record in your database.
+
+The `continuation` field tells you whether this is a brand new chat or a continuation of an existing one (where the previous run timed out or was cancelled). The `preloaded` field tells you whether `onPreload` already ran.
+
+```ts
+export const myChat = chat.task({
+ id: "my-chat",
+ onChatStart: async ({ chatId, clientData, continuation, preloaded }) => {
+ if (preloaded) return; // Already set up in onPreload
+ if (continuation) return; // Chat record already exists
+
+ const { userId } = clientData as { userId: string };
+ await db.chat.create({
+ data: { id: chatId, userId, title: "New chat" },
+ });
+ },
+ run: async ({ messages, signal }) => {
+ return streamText({ model: openai("gpt-4o"), messages, abortSignal: signal });
+ },
+});
+```
+
+
+ `clientData` contains custom data from the frontend — either the `clientData` option on the
+ transport constructor (sent with every message) or the `metadata` option on `sendMessage()`
+ (per-message). See [Client data and metadata](/ai-chat/frontend#client-data-and-metadata).
+
+
+#### onTurnStart
+
+Fires at the start of every turn, after message accumulation and `onChatStart` (turn 0), but **before** `run()` executes. Use it to persist messages before streaming begins — so a mid-stream page refresh still shows the user's message.
+
+| Field | Type | Description |
+| ----------------- | --------------------------------------------- | ----------------------------------------------- |
+| `chatId` | `string` | Chat session ID |
+| `messages` | `ModelMessage[]` | Full accumulated conversation (model format) |
+| `uiMessages` | `UIMessage[]` | Full accumulated conversation (UI format) |
+| `turn` | `number` | Turn number (0-indexed) |
+| `runId` | `string` | The Trigger.dev run ID |
+| `chatAccessToken` | `string` | Scoped access token for this run |
+| `continuation` | `boolean` | Whether this run is continuing an existing chat |
+| `preloaded` | `boolean` | Whether this run was preloaded |
+| `clientData` | Typed by `clientDataSchema` | Custom data from the frontend |
+| `writer` | [`ChatWriter`](/ai-chat/reference#chatwriter) | Stream writer for custom chunks |
+
+```ts
+export const myChat = chat.task({
+ id: "my-chat",
+ onTurnStart: async ({ chatId, uiMessages, runId, chatAccessToken }) => {
+ await db.chat.update({
+ where: { id: chatId },
+ data: { messages: uiMessages },
+ });
+ await db.chatSession.upsert({
+ where: { id: chatId },
+ create: { id: chatId, runId, publicAccessToken: chatAccessToken },
+ update: { runId, publicAccessToken: chatAccessToken },
+ });
+ },
+ run: async ({ messages, signal }) => {
+ return streamText({ model: openai("gpt-4o"), messages, abortSignal: signal });
+ },
+});
+```
+
+
+ By persisting in `onTurnStart`, the user's message is saved to your database before the AI starts
+ streaming. If the user refreshes mid-stream, the message is already there.
+
+
+#### onBeforeTurnComplete
+
+Fires after the response is captured but **before** the stream closes. The `writer` can send custom chunks that appear in the current turn — use this for post-processing indicators, compaction progress, or any data the user should see before the turn ends.
+
+```ts
+export const myChat = chat.task({
+ id: "my-chat",
+ onBeforeTurnComplete: async ({ writer, usage, uiMessages }) => {
+ // Write a custom data part while the stream is still open
+ writer.write({
+ type: "data-usage-summary",
+ data: {
+ tokens: usage?.totalTokens,
+ messageCount: uiMessages.length,
+ },
+ });
+
+ // You can also compact messages here and write progress
+ if (usage?.totalTokens && usage.totalTokens > 50_000) {
+ writer.write({ type: "data-compaction", data: { status: "compacting" } });
+ chat.setMessages(compactedMessages);
+ writer.write({ type: "data-compaction", data: { status: "complete" } });
+ }
+ },
+ run: async ({ messages, signal }) => {
+ return streamText({ model: openai("gpt-4o"), messages, abortSignal: signal });
+ },
+});
+```
+
+Receives the same fields as [`TurnCompleteEvent`](/ai-chat/reference#turncompleteevent), plus a [`writer`](/ai-chat/reference#chatwriter).
+
+#### onTurnComplete
+
+Fires after each turn completes — after the response is captured and the stream is closed. This is the primary hook for persisting the assistant's response. Does not include a `writer` since the stream is already closed.
+
+| Field | Type | Description |
+| -------------------- | ------------------------ | -------------------------------------------------------------------------------------------- |
+| `chatId` | `string` | Chat session ID |
+| `messages` | `ModelMessage[]` | Full accumulated conversation (model format) |
+| `uiMessages` | `UIMessage[]` | Full accumulated conversation (UI format) |
+| `newMessages` | `ModelMessage[]` | Only this turn's messages (model format) |
+| `newUIMessages` | `UIMessage[]` | Only this turn's messages (UI format) |
+| `responseMessage` | `UIMessage \| undefined` | The assistant's response for this turn |
+| `turn` | `number` | Turn number (0-indexed) |
+| `runId` | `string` | The Trigger.dev run ID |
+| `chatAccessToken` | `string` | Scoped access token for this run |
+| `lastEventId` | `string \| undefined` | Stream position for resumption. Persist this with the session. |
+| `stopped` | `boolean` | Whether the user stopped generation during this turn |
+| `continuation` | `boolean` | Whether this run is continuing an existing chat |
+| `rawResponseMessage` | `UIMessage \| undefined` | The raw assistant response before abort cleanup (same as `responseMessage` when not stopped) |
+
+```ts
+export const myChat = chat.task({
+ id: "my-chat",
+ onTurnComplete: async ({ chatId, uiMessages, runId, chatAccessToken, lastEventId }) => {
+ await db.chat.update({
+ where: { id: chatId },
+ data: { messages: uiMessages },
+ });
+ await db.chatSession.upsert({
+ where: { id: chatId },
+ create: { id: chatId, runId, publicAccessToken: chatAccessToken, lastEventId },
+ update: { runId, publicAccessToken: chatAccessToken, lastEventId },
+ });
+ },
+ run: async ({ messages, signal }) => {
+ return streamText({ model: openai("gpt-4o"), messages, abortSignal: signal });
+ },
+});
+```
+
+
+ Use `uiMessages` to overwrite the full conversation each turn (simplest). Use `newUIMessages` if
+ you prefer to store messages individually — for example, one database row per message.
+
+
+
+ Persist `lastEventId` alongside the session. When the transport reconnects after a page refresh,
+ it uses this to skip past already-seen events — preventing duplicate messages.
+
+
+### Using prompts
+
+Use [AI Prompts](/ai/prompts) to manage your system prompt as versioned, overridable config. Store the resolved prompt in a lifecycle hook with `chat.prompt.set()`, then spread `chat.toStreamTextOptions()` into `streamText` — it includes the system prompt, model, config, and telemetry automatically.
+
+```ts
+import { chat } from "@trigger.dev/sdk/ai";
+import { prompts } from "@trigger.dev/sdk";
+import { streamText, createProviderRegistry } from "ai";
+import { openai } from "@ai-sdk/openai";
+import { z } from "zod";
+
+const registry = createProviderRegistry({ openai });
+
+const systemPrompt = prompts.define({
+ id: "my-chat-system",
+ model: "openai:gpt-4o",
+ config: { temperature: 0.7 },
+ variables: z.object({ name: z.string() }),
+ content: `You are a helpful assistant for {{name}}.`,
+});
+
+export const myChat = chat.task({
+ id: "my-chat",
+ clientDataSchema: z.object({ userId: z.string() }),
+ onChatStart: async ({ clientData }) => {
+ const user = await db.user.findUnique({ where: { id: clientData.userId } });
+ const resolved = await systemPrompt.resolve({ name: user.name });
+ chat.prompt.set(resolved);
+ },
+ run: async ({ messages, signal }) => {
+ return streamText({
+ ...chat.toStreamTextOptions({ registry }), // system, model, config, telemetry
+ messages,
+ abortSignal: signal,
+ });
+ },
+});
+```
+
+`chat.toStreamTextOptions()` returns an object with `system`, `model` (resolved via the registry), `temperature`, and `experimental_telemetry` — all from the stored prompt. Properties you set after the spread (like a client-selected model) take precedence.
+
+
+ See [Prompts](/ai/prompts) for the full guide — defining templates, variable schemas, dashboard
+ overrides, and the management SDK.
+
+
+### Stop generation
+
+#### How stop works
+
+Calling `stop()` from `useChat` sends a stop signal to the running task via input streams. The task's `streamText` call aborts (if you passed `signal` or `stopSignal`), but the **run stays alive** and waits for the next message. The partial response is captured and accumulated normally.
+
+#### Abort signals
+
+The `run` function receives three abort signals:
+
+| Signal | Fires when | Use for |
+| -------------- | ------------------------------------------- | ---------------------------------------------------------------------- |
+| `signal` | Stop **or** cancel | Pass to `streamText` — handles both cases. **Use this in most cases.** |
+| `stopSignal` | Stop only (per-turn, reset each turn) | Custom logic that should only run on user stop, not cancellation |
+| `cancelSignal` | Run cancel, expire, or maxDuration exceeded | Cleanup that should only happen on full cancellation |
+
+```ts
+export const myChat = chat.task({
+ id: "my-chat",
+ run: async ({ messages, signal, stopSignal, cancelSignal }) => {
+ return streamText({
+ model: openai("gpt-4o"),
+ messages,
+ abortSignal: signal, // Handles both stop and cancel
+ });
+ },
+});
+```
+
+
+ Use `signal` (the combined signal) in most cases. The separate `stopSignal` and `cancelSignal` are
+ only needed if you want different behavior for stop vs cancel.
+
+
+#### Detecting stop in callbacks
+
+The `onTurnComplete` event includes a `stopped` boolean that indicates whether the user stopped generation during that turn:
+
+```ts
+export const myChat = chat.task({
+ id: "my-chat",
+ onTurnComplete: async ({ chatId, uiMessages, stopped }) => {
+ await db.chat.update({
+ where: { id: chatId },
+ data: { messages: uiMessages, lastStoppedAt: stopped ? new Date() : undefined },
+ });
+ },
+ run: async ({ messages, signal }) => {
+ return streamText({ model: openai("gpt-4o"), messages, abortSignal: signal });
+ },
+});
+```
+
+You can also check stop status from **anywhere** during a turn using `chat.isStopped()`. This is useful inside `streamText`'s `onFinish` callback where the AI SDK's `isAborted` flag can be unreliable (e.g. when using `createUIMessageStream` + `writer.merge()`):
+
+```ts
+import { chat } from "@trigger.dev/sdk/ai";
+import { streamText } from "ai";
+
+export const myChat = chat.task({
+ id: "my-chat",
+ run: async ({ messages, signal }) => {
+ return streamText({
+ model: openai("gpt-4o"),
+ messages,
+ abortSignal: signal,
+ onFinish: ({ isAborted }) => {
+ // isAborted may be false even after stop when using createUIMessageStream
+ const wasStopped = isAborted || chat.isStopped();
+ if (wasStopped) {
+ // handle stop — e.g. log analytics
+ }
+ },
+ });
+ },
+});
+```
+
+#### Cleaning up aborted messages
+
+When stop happens mid-stream, the captured response message can contain parts in an incomplete state — tool calls stuck in `partial-call`, reasoning blocks still marked as `streaming`, etc. These can cause UI issues like permanent spinners.
+
+`chat.task` automatically cleans up the `responseMessage` when stop is detected before passing it to `onTurnComplete`. If you use `chat.pipe()` manually and capture response messages yourself, use `chat.cleanupAbortedParts()`:
+
+```ts
+const cleaned = chat.cleanupAbortedParts(rawResponseMessage);
+```
+
+This removes tool invocation parts stuck in `partial-call` state and marks any `streaming` text or reasoning parts as `done`.
+
+
+ Stop signal delivery is best-effort. There is a small race window where the model may finish
+ before the stop signal arrives, in which case the turn completes normally with `stopped: false`.
+ This is expected and does not require special handling.
+
+
+### Persistence
+
+#### What needs to be persisted
+
+To build a chat app that survives page refreshes, you need to persist two things:
+
+1. **Messages** — The conversation history. Persisted **server-side** in the task via `onTurnStart` and `onTurnComplete`.
+2. **Sessions** — The transport's connection state (`runId`, `publicAccessToken`, `lastEventId`). Persisted **server-side** via `onTurnStart` and `onTurnComplete`.
+
+
+ Sessions let the transport reconnect to an existing run after a page refresh. Without them, every
+ page load would start a new run — losing the conversation context that was accumulated in the
+ previous run.
+
+
+#### Full persistence example
+
+
+```ts trigger/chat.ts
+import { chat } from "@trigger.dev/sdk/ai";
+import { streamText } from "ai";
+import { openai } from "@ai-sdk/openai";
+import { z } from "zod";
+import { db } from "@/lib/db";
+
+export const myChat = chat.task({
+ id: "my-chat",
+ clientDataSchema: z.object({
+ userId: z.string(),
+ }),
+ onChatStart: async ({ chatId, clientData }) => {
+ await db.chat.create({
+ data: { id: chatId, userId: clientData.userId, title: "New chat", messages: [] },
+ });
+ },
+ onTurnStart: async ({ chatId, uiMessages, runId, chatAccessToken }) => {
+ // Persist messages + session before streaming
+ await db.chat.update({
+ where: { id: chatId },
+ data: { messages: uiMessages },
+ });
+ await db.chatSession.upsert({
+ where: { id: chatId },
+ create: { id: chatId, runId, publicAccessToken: chatAccessToken },
+ update: { runId, publicAccessToken: chatAccessToken },
+ });
+ },
+ onTurnComplete: async ({ chatId, uiMessages, runId, chatAccessToken, lastEventId }) => {
+ // Persist assistant response + stream position
+ await db.chat.update({
+ where: { id: chatId },
+ data: { messages: uiMessages },
+ });
+ await db.chatSession.upsert({
+ where: { id: chatId },
+ create: { id: chatId, runId, publicAccessToken: chatAccessToken, lastEventId },
+ update: { runId, publicAccessToken: chatAccessToken, lastEventId },
+ });
+ },
+ run: async ({ messages, signal }) => {
+ return streamText({
+ model: openai("gpt-4o"),
+ messages,
+ abortSignal: signal,
+ });
+ },
+});
+```
+
+```ts app/actions.ts
+"use server";
+
+import { chat } from "@trigger.dev/sdk/ai";
+import type { myChat } from "@/trigger/chat";
+import { db } from "@/lib/db";
+
+export const getChatToken = () => chat.createAccessToken("my-chat");
+
+export async function getChatMessages(chatId: string) {
+ const found = await db.chat.findUnique({ where: { id: chatId } });
+ return found?.messages ?? [];
+}
+
+export async function getAllSessions() {
+ const sessions = await db.chatSession.findMany();
+ const result: Record<
+ string,
+ {
+ runId: string;
+ publicAccessToken: string;
+ lastEventId?: string;
+ }
+ > = {};
+ for (const s of sessions) {
+ result[s.id] = {
+ runId: s.runId,
+ publicAccessToken: s.publicAccessToken,
+ lastEventId: s.lastEventId ?? undefined,
+ };
+ }
+ return result;
+}
+
+export async function deleteSession(chatId: string) {
+ await db.chatSession.delete({ where: { id: chatId } }).catch(() => {});
+}
+```
+
+```tsx app/components/chat.tsx
+"use client";
+
+import { useChat } from "@ai-sdk/react";
+import { useTriggerChatTransport } from "@trigger.dev/sdk/chat/react";
+import type { myChat } from "@/trigger/chat";
+import { getChatToken, deleteSession } from "@/app/actions";
+
+export function Chat({ chatId, initialMessages, initialSessions }) {
+ const transport = useTriggerChatTransport({
+ task: "my-chat",
+ accessToken: getChatToken,
+ clientData: { userId: currentUser.id }, // Type-checked against clientDataSchema
+ sessions: initialSessions,
+ onSessionChange: (id, session) => {
+ if (!session) deleteSession(id);
+ },
+ });
+
+ const { messages, sendMessage, stop, status } = useChat({
+ id: chatId,
+ messages: initialMessages,
+ transport,
+ resume: initialMessages.length > 0,
+ });
+
+ return (
+
+ );
+ }
+ // ...other part types
+})}
+```
+
+The `target` option accepts:
+- `"self"` — current run (default)
+- `"parent"` — parent task's run
+- `"root"` — root task's run (the chat task)
+- A specific run ID string
+
+---
+
+## Task tool subtasks (`ai.toolExecute`)
+
+When a subtask runs through **`execute: ai.toolExecute(task)`** (or the deprecated `ai.tool()`), it can access the tool call context and chat context from the parent:
+
+```ts
+import { ai, chat } from "@trigger.dev/sdk/ai";
+import type { myChat } from "./chat";
+
+export const mySubtask = schemaTask({
+ id: "my-subtask",
+ schema: z.object({ query: z.string() }),
+ run: async ({ query }) => {
+ // Get the AI SDK's tool call ID (useful for data-* chunk IDs)
+ const toolCallId = ai.toolCallId();
+
+ // Get typed chat context — pass typeof yourChatTask for typed clientData
+ const { chatId, clientData } = ai.chatContextOrThrow();
+ // clientData is typed based on myChat's clientDataSchema
+
+ // Write a data chunk using the tool call ID
+ const { waitUntilComplete } = chat.stream.writer({
+ target: "root",
+ execute: ({ write }) => {
+ write({
+ type: "data-progress",
+ id: toolCallId,
+ data: { status: "working", query, userId: clientData?.userId },
+ });
+ },
+ });
+ await waitUntilComplete();
+
+ return { result: "done" };
+ },
+});
+```
+
+| Helper | Returns | Description |
+|--------|---------|-------------|
+| `ai.toolCallId()` | `string \| undefined` | The AI SDK tool call ID |
+| `ai.chatContext()` | `{ chatId, turn, continuation, clientData } \| undefined` | Chat context with typed `clientData`. Returns `undefined` if not in a chat context. |
+| `ai.chatContextOrThrow()` | `{ chatId, turn, continuation, clientData }` | Same as above but throws if not in a chat context |
+| `ai.currentToolOptions()` | `ToolCallExecutionOptions \| undefined` | Full tool execution options |
+
+---
+
+## Preload
+
+Preload eagerly triggers a run for a chat before the first message is sent. This allows initialization (DB setup, context loading) to happen while the user is still typing, reducing first-response latency.
+
+### Frontend
+
+Call `transport.preload(chatId)` to start a run early:
+
+```tsx
+import { useTriggerChatTransport } from "@trigger.dev/sdk/chat/react";
+import { useChat } from "@ai-sdk/react";
+
+export function Chat({ chatId }) {
+ const transport = useTriggerChatTransport({
+ task: "my-chat",
+ accessToken: getChatToken,
+ clientData: { userId: currentUser.id },
+ });
+
+ // Preload on mount — run starts before the user types anything
+ useEffect(() => {
+ transport.preload(chatId, { idleTimeoutInSeconds: 60 });
+ }, [chatId]);
+
+ const { messages, sendMessage } = useChat({ id: chatId, transport });
+ // ...
+}
+```
+
+Preload is a no-op if a session already exists for this chatId.
+
+When the transport needs a trigger token for preload, your `accessToken` callback receives `{ chatId, purpose: "preload" }` (same as for a normal trigger, but `purpose` is `"trigger"` when starting a run from `sendMessages`). See [TriggerChatTransport options](/ai-chat/reference#triggerchattransport-options).
+
+### Backend
+
+On the backend, the `onPreload` hook fires immediately. The run then waits for the first message. When the user sends a message, `onChatStart` fires with `preloaded: true` — you can skip initialization that was already done in `onPreload`:
+
+```ts
+export const myChat = chat.task({
+ id: "my-chat",
+ onPreload: async ({ chatId, clientData }) => {
+ // Eagerly initialize — runs before the first message
+ userContext.init(await loadUser(clientData.userId));
+ await db.chat.create({ data: { id: chatId } });
+ },
+ onChatStart: async ({ preloaded }) => {
+ if (preloaded) return; // Already initialized in onPreload
+ // ... fallback initialization for non-preloaded runs
+ },
+ run: async ({ messages, signal }) => {
+ return streamText({ model: openai("gpt-4o"), messages, abortSignal: signal });
+ },
+});
+```
+
+With `chat.createSession()` or raw tasks, check `payload.trigger === "preload"` and wait for the first message:
+
+```ts
+if (payload.trigger === "preload") {
+ // Initialize early...
+ const result = await chat.messages.waitWithIdleTimeout({
+ idleTimeoutInSeconds: 60,
+ timeout: "1h",
+ });
+ if (!result.ok) return;
+ currentPayload = result.output;
+}
+```
diff --git a/docs/ai-chat/frontend.mdx b/docs/ai-chat/frontend.mdx
new file mode 100644
index 00000000000..c03eb484565
--- /dev/null
+++ b/docs/ai-chat/frontend.mdx
@@ -0,0 +1,264 @@
+---
+title: "Frontend"
+sidebarTitle: "Frontend"
+description: "Transport setup, session management, client data, and frontend patterns for AI Chat."
+---
+
+## Transport setup
+
+Use the `useTriggerChatTransport` hook from `@trigger.dev/sdk/chat/react` to create a memoized transport instance, then pass it to `useChat`:
+
+```tsx
+import { useTriggerChatTransport } from "@trigger.dev/sdk/chat/react";
+import { useChat } from "@ai-sdk/react";
+import type { myChat } from "@/trigger/chat";
+import { getChatToken } from "@/app/actions";
+
+export function Chat() {
+  const transport = useTriggerChatTransport<typeof myChat>({
+    task: "my-chat",
+    accessToken: getChatToken,
+  });
+
+ const { messages, sendMessage, stop, status } = useChat({ transport });
+ // ... render UI
+}
+```
+
+The transport is created once on first render and reused across re-renders. Pass a type parameter for compile-time validation of the task ID.
+
+
+ The hook keeps `onSessionChange` up to date via a ref internally, so you don't need to memoize the
+ callback or worry about stale closures.
+
+
+## Typed messages (`chat.withUIMessage`)
+
+If your chat task is defined with [`chat.withUIMessage()`](/ai-chat/types) (custom `data-*` parts, typed tools, etc.), pass the same message type through `useChat` so `messages` and `message.parts` are narrowed on the client:
+
+```tsx
+import { useChat } from "@ai-sdk/react";
+import { useTriggerChatTransport, type InferChatUIMessage } from "@trigger.dev/sdk/chat/react";
+import type { myChat } from "./myChat";
+
+type Msg = InferChatUIMessage<typeof myChat>;
+
+const transport = useTriggerChatTransport({
+ task: "my-chat",
+ accessToken: getChatToken,
+});
+const { messages } = useChat<Msg>({ transport });
+```
+
+See the [Types](/ai-chat/types) guide for defining `YourUIMessage`, default stream options, and backend examples.
+
+### Dynamic access tokens
+
+For token refresh, pass a function instead of a string. The transport calls it when it needs a **trigger** token: starting a run from `sendMessages`, or when you call `preload()`. The callback receives `chatId` and `purpose` (`"trigger"` | `"preload"`). Import `ResolveChatAccessTokenParams` from `@trigger.dev/sdk/chat` to type your server action or fetch handler (see [reference](/ai-chat/reference#triggerchattransport-options)).
+
+```ts
+import type { ResolveChatAccessTokenParams } from "@trigger.dev/sdk/chat";
+
+const transport = useTriggerChatTransport({
+ task: "my-chat",
+ accessToken: async (input: ResolveChatAccessTokenParams) => {
+ const res = await fetch("/api/chat-token", {
+ method: "POST",
+ body: JSON.stringify(input),
+ });
+ return res.text();
+ },
+});
+```
+
+## Session management
+
+### Session cleanup (frontend)
+
+Since session creation and updates are handled server-side, the frontend only needs to handle session deletion when a run ends:
+
+```tsx
+const transport = useTriggerChatTransport({
+ task: "my-chat",
+ accessToken: getChatToken,
+ sessions: loadedSessions, // Restored from DB on page load
+ onSessionChange: (chatId, session) => {
+ if (!session) {
+ deleteSession(chatId); // Server action — run ended
+ }
+ },
+});
+```
+
+### Restoring on page load
+
+On page load, fetch both the messages and the session from your database, then pass them to `useChat` and the transport. Pass `resume: true` to `useChat` when there's an existing conversation — this tells the AI SDK to reconnect to the stream via the transport.
+
+```tsx app/page.tsx
+"use client";
+
+import { useEffect, useState } from "react";
+import { useTriggerChatTransport } from "@trigger.dev/sdk/chat/react";
+import { useChat } from "@ai-sdk/react";
+import { getChatToken, getChatMessages, getSession, deleteSession } from "@/app/actions";
+
+export default function ChatPage({ chatId }: { chatId: string }) {
+ const [initialMessages, setInitialMessages] = useState([]);
+ const [initialSession, setInitialSession] = useState(undefined);
+ const [loaded, setLoaded] = useState(false);
+
+ useEffect(() => {
+ async function load() {
+ const [messages, session] = await Promise.all([getChatMessages(chatId), getSession(chatId)]);
+ setInitialMessages(messages);
+ setInitialSession(session ? { [chatId]: session } : undefined);
+ setLoaded(true);
+ }
+ load();
+ }, [chatId]);
+
+ if (!loaded) return null;
+
+  return (
+    <ChatClient chatId={chatId} initialMessages={initialMessages} initialSessions={initialSession} />
+  );
+}
+
+function ChatClient({ chatId, initialMessages, initialSessions }) {
+ const transport = useTriggerChatTransport({
+ task: "my-chat",
+ accessToken: getChatToken,
+ sessions: initialSessions,
+ onSessionChange: (id, session) => {
+ if (!session) deleteSession(id);
+ },
+ });
+
+ const { messages, sendMessage, stop, status } = useChat({
+ id: chatId,
+ messages: initialMessages,
+ transport,
+ resume: initialMessages.length > 0, // Resume if there's an existing conversation
+ });
+
+ // ... render UI
+}
+```
+
+
+ `resume: true` causes `useChat` to call `reconnectToStream` on the transport when the component
+ mounts. The transport uses the session's `lastEventId` to skip past already-seen stream events, so
+ the frontend only receives new data. Only enable `resume` when there are existing messages — for
+ brand new chats, there's nothing to reconnect to.
+
+
+
+ In React strict mode (enabled by default in Next.js dev), you may see a `TypeError: Cannot read
+ properties of undefined (reading 'state')` in the console when using `resume`. This is a [known
+ bug in the AI SDK](https://github.com/vercel/ai/issues/8477) caused by React strict mode
+ double-firing the resume effect. The error is caught internally and **does not affect
+ functionality** — streaming and message display work correctly. It only appears in development and
+ will not occur in production builds.
+
+
+## Client data and metadata
+
+### Transport-level client data
+
+Set default client data on the transport that's included in every request. When the task uses `clientDataSchema`, this is type-checked to match:
+
+```ts
+const transport = useTriggerChatTransport({
+ task: "my-chat",
+ accessToken: getChatToken,
+ clientData: { userId: currentUser.id },
+});
+```
+
+### Per-message metadata
+
+Pass metadata with individual messages via `sendMessage`. Per-message values are merged with transport-level client data (per-message wins on conflicts):
+
+```ts
+sendMessage({ text: "Hello" }, { metadata: { model: "gpt-4o", priority: "high" } });
+```
+
+### Typed client data with clientDataSchema
+
+Instead of manually parsing `clientData` with Zod in every hook, pass a `clientDataSchema` to `chat.task`. The schema validates the data once per turn, and `clientData` is typed in all hooks and `run`:
+
+```ts
+import { chat } from "@trigger.dev/sdk/ai";
+import { streamText } from "ai";
+import { openai } from "@ai-sdk/openai";
+import { z } from "zod";
+
+export const myChat = chat.task({
+ id: "my-chat",
+ clientDataSchema: z.object({
+ model: z.string().optional(),
+ userId: z.string(),
+ }),
+ onChatStart: async ({ chatId, clientData }) => {
+ // clientData is typed as { model?: string; userId: string }
+ await db.chat.create({
+ data: { id: chatId, userId: clientData.userId },
+ });
+ },
+ run: async ({ messages, clientData, signal }) => {
+ // Same typed clientData — no manual parsing needed
+ return streamText({
+ model: openai(clientData?.model ?? "gpt-4o"),
+ messages,
+ abortSignal: signal,
+ });
+ },
+});
+```
+
+The schema also types the `clientData` option on the frontend transport:
+
+```ts
+// TypeScript enforces that clientData matches the schema
+const transport = useTriggerChatTransport({
+ task: "my-chat",
+ accessToken: getChatToken,
+ clientData: { userId: currentUser.id },
+});
+```
+
+Supports Zod, ArkType, Valibot, and other schema libraries supported by the SDK.
+
+## Stop generation
+
+Calling `stop()` from `useChat` sends a stop signal to the running task via input streams. The task aborts the current `streamText` call, but the run stays alive for the next message:
+
+```tsx
+const { messages, sendMessage, stop, status } = useChat({ transport });
+
+{status === "streaming" && (
+  <button type="button" onClick={() => stop()}>
+    Stop
+  </button>
+)}
+```
+
+See [Stop generation](/ai-chat/backend#stop-generation) in the backend docs for how to handle stop signals in your task.
+
+## Self-hosting
+
+If you're self-hosting Trigger.dev, pass the `baseURL` option:
+
+```ts
+const transport = useTriggerChatTransport({
+ task: "my-chat",
+ accessToken,
+ baseURL: "https://your-trigger-instance.com",
+});
+```
diff --git a/docs/ai-chat/overview.mdx b/docs/ai-chat/overview.mdx
new file mode 100644
index 00000000000..3fe6d0f3ec2
--- /dev/null
+++ b/docs/ai-chat/overview.mdx
@@ -0,0 +1,162 @@
+---
+title: "AI Chat"
+sidebarTitle: "Overview"
+description: "Run AI SDK chat completions as durable Trigger.dev tasks with built-in realtime streaming, multi-turn conversations, and message persistence."
+---
+
+## Overview
+
+The `@trigger.dev/sdk` provides a custom [ChatTransport](https://sdk.vercel.ai/docs/ai-sdk-ui/transport) for the Vercel AI SDK's `useChat` hook. This lets you run chat completions as **durable Trigger.dev tasks** instead of fragile API routes — with automatic retries, observability, and realtime streaming built in.
+
+**How it works:**
+1. The frontend sends messages via `useChat` through `TriggerChatTransport`
+2. The first message triggers a Trigger.dev task; subsequent messages resume the **same run** via input streams
+3. The task streams `UIMessageChunk` events back via Trigger.dev's realtime streams
+4. The AI SDK's `useChat` processes the stream natively — text, tool calls, reasoning, etc.
+5. Between turns, the run stays idle briefly then suspends (freeing compute) until the next message
+
+No custom API routes needed. Your chat backend is a Trigger.dev task.
+
+
+
+### First message flow
+
+```mermaid
+sequenceDiagram
+ participant User
+ participant useChat as useChat + Transport
+ participant API as Trigger.dev API
+ participant Task as chat.task Worker
+ participant LLM as LLM Provider
+
+ User->>useChat: sendMessage("Hello")
+ useChat->>useChat: No session for chatId → trigger new run
+ useChat->>API: triggerTask(payload, tags: [chat:id])
+ API-->>useChat: { runId, publicAccessToken }
+ useChat->>useChat: Store session, subscribe to SSE
+
+ API->>Task: Start run with ChatTaskWirePayload
+ Task->>Task: onChatStart({ chatId, messages, clientData })
+ Task->>Task: onTurnStart({ chatId, messages })
+ Task->>LLM: streamText({ model, messages, abortSignal })
+ LLM-->>Task: Stream response chunks
+ Task->>API: streams.pipe("chat", uiStream)
+ API-->>useChat: SSE: UIMessageChunks
+ useChat-->>User: Render streaming text
+ Task->>API: Write __trigger_turn_complete
+ API-->>useChat: SSE: turn complete + refreshed token
+ useChat->>useChat: Close stream, update session
+ Task->>Task: onTurnComplete({ messages, stopped: false })
+ Task->>Task: Wait for next message (idle → suspend)
+```
+
+### Multi-turn flow
+
+```mermaid
+sequenceDiagram
+ participant User
+ participant useChat as useChat + Transport
+ participant API as Trigger.dev API
+ participant Task as chat.task Worker
+ participant LLM as LLM Provider
+
+ Note over Task: Suspended, waiting for message
+
+ User->>useChat: sendMessage("Tell me more")
+ useChat->>useChat: Session exists → send via input stream
+ useChat->>API: sendInputStream(runId, "chat-messages", payload)
+ Note right of useChat: Only sends new message (not full history)
+
+ API->>Task: Deliver to messagesInput
+ Task->>Task: Wake from suspend
+ Task->>Task: Append to accumulated messages
+ Task->>Task: onTurnStart({ turn: 1 })
+ Task->>LLM: streamText({ messages: [all accumulated] })
+ LLM-->>Task: Stream response
+ Task->>API: streams.pipe("chat", uiStream)
+ API-->>useChat: SSE: UIMessageChunks
+ useChat-->>User: Render streaming text
+ Task->>API: Write __trigger_turn_complete
+ Task->>Task: onTurnComplete({ turn: 1 })
+ Task->>Task: Wait for next message (idle → suspend)
+```
+
+### Stop signal flow
+
+```mermaid
+sequenceDiagram
+ participant User
+ participant useChat as useChat + Transport
+ participant API as Trigger.dev API
+ participant Task as chat.task Worker
+ participant LLM as LLM Provider
+
+ Note over Task: Streaming response...
+
+ User->>useChat: Click "Stop"
+ useChat->>API: sendInputStream(runId, "chat-stop", { stop: true })
+ API->>Task: Deliver to stopInput
+ Task->>Task: stopController.abort()
+ LLM-->>Task: Stream ends (AbortError)
+ Task->>Task: cleanupAbortedParts(responseMessage)
+ Note right of Task: Remove partial tool calls, mark streaming parts as done
+ Task->>API: Write __trigger_turn_complete
+ API-->>useChat: SSE: turn complete
+ Task->>Task: onTurnComplete({ stopped: true })
+ Task->>Task: Wait for next message
+```
+
+
+
+
+ Requires `@trigger.dev/sdk` version **4.4.0 or later** and the `ai` package **v5.0.0 or later**.
+
+
+## How multi-turn works
+
+### One run, many turns
+
+The entire conversation lives in a **single Trigger.dev run**. After each AI response, the run waits for the next message via input streams. The frontend transport handles this automatically — it triggers a new run for the first message, and sends subsequent messages to the existing run.
+
+This means your conversation has full observability in the Trigger.dev dashboard: every turn is a span inside the same run.
+
+### Warm and suspended states
+
+After each turn, the run goes through two phases of waiting:
+
+1. **Warm phase** (default 30s) — The run stays active and responds instantly to the next message. Uses compute.
+2. **Suspended phase** (default up to 1h) — The run suspends, freeing compute. It wakes when the next message arrives. There's a brief delay as the run resumes.
+
+If no message arrives within the turn timeout, the run ends gracefully. The next message from the frontend will automatically start a fresh run.
+
+
+  You are not charged for compute during the suspended phase. Only the warm phase uses compute resources.
+
+
+### What the backend accumulates
+
+The backend automatically accumulates the full conversation history across turns. After the first turn, the frontend transport only sends the new user message — not the entire history. This is handled transparently by the transport and task.
+
+The accumulated messages are available in:
+- `run()` as `messages` (`ModelMessage[]`) — for passing to `streamText`
+- `onTurnStart()` as `uiMessages` (`UIMessage[]`) — for persisting before streaming
+- `onTurnComplete()` as `uiMessages` (`UIMessage[]`) — for persisting after the response
+
+## Three approaches
+
+There are three ways to build the backend, from most opinionated to most flexible:
+
+| Approach | Use when | What you get |
+|----------|----------|--------------|
+| [chat.task()](/ai-chat/backend#chattask) | Most apps | Auto-piping, lifecycle hooks, message accumulation, stop handling |
+| [chat.createSession()](/ai-chat/backend#chatcreatesession) | Need a loop but not hooks | Async iterator with per-turn helpers, message accumulation, stop handling |
+| [Raw task + primitives](/ai-chat/backend#raw-task-with-primitives) | Full control | Manual control of every step — use `chat.messages`, `chat.createStopSignal()`, etc. |
+
+## Related
+
+- [Quick Start](/ai-chat/quick-start) — Get a working chat in 3 steps
+- [Backend](/ai-chat/backend) — Backend approaches in detail
+- [Frontend](/ai-chat/frontend) — Transport setup, sessions, client data
+- [Types](/ai-chat/types) — TypeScript patterns, including custom `UIMessage` with `chat.withUIMessage`
+- [Features](/ai-chat/features) — Per-run data, deferred work, streaming, subtasks
+- [API Reference](/ai-chat/reference) — Complete reference tables
diff --git a/docs/ai-chat/pending-messages.mdx b/docs/ai-chat/pending-messages.mdx
new file mode 100644
index 00000000000..3f0e9ecefda
--- /dev/null
+++ b/docs/ai-chat/pending-messages.mdx
@@ -0,0 +1,327 @@
+---
+title: "Pending Messages"
+sidebarTitle: "Pending Messages"
+description: "Inject user messages mid-execution to steer agents between tool-call steps."
+---
+
+## Overview
+
+When an AI agent is executing tool calls, users may want to send a message that **steers the agent mid-execution** — adding context, correcting course, or refining the request without waiting for the response to finish.
+
+The `pendingMessages` option enables this by injecting user messages between tool-call steps via the AI SDK's `prepareStep`. Messages that arrive during streaming are queued and injected at the next step boundary. If there are no more step boundaries (single-step response or final text generation), the message becomes the next turn automatically.
+
+## How it works
+
+1. User sends a message while the agent is streaming
+2. The message is sent to the backend via input stream (`transport.sendPendingMessage`)
+3. The backend queues it in the steering queue
+4. At the next `prepareStep` boundary (between tool-call steps), `shouldInject` is called
+5. If it returns `true`, the message is injected into the LLM's context
+6. A `data-pending-message-injected` stream chunk confirms injection to the frontend
+7. If `prepareStep` never fires (no tool calls), the message becomes the next turn
+
+## Backend: chat.task
+
+Add `pendingMessages` to your `chat.task` configuration:
+
+```ts
+import { chat } from "@trigger.dev/sdk/ai";
+import { streamText } from "ai";
+import { openai } from "@ai-sdk/openai";
+
+export const myChat = chat.task({
+ id: "my-chat",
+ pendingMessages: {
+ // Only inject when there are completed steps (tool calls happened)
+ shouldInject: ({ steps }) => steps.length > 0,
+ },
+ run: async ({ messages, signal }) => {
+ return streamText({
+      ...chat.toStreamTextOptions(),
+      model: openai("gpt-4o"),
+      messages,
+ tools: { /* ... */ },
+ abortSignal: signal,
+ });
+ },
+});
+```
+
+The `prepareStep` for injection is automatically included when you spread `chat.toStreamTextOptions()`. If you provide your own `prepareStep` after the spread, it overrides the auto-injected one.
+
+### Options
+
+| Option | Type | Description |
+|--------|------|-------------|
+| `shouldInject` | `(event: PendingMessagesBatchEvent) => boolean` | Decide whether to inject the batch. Called once per step boundary. If absent, no injection happens. |
+| `prepare` | `(event: PendingMessagesBatchEvent) => ModelMessage[]` | Transform the batch before injection. Default: convert each message via `convertToModelMessages`. |
+| `onReceived` | `(event) => void` | Called when a message arrives during streaming (per-message). |
+| `onInjected` | `(event) => void` | Called after a batch is injected. |
+
+### shouldInject
+
+Called once per step boundary with the full batch of pending messages. Return `true` to inject all of them, `false` to skip (they'll be available at the next boundary or become the next turn).
+
+```ts
+pendingMessages: {
+ // Always inject
+ shouldInject: () => true,
+
+ // Only inject after tool calls
+ shouldInject: ({ steps }) => steps.length > 0,
+
+ // Only inject if there's one message
+ shouldInject: ({ messages }) => messages.length === 1,
+},
+```
+
+The event includes:
+
+| Field | Type | Description |
+|-------|------|-------------|
+| `messages` | `UIMessage[]` | All pending messages (batch) |
+| `modelMessages` | `ModelMessage[]` | Current conversation |
+| `steps` | `CompactionStep[]` | Completed steps |
+| `stepNumber` | `number` | Current step (0-indexed) |
+| `chatId` | `string` | Chat session ID |
+| `turn` | `number` | Current turn |
+| `clientData` | `unknown` | Frontend metadata |
+
+### prepare
+
+Transform the batch of pending messages before they're injected into the LLM's context. By default, each UIMessage is converted to ModelMessages individually. Use `prepare` to combine multiple messages or add context:
+
+```ts
+pendingMessages: {
+ shouldInject: ({ steps }) => steps.length > 0,
+ prepare: ({ messages }) => [{
+ role: "user",
+ content: messages.length === 1
+ ? messages[0].parts[0]?.text ?? ""
+ : `The user sent ${messages.length} messages:\n${
+ messages.map((m, i) => `${i + 1}. ${m.parts[0]?.text}`).join("\n")
+ }`,
+ }],
+},
+```
+
+### Stream chunk
+
+When messages are injected, the SDK automatically writes a `data-pending-message-injected` stream chunk containing the message IDs and text. The frontend uses this to:
+- Confirm which messages were injected
+- Remove them from the pending overlay
+- Render them inline at the injection point in the assistant response
+
+A "pending message injected" span also appears in the run trace.
+
+## Backend: chat.createSession
+
+Pass `pendingMessages` to the session options:
+
+```ts
+const session = chat.createSession(payload, {
+ signal,
+ idleTimeoutInSeconds: 60,
+ pendingMessages: {
+ shouldInject: () => true,
+ },
+});
+
+for await (const turn of session) {
+ const result = streamText({
+ model: openai("gpt-4o"),
+ messages: turn.messages,
+ abortSignal: turn.signal,
+ prepareStep: turn.prepareStep(), // Handles injection + compaction
+ });
+
+ await turn.complete(result);
+}
+```
+
+Use `turn.prepareStep()` to get a prepareStep function that handles both injection and compaction. Users who spread `chat.toStreamTextOptions()` get it automatically.
+
+## Backend: MessageAccumulator (raw task)
+
+Pass `pendingMessages` to the constructor and wire up the message listener manually:
+
+```ts
+const conversation = new chat.MessageAccumulator({
+ pendingMessages: {
+ shouldInject: () => true,
+ prepare: ({ messages }) => [{
+ role: "user",
+ content: `[Steering]: ${messages.map(m => m.parts[0]?.text).join(", ")}`,
+ }],
+ },
+});
+
+for (let turn = 0; turn < 100; turn++) {
+ const messages = await conversation.addIncoming(payload.messages, payload.trigger, turn);
+
+ // Listen for steering messages during streaming
+ const sub = chat.messages.on(async (msg) => {
+ const lastMsg = msg.messages?.[msg.messages.length - 1];
+ if (lastMsg) await conversation.steerAsync(lastMsg);
+ });
+
+ const result = streamText({
+ model: openai("gpt-4o"),
+ messages,
+ prepareStep: conversation.prepareStep(), // Handles injection + compaction
+ });
+
+ const response = await chat.pipeAndCapture(result);
+ sub.off();
+
+ if (response) await conversation.addResponse(response);
+ await chat.writeTurnComplete();
+}
+```
+
+### MessageAccumulator methods
+
+| Method | Description |
+|--------|-------------|
+| `steer(message, modelMessages?)` | Queue a UIMessage for injection (sync) |
+| `steerAsync(message)` | Queue a UIMessage, converting to model messages automatically |
+| `drainSteering()` | Get and clear unconsumed steering messages |
+| `prepareStep()` | Returns a prepareStep function handling injection + compaction |
+
+## Frontend: usePendingMessages hook
+
+The `usePendingMessages` hook manages all the frontend complexity — tracking pending messages, detecting injections, and handling the turn lifecycle.
+
+```tsx
+import { useChat } from "@ai-sdk/react";
+import { useTriggerChatTransport, usePendingMessages } from "@trigger.dev/sdk/chat/react";
+
+function Chat({ chatId }) {
+ const transport = useTriggerChatTransport({ task: "my-chat", accessToken });
+
+ const { messages, setMessages, sendMessage, stop, status } = useChat({
+ id: chatId,
+ transport,
+ });
+
+ const pending = usePendingMessages({
+ transport,
+ chatId,
+ status,
+ messages,
+ setMessages,
+ sendMessage,
+ metadata: { model: "gpt-4o" },
+ });
+
+  return (
+    <div>
+      {/* Render `messages` and overlay `pending.pending`; wire inputs to
+          pending.steer(text) / pending.queue(text) */}
+    </div>
+  );
+}
+```
+
+### Hook API
+
+| Property/Method | Type | Description |
+|----------------|------|-------------|
+| `pending` | `PendingMessage[]` | Current pending messages with `id`, `text`, `mode`, and `injected` status |
+| `steer(text)` | `(text: string) => void` | Send a steering message during streaming, or normal message when ready |
+| `queue(text)` | `(text: string) => void` | Queue for next turn during streaming, or send normally when ready |
+| `promoteToSteering(id)` | `(id: string) => void` | Convert a queued message to steering (sends via input stream immediately) |
+| `isInjectionPoint(part)` | `(part: unknown) => boolean` | Check if an assistant message part is an injection confirmation |
+| `getInjectedMessageIds(part)` | `(part: unknown) => string[]` | Get message IDs from an injection point |
+| `getInjectedMessages(part)` | `(part: unknown) => InjectedMessage[]` | Get messages (id + text) from an injection point |
+
+### PendingMessage
+
+| Field | Type | Description |
+|-------|------|-------------|
+| `id` | `string` | Unique message ID |
+| `text` | `string` | Message text |
+| `mode` | `"steering" \| "queued"` | How the message is being handled |
+| `injected` | `boolean` | Whether the backend confirmed injection |
+
+### Message lifecycle
+
+- **Steering messages** are sent via `transport.sendPendingMessage()` immediately. They appear as purple pending bubbles. If injected, they disappear from the overlay and render inline at the injection point. If not injected (no more step boundaries), they auto-send as the next turn when the response finishes.
+
+- **Queued messages** stay client-side until the turn completes, then auto-send as the next turn via `sendMessage()`. They can be promoted to steering mid-stream by clicking "Steer instead".
+
+- **Promoted messages** are queued messages that were converted to steering. They get sent via input stream immediately and follow the steering lifecycle from that point.
+
+## Transport: sendPendingMessage
+
+The `TriggerChatTransport` exposes a `sendPendingMessage` method for sending messages via input stream without disrupting the active stream subscription:
+
+```ts
+const sent = await transport.sendPendingMessage(chatId, {
+ id: crypto.randomUUID(),
+ role: "user",
+ parts: [{ type: "text", text: "and compare to vercel" }],
+}, { model: "gpt-4o" });
+```
+
+Unlike `sendMessage()` from useChat, this does NOT:
+- Add the message to useChat's local state
+- Cancel the active stream subscription
+- Start a new response stream
+
+The `usePendingMessages` hook calls this internally — you typically don't need to use it directly.
+
+## Coexistence with compaction
+
+Pending message injection and compaction both use `prepareStep`. When both are configured, the auto-injected `prepareStep` handles them in order:
+
+1. **Compaction** runs first — checks threshold, generates summary if needed
+2. **Injection** runs second — pending messages are appended to either the compacted or original messages
+
+This means injected messages are always included after compaction, ensuring the LLM sees both the compressed history and the new steering input.
diff --git a/docs/ai-chat/quick-start.mdx b/docs/ai-chat/quick-start.mdx
new file mode 100644
index 00000000000..881cc381548
--- /dev/null
+++ b/docs/ai-chat/quick-start.mdx
@@ -0,0 +1,118 @@
+---
+title: "Quick Start"
+sidebarTitle: "Quick Start"
+description: "Get a working AI chat in 3 steps — define a task, generate a token, and wire up the frontend."
+---
+
+
+
+ Use `chat.task` from `@trigger.dev/sdk/ai` to define a task that handles chat messages. The `run` function receives `ModelMessage[]` (already converted from the frontend's `UIMessage[]`) — pass them directly to `streamText`.
+
+ If you return a `StreamTextResult`, it's **automatically piped** to the frontend.
+
+ ```ts trigger/chat.ts
+ import { chat } from "@trigger.dev/sdk/ai";
+ import { streamText } from "ai";
+ import { openai } from "@ai-sdk/openai";
+
+ export const myChat = chat.task({
+ id: "my-chat",
+ run: async ({ messages, signal }) => {
+ // messages is ModelMessage[] — pass directly to streamText
+ // signal fires on stop or run cancel
+ return streamText({
+ model: openai("gpt-4o"),
+ messages,
+ abortSignal: signal,
+ });
+ },
+ });
+ ```
+
+
+ For a **custom** [`UIMessage`](https://sdk.vercel.ai/docs/reference/ai-sdk-core/ui-message) subtype (typed `data-*` parts, tool map, etc.), define the task with [`chat.withUIMessage<...>().task({...})`](/ai-chat/types) instead of `chat.task`.
+
+
+
+
+
+  On your server (e.g. a Next.js server action), create a Trigger.dev public access token scoped to your chat task. The transport calls your function with `chatId` and `purpose` (`"trigger"` or `"preload"`). Import `ResolveChatAccessTokenParams` from `@trigger.dev/sdk/chat` so the signature matches — see [TriggerChatTransport options](/ai-chat/reference#triggerchattransport-options).
+
+ ```ts app/actions.ts
+ "use server";
+
+ import { chat } from "@trigger.dev/sdk/ai";
+ import type { ResolveChatAccessTokenParams } from "@trigger.dev/sdk/chat";
+ import type { myChat } from "@/trigger/chat";
+
+ export async function getChatToken(_input: ResolveChatAccessTokenParams) {
+ return chat.createAccessToken("my-chat");
+ }
+ ```
+
+
+
+
+ Use the `useTriggerChatTransport` hook from `@trigger.dev/sdk/chat/react` to create a memoized transport instance, then pass it to `useChat`:
+
+ ```tsx app/components/chat.tsx
+ "use client";
+
+ import { useChat } from "@ai-sdk/react";
+ import { useTriggerChatTransport } from "@trigger.dev/sdk/chat/react";
+ import type { myChat } from "@/trigger/chat";
+ import { getChatToken } from "@/app/actions";
+
+ export function Chat() {
+ const transport = useTriggerChatTransport({
+ task: "my-chat",
+ accessToken: getChatToken,
+ });
+
+ const { messages, sendMessage, stop, status } = useChat({ transport });
+
+    return (
+      <div>
+        {/* Render `messages`; call sendMessage({ text }) to send, and stop()
+            to cancel while status === "streaming" */}
+      </div>
+    );
+ }
+ ```
+
+
+
+
+## Next steps
+
+- [Backend](/ai-chat/backend) — Lifecycle hooks, persistence, session iterator, raw task primitives
+- [Frontend](/ai-chat/frontend) — Session management, client data, reconnection
+- [Types](/ai-chat/types) — `chat.withUIMessage`, `InferChatUIMessage`, and related typing
+- [Features](/ai-chat/features) — Per-run data, deferred work, streaming, subtasks
diff --git a/docs/ai-chat/reference.mdx b/docs/ai-chat/reference.mdx
new file mode 100644
index 00000000000..40f6f04daf8
--- /dev/null
+++ b/docs/ai-chat/reference.mdx
@@ -0,0 +1,470 @@
+---
+title: "API Reference"
+sidebarTitle: "API Reference"
+description: "Complete API reference for the AI Chat SDK — backend options, events, frontend transport, and hooks."
+---
+
+## ChatTaskOptions
+
+Options for `chat.task()`.
+
+| Option | Type | Default | Description |
+| ----------------------------- | ----------------------------------------------------------- | ------------------------------ | --------------------------------------------------------------------------------------------------- |
+| `id` | `string` | required | Task identifier |
+| `run` | `(payload: ChatTaskRunPayload) => Promise<StreamTextResult \| void>` | required | Handler for each turn |
+| `clientDataSchema` | `TaskSchema` | — | Schema for validating and typing `clientData` |
+| `onPreload` | `(event: PreloadEvent) => Promise<void> \| void` | — | Fires on preloaded runs before the first message |
+| `onChatStart` | `(event: ChatStartEvent) => Promise<void> \| void` | — | Fires on turn 0 before `run()` |
+| `onTurnStart` | `(event: TurnStartEvent) => Promise<void> \| void` | — | Fires every turn before `run()` |
+| `onBeforeTurnComplete` | `(event: BeforeTurnCompleteEvent) => Promise<void> \| void` | — | Fires after response but before stream closes. Includes `writer`. |
+| `onTurnComplete` | `(event: TurnCompleteEvent) => Promise<void> \| void` | — | Fires after each turn completes (stream closed) |
+| `onCompacted` | `(event: CompactedEvent) => Promise<void> \| void` | — | Fires when compaction occurs. Includes `writer`. See [Compaction](/ai-chat/compaction) |
+| `compaction` | `ChatTaskCompactionOptions` | — | Automatic context compaction. See [Compaction](/ai-chat/compaction) |
+| `pendingMessages` | `PendingMessagesOptions` | — | Mid-execution message injection. See [Pending Messages](/ai-chat/pending-messages) |
+| `prepareMessages` | `(event: PrepareMessagesEvent) => ModelMessage[]` | — | Transform model messages before use (cache breaks, context injection, etc.) |
+| `maxTurns` | `number` | `100` | Max conversational turns per run |
+| `turnTimeout` | `string` | `"1h"` | How long to wait for next message |
+| `idleTimeoutInSeconds` | `number` | `30` | Seconds to stay idle before suspending |
+| `chatAccessTokenTTL` | `string` | `"1h"` | How long the scoped access token remains valid |
+| `preloadIdleTimeoutInSeconds` | `number` | Same as `idleTimeoutInSeconds` | Idle timeout after `onPreload` fires |
+| `preloadTimeout` | `string` | Same as `turnTimeout` | Suspend timeout for preloaded runs |
+| `uiMessageStreamOptions` | `ChatUIMessageStreamOptions` | — | Default options for `toUIMessageStream()`. Per-turn override via `chat.setUIMessageStreamOptions()` |
+
+Plus all standard [TaskOptions](/tasks/overview) — `retry`, `queue`, `machine`, `maxDuration`, etc.
+
+## ChatTaskRunPayload
+
+The payload passed to the `run` function.
+
+| Field | Type | Description |
+| -------------- | ------------------------------------------ | -------------------------------------------------------------------- |
+| `messages` | `ModelMessage[]` | Model-ready messages — pass directly to `streamText` |
+| `chatId` | `string` | Unique chat session ID |
+| `trigger` | `"submit-message" \| "regenerate-message"` | What triggered the request |
+| `messageId` | `string \| undefined` | Message ID (for regenerate) |
+| `clientData` | Typed by `clientDataSchema` | Custom data from the frontend (typed when schema is provided) |
+| `continuation` | `boolean` | Whether this run is continuing an existing chat (previous run ended) |
+| `signal` | `AbortSignal` | Combined stop + cancel signal |
+| `cancelSignal` | `AbortSignal` | Cancel-only signal |
+| `stopSignal` | `AbortSignal` | Stop-only signal (per-turn) |
+
+## PreloadEvent
+
+Passed to the `onPreload` callback.
+
+| Field | Type | Description |
+| ----------------- | --------------------------- | -------------------------------------------------------------- |
+| `chatId` | `string` | Chat session ID |
+| `runId` | `string` | The Trigger.dev run ID |
+| `chatAccessToken` | `string` | Scoped access token for this run |
+| `clientData` | Typed by `clientDataSchema` | Custom data from the frontend |
+| `writer` | [`ChatWriter`](#chatwriter) | Stream writer for custom chunks. Lazy — no overhead if unused. |
+
+## ChatStartEvent
+
+Passed to the `onChatStart` callback.
+
+| Field | Type | Description |
+| ----------------- | --------------------------- | -------------------------------------------------------------- |
+| `chatId` | `string` | Chat session ID |
+| `messages` | `ModelMessage[]` | Initial model-ready messages |
+| `clientData` | Typed by `clientDataSchema` | Custom data from the frontend |
+| `runId` | `string` | The Trigger.dev run ID |
+| `chatAccessToken` | `string` | Scoped access token for this run |
+| `continuation` | `boolean` | Whether this run is continuing an existing chat |
+| `previousRunId` | `string \| undefined` | Previous run ID (only when `continuation` is true) |
+| `preloaded` | `boolean` | Whether this run was preloaded before the first message |
+| `writer` | [`ChatWriter`](#chatwriter) | Stream writer for custom chunks. Lazy — no overhead if unused. |
+
+## TurnStartEvent
+
+Passed to the `onTurnStart` callback.
+
+| Field | Type | Description |
+| ----------------- | --------------------------- | -------------------------------------------------------------- |
+| `chatId` | `string` | Chat session ID |
+| `messages` | `ModelMessage[]` | Full accumulated conversation (model format) |
+| `uiMessages` | `UIMessage[]` | Full accumulated conversation (UI format) |
+| `turn` | `number` | Turn number (0-indexed) |
+| `runId` | `string` | The Trigger.dev run ID |
+| `chatAccessToken` | `string` | Scoped access token for this run |
+| `clientData` | Typed by `clientDataSchema` | Custom data from the frontend |
+| `continuation` | `boolean` | Whether this run is continuing an existing chat |
+| `previousRunId` | `string \| undefined` | Previous run ID (only when `continuation` is true) |
+| `preloaded` | `boolean` | Whether this run was preloaded |
+| `writer` | [`ChatWriter`](#chatwriter) | Stream writer for custom chunks. Lazy — no overhead if unused. |
+
+## TurnCompleteEvent
+
+Passed to the `onTurnComplete` callback.
+
+| Field | Type | Description |
+| -------------------- | --------------------------------- | ---------------------------------------------------- |
+| `chatId` | `string` | Chat session ID |
+| `messages` | `ModelMessage[]` | Full accumulated conversation (model format) |
+| `uiMessages` | `UIMessage[]` | Full accumulated conversation (UI format) |
+| `newMessages` | `ModelMessage[]` | Only this turn's messages (model format) |
+| `newUIMessages` | `UIMessage[]` | Only this turn's messages (UI format) |
+| `responseMessage` | `UIMessage \| undefined` | The assistant's response for this turn |
+| `rawResponseMessage` | `UIMessage \| undefined` | Raw response before abort cleanup |
+| `turn` | `number` | Turn number (0-indexed) |
+| `runId` | `string` | The Trigger.dev run ID |
+| `chatAccessToken` | `string` | Scoped access token for this run |
+| `lastEventId` | `string \| undefined` | Stream position for resumption |
+| `stopped` | `boolean` | Whether the user stopped generation during this turn |
+| `continuation` | `boolean` | Whether this run is continuing an existing chat |
+| `usage` | `LanguageModelUsage \| undefined` | Token usage for this turn |
+| `totalUsage` | `LanguageModelUsage` | Cumulative token usage across all turns |
+
+## BeforeTurnCompleteEvent
+
+Passed to the `onBeforeTurnComplete` callback. Same fields as `TurnCompleteEvent` plus a `writer`.
+
+| Field | Type | Description |
+| -------------------------------- | --------------------------- | ----------------------------------------------------------------------------- |
+| _(all TurnCompleteEvent fields)_ | | See [TurnCompleteEvent](#turncompleteevent) |
+| `writer` | [`ChatWriter`](#chatwriter) | Stream writer — the stream is still open so chunks appear in the current turn |
+
+## ChatWriter
+
+A stream writer passed to lifecycle callbacks. Write custom `UIMessageChunk` parts (e.g. `data-*` parts) to the chat stream.
+
+The writer is lazy — no stream is opened unless you call `write()` or `merge()`, so there's zero overhead for callbacks that don't use it.
+
+| Method | Type | Description |
+| --------------- | -------------------------------------------------- | -------------------------------------------------- |
+| `write(part)` | `(part: UIMessageChunk) => void` | Write a single chunk to the chat stream |
+| `merge(stream)` | `(stream: ReadableStream<UIMessageChunk>) => void` | Merge another stream's chunks into the chat stream |
+
+```ts
+onTurnStart: async ({ writer }) => {
+ // Write a custom data part — render it on the frontend
+ writer.write({ type: "data-status", data: { loading: true } });
+},
+onBeforeTurnComplete: async ({ writer, usage }) => {
+ // Stream is still open — these chunks arrive before the turn ends
+ writer.write({ type: "data-usage", data: { tokens: usage?.totalTokens } });
+},
+```
+
+## ChatTaskCompactionOptions
+
+Options for the `compaction` field on `chat.task()`. See [Compaction](/ai-chat/compaction) for usage guide.
+
+| Option | Type | Required | Description |
+| ---------------------- | ---------------------------------------------------------------------------- | -------- | ---------------------------------------------------------------------------- |
+| `shouldCompact` | `(event: ShouldCompactEvent) => boolean \| Promise<boolean>` | Yes | Decide whether to compact. Return `true` to trigger |
+| `summarize` | `(event: SummarizeEvent) => Promise<string>` | Yes | Generate a summary from the current messages |
+| `compactUIMessages` | `(event: CompactMessagesEvent) => UIMessage[] \| Promise<UIMessage[]>` | No | Transform UI messages after compaction. Default: preserve all |
+| `compactModelMessages` | `(event: CompactMessagesEvent) => ModelMessage[] \| Promise<ModelMessage[]>` | No | Transform model messages after compaction. Default: replace all with summary |
+
+## CompactMessagesEvent
+
+Passed to `compactUIMessages` and `compactModelMessages` callbacks.
+
+| Field | Type | Description |
+| --------------- | -------------------- | ---------------------------------------------------- |
+| `summary` | `string` | The generated summary text |
+| `uiMessages` | `UIMessage[]` | Current UI messages (full conversation) |
+| `modelMessages` | `ModelMessage[]` | Current model messages (full conversation) |
+| `chatId` | `string` | Chat session ID |
+| `turn` | `number` | Current turn (0-indexed) |
+| `clientData` | `unknown` | Custom data from the frontend |
+| `source` | `"inner" \| "outer"` | Whether compaction is between steps or between turns |
+
+## CompactedEvent
+
+Passed to the `onCompacted` callback.
+
+| Field | Type | Description |
+| -------------- | --------------------------- | ------------------------------------------------- |
+| `summary` | `string` | The generated summary text |
+| `messages` | `ModelMessage[]` | Messages that were compacted (pre-compaction) |
+| `messageCount` | `number` | Number of messages before compaction |
+| `usage` | `LanguageModelUsage` | Token usage from the triggering step/turn |
+| `totalTokens` | `number \| undefined` | Total token count that triggered compaction |
+| `inputTokens` | `number \| undefined` | Input token count |
+| `outputTokens` | `number \| undefined` | Output token count |
+| `stepNumber` | `number` | Step number (-1 for outer loop) |
+| `chatId` | `string \| undefined` | Chat session ID |
+| `turn` | `number \| undefined` | Current turn |
+| `writer` | [`ChatWriter`](#chatwriter) | Stream writer for custom chunks during compaction |
+
+## PendingMessagesOptions
+
+Options for the `pendingMessages` field. See [Pending Messages](/ai-chat/pending-messages) for usage guide.
+
+| Option | Type | Required | Description |
+| -------------- | --------------------------------------------------------------------------------- | -------- | ----------------------------------------------------------------------------------------- |
+| `shouldInject` | `(event: PendingMessagesBatchEvent) => boolean \| Promise<boolean>`               | No       | Decide whether to inject the batch between tool-call steps. If absent, no injection.       |
+| `prepare`      | `(event: PendingMessagesBatchEvent) => ModelMessage[] \| Promise<ModelMessage[]>` | No       | Transform the batch before injection. Default: convert each via `convertToModelMessages`.  |
+| `onReceived`   | `(event: PendingMessageReceivedEvent) => void \| Promise<void>`                   | No       | Called when a message arrives during streaming (per-message).                              |
+| `onInjected`   | `(event: PendingMessagesInjectedEvent) => void \| Promise<void>`                  | No       | Called after a batch is injected via prepareStep.                                          |
+
+## PendingMessagesBatchEvent
+
+Passed to `shouldInject` and `prepare` callbacks.
+
+| Field | Type | Description |
+| --------------- | ------------------ | ----------------------------- |
+| `messages` | `UIMessage[]` | All pending messages (batch) |
+| `modelMessages` | `ModelMessage[]` | Current conversation |
+| `steps` | `CompactionStep[]` | Completed steps so far |
+| `stepNumber` | `number` | Current step (0-indexed) |
+| `chatId` | `string` | Chat session ID |
+| `turn` | `number` | Current turn (0-indexed) |
+| `clientData` | `unknown` | Custom data from the frontend |
+
+## PendingMessagesInjectedEvent
+
+Passed to `onInjected` callback.
+
+| Field | Type | Description |
+| ----------------------- | ---------------- | ------------------------------------- |
+| `messages` | `UIMessage[]` | All injected UI messages |
+| `injectedModelMessages` | `ModelMessage[]` | The model messages that were injected |
+| `chatId` | `string` | Chat session ID |
+| `turn` | `number` | Current turn |
+| `stepNumber` | `number` | Step where injection occurred |
+
+## UsePendingMessagesReturn
+
+Return value of `usePendingMessages` hook. See [Pending Messages — Frontend](/ai-chat/pending-messages#frontend-usependingmessages-hook).
+
+| Property/Method | Type | Description |
+| ----------------------- | -------------------------------------- | --------------------------------------------------------------- |
+| `pending` | `PendingMessage[]` | Current pending messages with mode and injection status |
+| `steer` | `(text: string) => void` | Send a steering message (or normal message when not streaming) |
+| `queue` | `(text: string) => void` | Queue for next turn (or send normally when not streaming) |
+| `promoteToSteering` | `(id: string) => void` | Convert a queued message to steering |
+| `isInjectionPoint` | `(part: unknown) => boolean` | Check if an assistant message part is an injection confirmation |
+| `getInjectedMessageIds` | `(part: unknown) => string[]` | Get message IDs from an injection point |
+| `getInjectedMessages` | `(part: unknown) => InjectedMessage[]` | Get messages (id + text) from an injection point |
+
+## ChatSessionOptions
+
+Options for `chat.createSession()`.
+
+| Option | Type | Default | Description |
+| ---------------------- | ------------- | -------- | ----------------------------------- |
+| `signal` | `AbortSignal` | required | Run-level cancel signal |
+| `idleTimeoutInSeconds` | `number` | `30` | Seconds to stay idle between turns |
+| `timeout` | `string` | `"1h"` | Duration string for suspend timeout |
+| `maxTurns` | `number` | `100` | Max turns before ending |
+
+## ChatTurn
+
+Each turn yielded by `chat.createSession()`.
+
+| Field | Type | Description |
+| -------------- | ---------------- | --------------------------------------------- |
+| `number` | `number` | Turn number (0-indexed) |
+| `chatId` | `string` | Chat session ID |
+| `trigger` | `string` | What triggered this turn |
+| `clientData` | `unknown` | Client data from the transport |
+| `messages` | `ModelMessage[]` | Full accumulated model messages |
+| `uiMessages` | `UIMessage[]` | Full accumulated UI messages |
+| `signal` | `AbortSignal` | Combined stop+cancel signal (fresh each turn) |
+| `stopped` | `boolean` | Whether the user stopped generation this turn |
+| `continuation` | `boolean` | Whether this is a continuation run |
+
+| Method | Returns | Description |
+| ----------------------- | --------------------------------- | ------------------------------------------------------------ |
+| `complete(source)`      | `Promise<UIMessage>`              | Pipe, capture, accumulate, cleanup, and signal turn-complete |
+| `done()`                | `Promise<void>`                   | Signal turn-complete (when you've piped manually)            |
+| `addResponse(response)` | `Promise<void>`                   | Add response to accumulator manually                         |
+
+## chat namespace
+
+All methods available on the `chat` object from `@trigger.dev/sdk/ai`.
+
+| Method | Description |
+| ------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------- |
+| `chat.task(options)` | Create a chat task |
+| `chat.createSession(payload, options)` | Create an async iterator for chat turns |
+| `chat.pipe(source, options?)` | Pipe a stream to the frontend (from anywhere inside a task) |
+| `chat.pipeAndCapture(source, options?)` | Pipe and capture the response `UIMessage` |
+| `chat.writeTurnComplete(options?)` | Signal the frontend that the current turn is complete |
+| `chat.createStopSignal()` | Create a managed stop signal wired to the stop input stream |
+| `chat.messages` | Input stream for incoming messages — use `.waitWithIdleTimeout()` |
+| `chat.local({ id })` | Create a per-run typed local (see [Per-run data](/ai-chat/features#per-run-data-with-chatlocal)) |
+| `chat.createAccessToken(taskId)` | Create a public access token for a chat task |
+| `chat.setTurnTimeout(duration)` | Override turn timeout at runtime (e.g. `"2h"`) |
+| `chat.setTurnTimeoutInSeconds(seconds)` | Override turn timeout at runtime (in seconds) |
+| `chat.setIdleTimeoutInSeconds(seconds)` | Override idle timeout at runtime |
+| `chat.setUIMessageStreamOptions(options)` | Override `toUIMessageStream()` options for the current turn |
+| `chat.defer(promise)` | Run background work in parallel with streaming, awaited before `onTurnComplete` |
+| `chat.isStopped()` | Check if the current turn was stopped by the user |
+| `chat.cleanupAbortedParts(message)` | Remove incomplete parts from a stopped response message |
+| `chat.stream` | Typed chat output stream — use `.writer()`, `.pipe()`, `.append()`, `.read()` |
+| `chat.MessageAccumulator` | Class that accumulates conversation messages across turns |
+| `chat.withUIMessage(config?).task(options)` | Same as `chat.task`, but fixes a custom `UIMessage` subtype and optional default stream options. See [Types](/ai-chat/types) |
+
+## `chat.withUIMessage`
+
+Returns `{ task }`, where `task` is like [`chat.task`](#chat-namespace) but parameterized on a UI message type `TUIM`.
+
+```ts
+chat.withUIMessage<TUIM extends UIMessage>(config?: ChatWithUIMessageConfig<TUIM>): {
+  task: (options: ChatTaskOptions<..., ..., TUIM>) => Task<...>;
+};
+```
+
+| Parameter | Type | Description |
+| ---------------------- | ---------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `config.streamOptions` | `ChatUIMessageStreamOptions<TUIM>` | Optional defaults for `toUIMessageStream()`. Shallow-merged with `uiMessageStreamOptions` on the inner `.task({ ... })` (task wins on key conflicts).   |
+
+Use this when you need [`InferChatUIMessage`](#inferchatuimessage) / typed `data-*` parts / `InferUITools` to line up across backend hooks and `useChat`. Full guide: [Types](/ai-chat/types).
+
+## `ChatWithUIMessageConfig`
+
+| Field | Type | Description |
+| --------------- | ---------------------------------- | --------------------------------------------------------------------- |
+| `streamOptions` | `ChatUIMessageStreamOptions` | Default `toUIMessageStream()` options for tasks created via `.task()` |
+
+## `InferChatUIMessage`
+
+Type helper: extracts the `UIMessage` subtype from a chat task’s wire payload.
+
+```ts
+import type { InferChatUIMessage } from "@trigger.dev/sdk/ai";
+// or from "@trigger.dev/sdk/chat/react"
+
+type Msg = InferChatUIMessage<typeof myChat>;
+```
+
+Use with `useChat({ transport })` when using [`chat.withUIMessage`](/ai-chat/types). For tasks defined with plain `chat.task()` (no custom generic), this resolves to the base `UIMessage`.
+
+## AI helpers (`ai` from `@trigger.dev/sdk/ai`)
+
+| Export | Status | Description |
+| ----------------------------------------------------------------------------------- | -------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
+| `ai.toolExecute(task)` | **Preferred** | Returns the `execute` function for AI SDK `tool()`. Runs the task via `triggerAndSubscribe` and attaches tool/chat metadata (same behavior the deprecated wrapper used internally). |
+| `ai.tool(task, options?)` | **Deprecated** | Wraps `tool()` / `dynamicTool()` and the same execute path. Migrate to `tool({ ..., execute: ai.toolExecute(task) })`. See [Task-backed AI tools](/tasks/schemaTask#task-backed-ai-tools). |
+| `ai.toolCallId`, `ai.chatContext`, `ai.chatContextOrThrow`, `ai.currentToolOptions` | Supported | Work for any task-backed tool execute path, including `ai.toolExecute`. |
+
+## ChatUIMessageStreamOptions
+
+Options for customizing `toUIMessageStream()`. Set as static defaults via `uiMessageStreamOptions` on `chat.task()`, or override per-turn via `chat.setUIMessageStreamOptions()`. See [Stream options](/ai-chat/backend#stream-options) for usage examples.
+
+Derived from the AI SDK's `UIMessageStreamOptions` with `onFinish`, `originalMessages`, and `generateMessageId` omitted (managed internally).
+
+| Option | Type | Default | Description |
+| ----------------- | --------------------------------- | ----------------- | ----------------------------------------------------------------------------------------------------------------------------------- |
+| `onError` | `(error: unknown) => string` | Raw error message | Called on LLM errors and tool execution errors. Return a sanitized string — sent as `{ type: "error", errorText }` to the frontend. |
+| `sendReasoning` | `boolean` | `true` | Send reasoning parts to the client |
+| `sendSources` | `boolean` | `false` | Send source parts to the client |
+| `sendFinish` | `boolean` | `true` | Send the finish event. Set to `false` when chaining multiple `streamText` calls. |
+| `sendStart` | `boolean` | `true` | Send the message start event. Set to `false` when chaining. |
+| `messageMetadata` | `(options: { part }) => metadata` | — | Extract message metadata to send to the client. Called on `start` and `finish` events. |
+
+## TriggerChatTransport options
+
+Options for the frontend transport constructor and `useTriggerChatTransport` hook.
+
+| Option | Type | Default | Description |
+| ---------------------- | -------------------------------------------------------------------- | --------------------------- | --------------------------------------------------------------------------- |
+| `task` | `string` | required | Task ID to trigger |
+| `accessToken`          | `string \| ((params: ResolveChatAccessTokenParams) => string \| Promise<string>)` | required | Trigger / API auth token, or a function that returns one (see below) |
+| `baseURL` | `string` | `"https://api.trigger.dev"` | API base URL (for self-hosted) |
+| `streamKey` | `string` | `"chat"` | Stream key (only change if using custom key) |
+| `headers`              | `Record<string, string>`                                             | —                           | Extra headers for API requests                                              |
+| `streamTimeoutSeconds` | `number` | `120` | How long to wait for stream data |
+| `clientData` | Typed by `clientDataSchema` | — | Default client data for every request |
+| `sessions` | `Record` | — | Restore sessions from storage |
+| `onSessionChange` | `(chatId, session \| null) => void` | — | Fires when session state changes |
+| `renewRunAccessToken` | `(params: RenewRunAccessTokenParams) => string \| ... \| Promise<...>` | — | Mint a new run-scoped PAT when the run PAT returns 401 (realtime / input stream). Retries once. |
+| `triggerOptions` | `{...}` | — | Options for the initial task trigger (see below) |
+
+### `accessToken` callback
+
+When `accessToken` is a function, the transport calls it with **`ResolveChatAccessTokenParams`** (exported from `@trigger.dev/sdk/chat`):
+
+- `chatId` — the conversation id (`useChat` id / `sendMessages` chat id).
+- `purpose` — `"trigger"` when calling `triggerTask` from `sendMessages` (new run or after the session ended), or `"preload"` when calling `preload()`.
+
+Use this to mint or log per-chat trigger tokens. A plain **`string`** is still supported and skips the callback.
+
+### `renewRunAccessToken` callback
+
+Optional. When the **run** public access token used for realtime SSE or input streams expires, the transport calls this once with **`RenewRunAccessTokenParams`** (`chatId`, `runId`), then retries the failing request. Implement it with your server `auth.createPublicToken` (scopes `read:runs:<runId>` and `write:inputStreams:<runId>`). See [Authentication](/realtime/auth).
+
+```ts
+import { auth } from "@trigger.dev/sdk";
+import type { ResolveChatAccessTokenParams } from "@trigger.dev/sdk/chat";
+
+async function getChatToken(input: ResolveChatAccessTokenParams) {
+ return auth.createTriggerPublicToken("my-chat", { expirationTime: "1h" });
+}
+
+const transport = useTriggerChatTransport({
+ task: "my-chat",
+ accessToken: getChatToken,
+ renewRunAccessToken: async ({ chatId, runId }) => {
+ return auth.createPublicToken({
+ scopes: { read: { runs: runId }, write: { inputStreams: runId } },
+ expirationTime: "1h",
+ });
+ },
+});
+```
+
+### triggerOptions
+
+Options forwarded to the Trigger.dev API when starting a new run. Only applies to the first message — subsequent messages reuse the same run.
+
+A `chat:{chatId}` tag is automatically added to every run.
+
+| Option | Type | Description |
+| ------------- | ------------------------------ | ---------------------------------------------------------------- |
+| `tags` | `string[]` | Additional tags for the run (merged with auto-tags, max 5 total) |
+| `queue` | `string` | Queue name for the run |
+| `maxAttempts` | `number` | Maximum retry attempts |
+| `machine` | `"micro" \| "small-1x" \| ...` | Machine preset for the run |
+| `priority` | `number` | Priority (lower = higher priority) |
+
+```ts
+const transport = useTriggerChatTransport({
+ task: "my-chat",
+ accessToken: getChatToken,
+ triggerOptions: {
+ tags: ["user:123"],
+ queue: "chat-queue",
+ },
+});
+```
+
+### transport.preload()
+
+Eagerly trigger a run before the first message.
+
+```ts
+transport.preload(chatId, { idleTimeoutInSeconds?: number }): Promise<void>
+```
+
+No-op if a session already exists for this chatId. See [Preload](/ai-chat/features#preload) for full details.
+
+## useTriggerChatTransport
+
+React hook that creates and memoizes a `TriggerChatTransport` instance. Import from `@trigger.dev/sdk/chat/react`.
+
+```tsx
+import { useTriggerChatTransport } from "@trigger.dev/sdk/chat/react";
+import type { myChat } from "@/trigger/chat";
+
+const transport = useTriggerChatTransport<typeof myChat>({
+ task: "my-chat",
+ accessToken: getChatToken, // (params) => … — same shape as ResolveChatAccessTokenParams
+ sessions: savedSessions,
+ onSessionChange: handleSessionChange,
+});
+```
+
+The transport is created once on first render and reused across re-renders. Pass a type parameter for compile-time validation of the task ID.
+
+## Related
+
+- [Realtime Streams](/tasks/streams) — How streams work under the hood
+- [Using the Vercel AI SDK](/guides/examples/vercel-ai-sdk) — Basic AI SDK usage with Trigger.dev
+- [Realtime React Hooks](/realtime/react-hooks/overview) — Lower-level realtime hooks
+- [Authentication](/realtime/auth) — Public access tokens and trigger tokens
diff --git a/docs/ai-chat/types.mdx b/docs/ai-chat/types.mdx
new file mode 100644
index 00000000000..8ddfff063f0
--- /dev/null
+++ b/docs/ai-chat/types.mdx
@@ -0,0 +1,137 @@
+---
+title: "Types"
+sidebarTitle: "Types"
+description: "TypeScript types for AI Chat tasks, UI messages, and the frontend transport."
+---
+
+TypeScript patterns for [AI Chat](/ai-chat/overview). This page will expand over time; it currently documents how to pin a custom AI SDK [`UIMessage`](https://sdk.vercel.ai/docs/reference/ai-sdk-core/ui-message) subtype with `chat.withUIMessage` and align types on the client.
+
+## Custom `UIMessage` with `chat.withUIMessage`
+
+`chat.task()` types the wire payload with the base AI SDK `UIMessage`. That is enough for many apps.
+
+When you add **custom `data-*` parts** (via `chat.stream` / `writer`) or a **typed tool map** (e.g. `InferUITools`), you want a **narrower** `UIMessage` generic so that:
+
+- `onTurnStart`, `onTurnComplete`, and similar hooks expose correctly typed `uiMessages`
+- Stream options like `sendReasoning` align with your message shape
+- The frontend can treat `useChat` messages as the same subtype end-to-end
+
+`chat.withUIMessage(config?)` returns `{ task }`, where `task(...)` accepts the **same options as** [`chat.task()`](/ai-chat/backend#chat-task) but fixes `YourUIMessage` as the UI message type for that chat task.
+
+### Defining a `UIMessage` subtype
+
+Build the type from AI SDK helpers and your tools object:
+
+```ts
+import type { InferUITools, UIDataTypes, UIMessage } from "ai";
+import { tool } from "ai";
+import { z } from "zod";
+
+const myTools = {
+ lookup: tool({
+ description: "Look up a record",
+ inputSchema: z.object({ id: z.string() }),
+ execute: async ({ id }) => ({ id, label: "example" }),
+ }),
+};
+
+type MyChatTools = InferUITools<typeof myTools>;
+
+type MyChatDataTypes = UIDataTypes & {
+ "turn-status": { status: "preparing" | "streaming" | "done" };
+};
+
+export type MyChatUIMessage = UIMessage<unknown, MyChatDataTypes, MyChatTools>;
+```
+
+Task-backed tools should use AI SDK [`tool()`](https://sdk.vercel.ai/docs/ai-sdk-core/tools-and-tool-calling) with `execute: ai.toolExecute(schemaTask)` where needed — see [Task-backed AI tools](/tasks/schemaTask#task-backed-ai-tools).
+
+### Backend: `chat.withUIMessage(...).task(...)`
+
+Call `withUIMessage` **once**, then chain `.task({ ... })` instead of `chat.task({ ... })`:
+
+```ts
+import { chat } from "@trigger.dev/sdk/ai";
+import { streamText, tool } from "ai";
+import { openai } from "@ai-sdk/openai";
+import { z } from "zod";
+import type { MyChatUIMessage } from "./my-chat-types";
+
+const myTools = {
+ lookup: tool({
+ description: "Look up a record",
+ inputSchema: z.object({ id: z.string() }),
+ execute: async ({ id }) => ({ id, label: "example" }),
+ }),
+};
+
+export const myChat = chat.withUIMessage<MyChatUIMessage>({
+ streamOptions: {
+ sendReasoning: true,
+ onError: (error) =>
+ error instanceof Error ? error.message : "Something went wrong.",
+ },
+}).task({
+ id: "my-chat",
+ clientDataSchema: z.object({ userId: z.string() }),
+ onTurnStart: async ({ uiMessages, writer }) => {
+ // uiMessages is MyChatUIMessage[] — custom data parts are typed
+ writer.write({
+ type: "data-turn-status",
+ data: { status: "preparing" },
+ });
+ },
+ run: async ({ messages, signal }) => {
+ return streamText({
+ model: openai("gpt-4o"),
+ messages,
+ tools: myTools,
+ abortSignal: signal,
+ });
+ },
+});
+```
+
+### Default stream options
+
+The optional `streamOptions` object becomes the **default** [`uiMessageStreamOptions`](/ai-chat/reference#chat-task-options) for `toUIMessageStream()`.
+
+If you also set `uiMessageStreamOptions` on the inner `.task({ ... })`, the two objects are **shallow-merged** — keys on the **task** win on conflicts. Per-turn overrides via [`chat.setUIMessageStreamOptions()`](/ai-chat/backend#stream-options) still apply on top.
+
+### Frontend: `InferChatUIMessage`
+
+Import the helper type and pass it to `useChat` so `messages` and render logic match the backend:
+
+```tsx
+import { useChat } from "@ai-sdk/react";
+import { useTriggerChatTransport, type InferChatUIMessage } from "@trigger.dev/sdk/chat/react";
+import type { myChat } from "./myChat";
+
+type Msg = InferChatUIMessage<typeof myChat>;
+
+export function Chat() {
+ const transport = useTriggerChatTransport({
+ task: "my-chat",
+ accessToken: getChatToken,
+ });
+
+ const { messages } = useChat({ transport });
+
+  return messages.map((m) => (
+    <div key={m.id}>
+      {/* m.parts narrowed for your UIMessage subtype */}
+    </div>
+  ));
+}
+```
+
+You can also import `InferChatUIMessage` from `@trigger.dev/sdk/ai` in non-React modules.
+
+### When plain `chat.task()` is enough
+
+If you do not rely on custom `UIMessage` generics (only default text, reasoning, and built-in tool UI types), **`chat.task()` alone is fine** — no need for `withUIMessage`.
+
+## See also
+
+- [Backend — `chat.task()`](/ai-chat/backend#chat-task)
+- [Frontend — transport & `useChat`](/ai-chat/frontend)
+- [API reference — `chat.withUIMessage`](/ai-chat/reference#chat-withuimessage)
+- [Task-backed AI tools — `ai.toolExecute`](/tasks/schemaTask#task-backed-ai-tools)
diff --git a/docs/ai/prompts.mdx b/docs/ai/prompts.mdx
new file mode 100644
index 00000000000..4ac324ffff9
--- /dev/null
+++ b/docs/ai/prompts.mdx
@@ -0,0 +1,424 @@
+---
+title: "Prompts"
+sidebarTitle: "Prompts"
+description: "Define prompt templates as code, version them on deploy, and override from the dashboard without redeploying."
+---
+
+## Overview
+
+AI Prompts let you define prompt templates in your codebase alongside your tasks. When you deploy, Trigger.dev automatically versions your prompts. You can then:
+
+- View all prompt versions in the dashboard
+- Create **overrides** to change the prompt text or model without redeploying
+- Track every generation that used each prompt version
+- See token usage, cost, and latency metrics per prompt
+- Manage prompts programmatically via SDK methods
+
+## Defining a prompt
+
+Use `prompts.define()` to create a prompt with typed variables:
+
+```ts
+import { prompts } from "@trigger.dev/sdk";
+import { z } from "zod";
+
+export const supportPrompt = prompts.define({
+ id: "customer-support",
+ description: "System prompt for customer support interactions",
+ model: "gpt-4o",
+ config: { temperature: 0.7 },
+ variables: z.object({
+ customerName: z.string(),
+ plan: z.string(),
+ issue: z.string(),
+ }),
+ content: `You are a support agent for Acme SaaS.
+
+## Customer context
+
+- **Name:** {{customerName}}
+- **Plan:** {{plan}}
+- **Issue:** {{issue}}
+
+Respond to the customer's issue. Be concise and helpful.`,
+});
+```
+
+### Options
+
+| Option | Type | Required | Description |
+|--------|------|----------|-------------|
+| `id` | `string` | Yes | Unique identifier (becomes the prompt slug) |
+| `description` | `string` | No | Shown in the dashboard |
+| `model` | `string` | No | Default model (e.g. `"gpt-4o"`, `"claude-sonnet-4-6"`) |
+| `config` | `object` | No | Default config (temperature, maxTokens, etc.) |
+| `variables` | Zod/ArkType schema | No | Schema for template variables (enables validation and dashboard UI) |
+| `content` | `string` | Yes | The prompt template with `{{variable}}` placeholders |
+
+### Template syntax
+
+Templates use Mustache-style placeholders:
+
+- `{{variableName}}` — replaced with the variable value
+- `{{#conditionalVar}}...{{/conditionalVar}}` — content only included if the variable is truthy
+
+```ts
+export const prompt = prompts.define({
+ id: "summarizer",
+ model: "gpt-4o-mini",
+ variables: z.object({
+ text: z.string(),
+ maxSentences: z.string().optional(),
+ }),
+ content: `Summarize the following text{{#maxSentences}} in {{maxSentences}} sentences or fewer{{/maxSentences}}:
+
+{{text}}`,
+});
+```
+
+## Resolving a prompt
+
+### Via prompt handle
+
+Call `.resolve()` on the handle returned by `define()`:
+
+```ts
+const resolved = await supportPrompt.resolve({
+ customerName: "Alice",
+ plan: "Pro",
+ issue: "Cannot access billing dashboard",
+});
+
+console.log(resolved.text); // The compiled prompt with variables filled in
+console.log(resolved.version); // e.g. 3
+console.log(resolved.model); // "gpt-4o"
+console.log(resolved.labels); // ["current"] or ["override"]
+```
+
+### Via standalone prompts.resolve()
+
+Resolve any prompt by slug without needing a handle. Pass the prompt handle as a type parameter for full type safety:
+
+```ts
+import { prompts } from "@trigger.dev/sdk";
+import type { supportPrompt } from "./prompts";
+
+// Fully typesafe — ID and variables are checked at compile time
+const resolved = await prompts.resolve<typeof supportPrompt>("customer-support", {
+ customerName: "Alice",
+ plan: "Pro",
+ issue: "Cannot access billing dashboard",
+});
+```
+
+Without the generic, the function still works but accepts any string slug and `Record<string, unknown>` variables.
+
+### Resolve options
+
+You can resolve a specific version or label:
+
+```ts
+// Resolve a specific version
+const v2 = await supportPrompt.resolve(variables, { version: 2 });
+
+// Resolve by label
+const current = await supportPrompt.resolve(variables, { label: "current" });
+```
+
+By default, `resolve()` returns the **override** version if one is active, otherwise the **current** (latest deployed) version.
+
+
+ Both `promptHandle.resolve()` and `prompts.resolve()` call the Trigger.dev API when a client is configured. During local dev with `trigger dev`, this means you'll always get the server version (including overrides).
+
+
+## Using with the AI SDK
+
+The resolved prompt integrates with the [Vercel AI SDK](https://ai-sdk.dev) via `toAISDKTelemetry()`. This links AI generation spans to the prompt in the dashboard.
+
+### generateText
+
+```ts
+import { task } from "@trigger.dev/sdk";
+import { generateText } from "ai";
+import { openai } from "@ai-sdk/openai";
+
+export const supportTask = task({
+ id: "handle-support",
+ run: async (payload) => {
+ const resolved = await supportPrompt.resolve({
+ customerName: payload.name,
+ plan: payload.plan,
+ issue: payload.issue,
+ });
+
+ const result = await generateText({
+ model: openai(resolved.model ?? "gpt-4o"),
+ system: resolved.text,
+ prompt: payload.issue,
+ ...resolved.toAISDKTelemetry(),
+ });
+
+ return { response: result.text };
+ },
+});
+```
+
+### streamText
+
+```ts
+import { streamText } from "ai";
+
+export const streamTask = task({
+ id: "stream-support",
+ run: async (payload) => {
+ const resolved = await supportPrompt.resolve({
+ customerName: payload.name,
+ plan: payload.plan,
+ issue: payload.issue,
+ });
+
+ const result = streamText({
+ model: openai(resolved.model ?? "gpt-4o"),
+ system: resolved.text,
+ prompt: payload.issue,
+ ...resolved.toAISDKTelemetry(),
+ });
+
+ let fullText = "";
+ for await (const chunk of result.textStream) {
+ fullText += chunk;
+ }
+
+ return { response: fullText };
+ },
+});
+```
+
+### Custom telemetry metadata
+
+Pass additional metadata to `toAISDKTelemetry()` that will appear on the generation span:
+
+```ts
+const result = await generateText({
+ model: openai("gpt-4o"),
+ prompt: resolved.text,
+ ...resolved.toAISDKTelemetry({
+ "task.type": "summarization",
+ "customer.tier": "enterprise",
+ }),
+});
+```
+
+## Using with chat.task()
+
+Prompts integrate with `chat.task()` via `chat.prompt` — a run-scoped store for the resolved prompt. Store a prompt once in a lifecycle hook, then access it anywhere during the run.
+
+### chat.prompt.set() and chat.prompt()
+
+```ts
+import { chat } from "@trigger.dev/sdk/ai";
+import { prompts } from "@trigger.dev/sdk";
+import { z } from "zod";
+import { streamText, createProviderRegistry } from "ai";
+import { openai } from "@ai-sdk/openai";
+import { anthropic } from "@ai-sdk/anthropic";
+
+const registry = createProviderRegistry({ openai, anthropic });
+
+const systemPrompt = prompts.define({
+ id: "my-chat-system",
+ model: "openai:gpt-4o",
+ config: { temperature: 0.7 },
+ variables: z.object({ name: z.string() }),
+ content: `You are a helpful assistant for {{name}}.`,
+});
+
+export const myChat = chat.task({
+ id: "my-chat",
+ onChatStart: async ({ clientData }) => {
+ const resolved = await systemPrompt.resolve({ name: clientData.name });
+ chat.prompt.set(resolved);
+ },
+ run: async ({ messages, signal }) => {
+ return streamText({
+ ...chat.toStreamTextOptions({ registry }),
+ messages,
+ abortSignal: signal,
+ });
+ },
+});
+```
+
+### chat.toStreamTextOptions()
+
+Returns an options object ready to spread into `streamText()`. When a prompt is stored via `chat.prompt.set()`, it includes:
+
+- `system` — the compiled prompt text
+- `model` — resolved via the `registry` when provided
+- `temperature`, `maxTokens`, etc. — from the prompt's `config`
+- `experimental_telemetry` — links generations to the prompt in the dashboard
+
+```ts
+// With registry — model is resolved automatically
+const withRegistry = chat.toStreamTextOptions({ registry });
+// { system: "...", model: LanguageModel, temperature: 0.7, experimental_telemetry: { ... } }
+
+// Without registry — model is not included
+const withoutRegistry = chat.toStreamTextOptions();
+// { system: "...", temperature: 0.7, experimental_telemetry: { ... } }
+```
+
+
+ When the user provides a `registry` and the prompt has a `model` string (e.g. `"openai:gpt-4o"`), the model is resolved via `registry.languageModel()` and included in the returned options. This means `streamText` uses the prompt's model by default — no manual model selection needed.
+
+
+### Reading the prompt
+
+Access the stored prompt from anywhere in the run:
+
+```ts
+run: async ({ messages, signal }) => {
+ const prompt = chat.prompt(); // Throws if not set
+ console.log(prompt.text); // The compiled prompt
+ console.log(prompt.model); // "openai:gpt-4o"
+ console.log(prompt.version); // 3
+
+ return streamText({
+ ...chat.toStreamTextOptions({ registry }),
+ messages,
+ abortSignal: signal,
+ });
+},
+```
+
+You can also set a plain string if you don't need the full prompt system:
+
+```ts
+chat.prompt.set("You are a helpful assistant.");
+```
+
+## Prompt management SDK
+
+The `prompts` namespace includes methods for managing prompts programmatically. These work both inside tasks and outside (e.g. scripts, API handlers) as long as an API client is configured.
+
+### List prompts
+
+```ts
+const allPrompts = await prompts.list();
+```
+
+### List versions
+
+```ts
+const versions = await prompts.versions("customer-support");
+```
+
+### Create an override
+
+Create a new override that takes priority over the deployed version:
+
+```ts
+const result = await prompts.createOverride("customer-support", {
+ textContent: "New prompt template: Hello {{customerName}}!",
+ model: "gpt-4o-mini",
+ commitMessage: "Shorter prompt",
+});
+```
+
+### Update an override
+
+```ts
+await prompts.updateOverride("customer-support", {
+ textContent: "Updated template: Hi {{customerName}}!",
+ model: "gpt-4o",
+});
+```
+
+### Remove an override
+
+Remove the active override, reverting to the deployed version:
+
+```ts
+await prompts.removeOverride("customer-support");
+```
+
+### Promote a version
+
+```ts
+await prompts.promote("customer-support", 2);
+```
+
+### All management methods
+
+| Method | Description |
+|--------|-------------|
+| `prompts.list()` | List all prompts in the current environment |
+| `prompts.versions(slug)` | List all versions for a prompt |
+| `prompts.resolve(slug, variables?, options?)` | Resolve a prompt by slug |
+| `prompts.promote(slug, version)` | Promote a version to current |
+| `prompts.createOverride(slug, body)` | Create an override |
+| `prompts.updateOverride(slug, body)` | Update the active override |
+| `prompts.removeOverride(slug)` | Remove the active override |
+| `prompts.reactivateOverride(slug, version)` | Reactivate a removed override |
+
+## Overrides
+
+Overrides let you change a prompt's template or model from the dashboard or SDK without redeploying your code. When an override is active, `resolve()` returns the override version instead of the deployed version.
+
+### How overrides work
+
+- Overrides take priority over the deployed ("current") version
+- Only one override can be active at a time
+- Creating a new override replaces the previous one
+- Removing an override reverts to the deployed version
+- Overrides are environment-scoped (dev, staging, production are independent)
+
+### Creating an override (dashboard)
+
+1. Go to the prompt detail page
+2. Click **Create Override**
+3. Edit the template text and/or model
+4. Add an optional commit message
+5. Click **Create override**
+
+### Version resolution order
+
+When `resolve()` is called, versions are resolved in this order:
+
+1. **Specific version** — if `{ version: N }` is passed
+2. **Override** — if an override is active in this environment
+3. **Label** — if `{ label: "..." }` is passed (defaults to `"current"`)
+4. **Current** — the latest deployed version with the "current" label
+
+## Dashboard
+
+### Prompts list
+
+The prompts list page shows all prompts in the current environment with the current or override version, default model, and a usage sparkline.
+
+### Prompt detail
+
+Click a prompt to see:
+
+- **Template panel** — the prompt template for the selected version
+- **Details tab** — slug, description, model, config, source file, and variable schema
+- **Versions tab** — all versions with labels, source, and commit messages
+- **Generations tab** — every AI generation that used this prompt, with live polling
+- **Metrics tab** — token usage, cost, and latency charts
+
+### AI span inspectors
+
+When you use `toAISDKTelemetry()`, AI generation spans in the run trace get a custom inspector showing:
+
+- **Overview** — model, provider, token usage, cost, input/output preview
+- **Messages** — the full message thread
+- **Tools** — tool definitions and tool call details
+- **Prompt** — the linked prompt's metadata, input variables, and template content
+
+## Type utilities
+
+```ts
+import type { PromptHandle, PromptIdentifier, PromptVariables } from "@trigger.dev/sdk";
+
+type Id = PromptIdentifier<typeof supportPrompt>; // "customer-support"
+type Vars = PromptVariables<typeof supportPrompt>; // { customerName: string; plan: string; issue: string }
+```
diff --git a/docs/docs.json b/docs/docs.json
index 779b5d53fb5..4854d6d016e 100644
--- a/docs/docs.json
+++ b/docs/docs.json
@@ -80,6 +80,27 @@
"hidden-tasks"
]
},
+ {
+ "group": "AI",
+ "pages": [
+ "ai/prompts",
+ {
+ "group": "Chat",
+ "pages": [
+ "ai-chat/overview",
+ "ai-chat/quick-start",
+ "ai-chat/backend",
+ "ai-chat/frontend",
+ "ai-chat/types",
+ "ai-chat/features",
+ "ai-chat/compaction",
+ "ai-chat/pending-messages",
+ "ai-chat/background-injection",
+ "ai-chat/reference"
+ ]
+ }
+ ]
+ },
{
"group": "Configuration",
"pages": [
@@ -733,6 +754,10 @@
{
"source": "/insights/metrics",
"destination": "/observability/dashboards"
+ },
+ {
+ "source": "/guides/ai-chat",
+ "destination": "/ai-chat/overview"
}
]
}
diff --git a/docs/migrating-from-v3.mdx b/docs/migrating-from-v3.mdx
index 5530d66b62d..c820b25a1de 100644
--- a/docs/migrating-from-v3.mdx
+++ b/docs/migrating-from-v3.mdx
@@ -34,7 +34,7 @@ We're retiring Trigger.dev v3. **New v3 deploys will stop working from 1 April 2
| [Hidden tasks](/hidden-tasks) | Create tasks that are not exported from your trigger files but can still be executed. |
| [Middleware & locals](#middleware-and-locals) | The middleware system runs at the top level, executing before and after all lifecycle hooks. The locals API allows sharing data between middleware and hooks. |
| [useWaitToken](/realtime/react-hooks/use-wait-token) | Use the useWaitToken hook to complete a wait token from a React component. |
-| [ai.tool](/tasks/schemaTask#ai-tool) | Create an AI tool from an existing `schemaTask` to use with the Vercel [AI SDK](https://vercel.com/docs/ai-sdk). |
+| [Task-backed AI tools](/tasks/schemaTask#task-backed-ai-tools) | Use `schemaTask` with AI SDK `tool()` and `ai.toolExecute()` (legacy `ai.tool` is deprecated). |
## Node.js support
@@ -165,7 +165,7 @@ export const myAiTask = schemaTask({
});
```
-We've replaced the `toolTask` function with the `ai.tool` function, which creates an AI tool from an existing `schemaTask`. See the [ai.tool](/tasks/schemaTask#ai-tool) page for more details.
+We've replaced the `toolTask` function with `schemaTask` plus AI SDK `tool()` and `ai.toolExecute()` (the older `ai.tool()` wrapper is deprecated). See [Task-backed AI tools](/tasks/schemaTask#task-backed-ai-tools).
## Breaking changes
diff --git a/docs/snippets/migrate-v4-using-ai.mdx b/docs/snippets/migrate-v4-using-ai.mdx
index fa749ed7231..aa5393c158d 100644
--- a/docs/snippets/migrate-v4-using-ai.mdx
+++ b/docs/snippets/migrate-v4-using-ai.mdx
@@ -56,7 +56,7 @@ const myTask = task({
},
});
-We’ve deprecated the `toolTask` function and replaced it with the `ai.tool` function, which creates an AI tool from an existing `schemaTask`. This is the old version:
+We’ve deprecated the `toolTask` function. Use `schemaTask` plus AI SDK `tool()` with `execute: ai.toolExecute(task)` (the `ai.tool()` wrapper is deprecated). This is the old version:
import { toolTask, schemaTask } from "@trigger.dev/sdk";
import { z } from "zod";
@@ -85,9 +85,11 @@ export const myAiTask = schemaTask({
This is the new version:
-import { schemaTask, ai } from "@trigger.dev/sdk";
+import { schemaTask } from "@trigger.dev/sdk";
+import { ai } from "@trigger.dev/sdk/ai";
import { z } from "zod";
-import { generateText } from "ai";
+import { generateText, tool } from "ai";
+import { openai } from "@ai-sdk/openai";
// Convert toolTask to schemaTask with a schema
const myToolTask = schemaTask({
@@ -99,8 +101,11 @@ const myToolTask = schemaTask({
run: async (payload, { ctx }) => {},
});
-// Create an AI tool from the schemaTask
-const myTool = ai.tool(myToolTask);
+const myTool = tool({
+ description: myToolTask.description ?? "",
+ inputSchema: myToolTask.schema!,
+ execute: ai.toolExecute(myToolTask),
+});
export const myAiTask = schemaTask({
id: "my-ai-task",
@@ -112,7 +117,7 @@ export const myAiTask = schemaTask({
prompt: payload.text,
model: openai("gpt-4o"),
tools: {
- myTool, // Use the ai.tool created from schemaTask
+ myTool,
},
});
},
diff --git a/docs/tasks/schemaTask.mdx b/docs/tasks/schemaTask.mdx
index 3692d1d7035..82ba4aa5679 100644
--- a/docs/tasks/schemaTask.mdx
+++ b/docs/tasks/schemaTask.mdx
@@ -76,51 +76,63 @@ await myTask.trigger({ age: 30, dob: "2020-01-01" }); // this is valid
await myTask.trigger({ name: "Alice", age: 30, dob: "2020-01-01" }); // this is also valid
```
-## `ai.tool`
+## Task-backed AI tools
-The `ai.tool` function allows you to create an AI tool from an existing `schemaTask` to use with the Vercel [AI SDK](https://vercel.com/docs/ai-sdk):
+Use a `schemaTask` as the implementation of a Vercel [AI SDK](https://vercel.com/docs/ai-sdk) tool: the model calls the tool, and Trigger runs your task as a **subtask** with tool-call metadata, optional [chat context](/ai-chat/features#task-tool-subtasks), and the same payload validation as a normal trigger.
+
+### Recommended: `ai.toolExecute` with `tool()`
+
+Prefer building the tool with the AI SDK’s [`tool()`](https://sdk.vercel.ai/docs/ai-sdk-core/tools-and-tool-calling) and passing **`execute: ai.toolExecute(yourTask)`**. You keep full control of `description`, `inputSchema`, and AI-SDK-only options (for example `experimental_toToolResultContent`), and your types follow the `ai` version installed in **your** app.
```ts
import { ai } from "@trigger.dev/sdk/ai";
import { schemaTask } from "@trigger.dev/sdk";
+import { tool, generateText } from "ai";
+import { openai } from "@ai-sdk/openai";
import { z } from "zod";
-import { generateText } from "ai";
const myToolTask = schemaTask({
id: "my-tool-task",
schema: z.object({
foo: z.string(),
}),
- run: async (payload: any, { ctx }) => {},
+ run: async ({ foo }) => {
+ return { bar: foo.toUpperCase() };
+ },
});
-const myTool = ai.tool(myToolTask);
+const myTool = tool({
+ description: myToolTask.description ?? "",
+ inputSchema: myToolTask.schema!,
+ execute: ai.toolExecute(myToolTask),
+});
export const myAiTask = schemaTask({
id: "my-ai-task",
schema: z.object({
text: z.string(),
}),
- run: async (payload, { ctx }) => {
- const { text } = await generateText({
- prompt: payload.text,
+ run: async ({ text }) => {
+ const { text: reply } = await generateText({
+ prompt: text,
model: openai("gpt-4o"),
tools: {
myTool,
},
});
+ return reply;
},
});
```
-You can also pass the `experimental_toToolResultContent` option to the `ai.tool` function to customize the content of the tool result:
+`experimental_toToolResultContent` and other tool-level options belong on **`tool({ ... })`**, not on `ai.toolExecute`:
```ts
import { openai } from "@ai-sdk/openai";
import { Sandbox } from "@e2b/code-interpreter";
import { ai } from "@trigger.dev/sdk/ai";
import { schemaTask } from "@trigger.dev/sdk";
-import { generateObject } from "ai";
+import { generateObject, tool } from "ai";
import { z } from "zod";
const chartTask = schemaTask({
@@ -135,56 +147,37 @@ const chartTask = schemaTask({
schema: z.object({
code: z.string().describe("The Python code to execute"),
}),
- system: `
- You are a helpful assistant that can generate Python code to be executed in a sandbox, using matplotlib.pyplot.
-
- For example:
-
- import matplotlib.pyplot as plt
- plt.plot([1, 2, 3, 4])
- plt.ylabel('some numbers')
- plt.show()
-
- Make sure the code ends with plt.show()
- `,
+ system: `You are a helpful assistant that generates matplotlib code. End with plt.show().`,
prompt: input,
});
const sandbox = await Sandbox.create();
-
const execution = await sandbox.runCode(code.object.code);
-
const firstResult = execution.results[0];
if (firstResult.png) {
- return {
- chart: firstResult.png,
- };
- } else {
- throw new Error("No chart generated");
+ return { chart: firstResult.png };
}
+ throw new Error("No chart generated");
},
});
-// This is useful if you want to return an image from the tool
-export const chartTool = ai.tool(chartTask, {
- experimental_toToolResultContent: (result) => {
- return [
- {
- type: "image",
- data: result.chart,
- mimeType: "image/png",
- },
- ];
- },
+export const chartTool = tool({
+ description: chartTask.description ?? "",
+ inputSchema: chartTask.schema!,
+ execute: ai.toolExecute(chartTask),
+ experimental_toToolResultContent: (result) => [
+ { type: "image", data: result.chart, mimeType: "image/png" },
+ ],
});
```
-You can access the current tool execution options inside the task run function using the `ai.currentToolOptions()` function:
+Inside the task run, you can read tool execution context with **`ai.currentToolOptions()`** (and helpers like `ai.toolCallId()`, `ai.chatContext()` when running inside a [`chat.task`](/ai-chat/overview)):
```ts
import { ai } from "@trigger.dev/sdk/ai";
import { schemaTask } from "@trigger.dev/sdk";
+import { tool } from "ai";
import { z } from "zod";
const myToolTask = schemaTask({
@@ -192,22 +185,49 @@ const myToolTask = schemaTask({
schema: z.object({
foo: z.string(),
}),
- run: async (payload, { ctx }) => {
+ run: async ({ foo }) => {
const toolOptions = ai.currentToolOptions();
console.log(toolOptions);
+ return { foo };
},
});
-export const myAiTask = ai.tool(myToolTask);
+export const myTool = tool({
+ description: myToolTask.description ?? "",
+ inputSchema: myToolTask.schema!,
+ execute: ai.toolExecute(myToolTask),
+});
```
-See the [AI SDK tool execution options docs](https://sdk.vercel.ai/docs/ai-sdk-core/tools-and-tool-calling#tool-execution-options) for more details on the tool execution options.
+See the [AI SDK tool execution options](https://sdk.vercel.ai/docs/ai-sdk-core/tools-and-tool-calling#tool-execution-options) for fields passed through the runtime.
- `ai.tool` is compatible with `schemaTask`'s defined with Zod and ArkType schemas, or any schemas
- that implement a `.toJsonSchema()` function.
+ `ai.toolExecute` works with `schemaTask` definitions that use Zod, ArkType, or any schema that provides a JSON schema via `.toJsonSchema()` (same coverage as the legacy `ai.tool` wrapper).
+### Deprecated: `ai.tool`
+
+The **`ai.tool(task, options?)`** helper is **deprecated**. It constructs an AI SDK `Tool` for you (using `tool()` for Zod-like schemas and `dynamicTool()` otherwise) and may be removed in a future major version. New code should use **`tool({ ..., execute: ai.toolExecute(task) })`** as shown above.
+
+### Legacy `ai.tool` example (deprecated)
+
+```ts
+import { ai } from "@trigger.dev/sdk/ai";
+import { schemaTask } from "@trigger.dev/sdk";
+import { z } from "zod";
+import { generateText } from "ai";
+import { openai } from "@ai-sdk/openai";
+
+const myToolTask = schemaTask({
+ id: "my-tool-task",
+ schema: z.object({ foo: z.string() }),
+ run: async ({ foo }) => ({ foo }),
+});
+
+// Deprecated — prefer tool({ execute: ai.toolExecute(myToolTask), ... })
+const myTool = ai.tool(myToolTask);
+```
+
## Supported schema types
### Zod
diff --git a/references/ai-chat/ARCHITECTURE.md b/references/ai-chat/ARCHITECTURE.md
new file mode 100644
index 00000000000..8adbc0c4a1a
--- /dev/null
+++ b/references/ai-chat/ARCHITECTURE.md
@@ -0,0 +1,311 @@
+# AI Chat Architecture
+
+## System Overview
+
+```mermaid
+graph TB
+ subgraph Frontend["Frontend (Browser)"]
+ UC[useChat Hook]
+ TCT[TriggerChatTransport]
+ UI[Chat UI Components]
+ end
+
+ subgraph Platform["Trigger.dev Platform"]
+ API[REST API]
+ RS[Realtime Streams]
+ RE[Run Engine]
+ end
+
+ subgraph Worker["Task Worker"]
+ CT[chat.task Turn Loop]
+ ST[streamText / AI SDK]
+ LLM[LLM Provider]
+ SUB[Subtasks via ai.tool]
+ end
+
+ UI -->|user types| UC
+ UC -->|sendMessages| TCT
+ TCT -->|triggerTask / sendInputStream| API
+ API -->|queue run / deliver input| RE
+ RE -->|execute| CT
+ CT -->|call| ST
+ ST -->|API call| LLM
+ LLM -->|stream chunks| ST
+ ST -->|UIMessageChunks| RS
+ RS -->|SSE| TCT
+ TCT -->|ReadableStream| UC
+ UC -->|update| UI
+ CT -->|triggerAndWait| SUB
+ SUB -->|chat.stream target:root| RS
+```
+
+## Detailed Flow: New Chat (First Message)
+
+```mermaid
+sequenceDiagram
+ participant User
+ participant useChat as useChat + Transport
+ participant API as Trigger.dev API
+ participant Task as chat.task Worker
+ participant LLM as LLM Provider
+
+ User->>useChat: sendMessage("Hello")
+ useChat->>useChat: No session for chatId → trigger new run
+
+ useChat->>API: triggerTask(payload, tags: [chat:id])
+ API-->>useChat: { runId, publicAccessToken }
+ useChat->>useChat: Store session, subscribe to SSE
+
+ API->>Task: Start run with ChatTaskWirePayload
+
+ Note over Task: Preload phase skipped (trigger ≠ "preload")
+
+ rect rgb(240, 248, 255)
+ Note over Task: Turn 0
+ Task->>Task: convertToModelMessages(uiMessages)
+ Task->>Task: Mint access token
+ Task->>Task: onChatStart({ chatId, messages, clientData })
+ Task->>Task: onTurnStart({ chatId, messages, uiMessages })
+ Task->>LLM: streamText({ model, messages, abortSignal })
+ LLM-->>Task: Stream response chunks
+ Task->>API: streams.pipe("chat", uiStream)
+ API-->>useChat: SSE: UIMessageChunks
+ useChat-->>User: Render streaming text
+ Task->>Task: onFinish → capturedResponseMessage
+ Task->>Task: Accumulate response in messages
+ Task->>API: Write __trigger_turn_complete chunk
+ API-->>useChat: SSE: { type: __trigger_turn_complete, publicAccessToken }
+ useChat->>useChat: Close stream, update session
+ Task->>Task: onTurnComplete({ messages, uiMessages, stopped })
+ end
+
+ rect rgb(255, 248, 240)
+ Note over Task: Wait for next message
+ Task->>Task: messagesInput.once() [warm, 30s]
+ Note over Task: No message → suspend
+ Task->>Task: messagesInput.wait() [suspended, 1h]
+ end
+```
+
+## Detailed Flow: Multi-Turn (Subsequent Messages)
+
+```mermaid
+sequenceDiagram
+ participant User
+ participant useChat as useChat + Transport
+ participant API as Trigger.dev API
+ participant Task as chat.task Worker
+ participant LLM as LLM Provider
+
+ Note over Task: Suspended, waiting for message
+
+ User->>useChat: sendMessage("Tell me more")
+ useChat->>useChat: Session exists → send via input stream
+ useChat->>API: sendInputStream(runId, "chat-messages", payload)
+    Note right of useChat: Only sends the new message (not the full history)
+
+ API->>Task: Deliver to messagesInput
+ Task->>Task: Wake from suspend
+
+ rect rgb(240, 248, 255)
+ Note over Task: Turn 1
+ Task->>Task: Append new message to accumulators
+ Task->>Task: Mint fresh access token
+ Task->>Task: onTurnStart({ turn: 1, messages })
+ Task->>LLM: streamText({ messages: [all accumulated] })
+ LLM-->>Task: Stream response
+ Task->>API: streams.pipe("chat", uiStream)
+ API-->>useChat: SSE: UIMessageChunks
+ useChat-->>User: Render streaming text
+ Task->>API: Write __trigger_turn_complete
+ Task->>Task: onTurnComplete({ turn: 1 })
+ end
+
+ Task->>Task: Wait for next message (warm → suspend)
+```
+
+## Stop Signal Flow
+
+```mermaid
+sequenceDiagram
+ participant User
+ participant useChat as useChat + Transport
+ participant API as Trigger.dev API
+ participant Task as chat.task Worker
+ participant LLM as LLM Provider
+
+ Note over Task: Streaming response...
+
+ User->>useChat: Click "Stop"
+ useChat->>API: sendInputStream(runId, "chat-stop", { stop: true })
+ useChat->>useChat: Set skipToTurnComplete = true
+
+ API->>Task: Deliver to stopInput
+ Task->>Task: stopController.abort()
+ Task->>LLM: AbortSignal fires
+ LLM-->>Task: Stream ends (AbortError)
+ Task->>Task: Catch AbortError, fall through
+ Task->>Task: await onFinishPromise (race condition fix)
+ Task->>Task: cleanupAbortedParts(responseMessage)
+    Note right of Task: Remove partial tool calls<br/>Mark streaming parts as done
+
+ Task->>API: Write __trigger_turn_complete
+ API-->>useChat: SSE: __trigger_turn_complete
+ useChat->>useChat: skipToTurnComplete = false, close stream
+
+ Task->>Task: onTurnComplete({ stopped: true, responseMessage: cleaned })
+ Task->>Task: Wait for next message
+```
+
+## Preload Flow
+
+```mermaid
+sequenceDiagram
+ participant User
+ participant useChat as useChat + Transport
+ participant API as Trigger.dev API
+ participant Task as chat.task Worker
+
+ User->>useChat: Click "New Chat"
+ useChat->>API: transport.preload(chatId)
+    Note right of useChat: payload: { messages: [], trigger: "preload" }<br/>tags: [chat:id, preload:true]
+ API-->>useChat: { runId, publicAccessToken }
+ useChat->>useChat: Store session
+
+ API->>Task: Start run (trigger = "preload")
+
+ rect rgb(240, 255, 240)
+ Note over Task: Preload Phase
+ Task->>Task: Mint access token
+ Task->>Task: onPreload({ chatId, clientData })
+ Note right of Task: DB setup, load user context, load dynamic tools
+ Task->>Task: messagesInput.once() [warm]
+ Note over Task: Waiting for first message...
+ end
+
+ Note over User: User is typing...
+
+ User->>useChat: sendMessage("Hello")
+ useChat->>useChat: Session exists → send via input stream
+ useChat->>API: sendInputStream(runId, "chat-messages", payload)
+ API->>Task: Deliver message
+
+ rect rgb(240, 248, 255)
+ Note over Task: Turn 0 (preloaded = true)
+ Task->>Task: onChatStart({ preloaded: true })
+ Task->>Task: onTurnStart({ preloaded: true })
+ Task->>Task: run() with preloaded dynamic tools ready
+ end
+```
+
+## Subtask Streaming (Tool as Task)
+
+```mermaid
+sequenceDiagram
+ participant useChat as useChat + Transport
+ participant API as Trigger.dev API
+ participant Chat as chat.task
+ participant LLM as LLM Provider
+ participant Sub as Subtask (ai.tool)
+
+ Chat->>LLM: streamText({ tools: { research: ai.tool(task) } })
+ LLM-->>Chat: Tool call: research({ query, urls })
+
+ Chat->>API: triggerAndWait(subtask, input)
+ Note right of Chat: Passes toolCallId, chatId, clientData via metadata
+
+ API->>Sub: Start subtask
+
+ Sub->>Sub: ai.chatContextOrThrow() → { chatId, clientData }
+ Sub->>API: chat.stream.writer({ target: "root" })
+ Note right of Sub: Write data-research-progress chunks to parent's stream
+ API-->>useChat: SSE: data-* chunks
+ useChat-->>useChat: Render progress UI
+
+ Sub-->>Chat: Return result
+ Chat->>LLM: Tool result
+ LLM-->>Chat: Continue response
+```
+
+## Continuation Flow (Run Timeout / Cancel)
+
+```mermaid
+sequenceDiagram
+ participant User
+ participant useChat as useChat + Transport
+ participant API as Trigger.dev API
+ participant Task as chat.task Worker
+
+ Note over Task: Previous run timed out / was cancelled
+
+ User->>useChat: sendMessage("Continue")
+ useChat->>API: sendInputStream(runId, payload)
+ API-->>useChat: Error (run dead)
+
+ useChat->>useChat: Delete session, set isContinuation = true
+ useChat->>API: triggerTask(payload, continuation: true, previousRunId)
+ API-->>useChat: New { runId, publicAccessToken }
+
+ API->>Task: Start new run
+
+ rect rgb(255, 245, 238)
+ Note over Task: Turn 0 (continuation = true)
+ Task->>Task: cleanupAbortedParts(incoming messages)
+ Note right of Task: Strip incomplete tool calls from previous run's response
+ Task->>Task: onChatStart({ continuation: true, previousRunId })
+ Task->>Task: Normal turn flow...
+ end
+```
+
+## Hook Lifecycle
+
+```mermaid
+graph TD
+ START([Run Starts]) --> IS_PRELOAD{trigger = preload?}
+
+ IS_PRELOAD -->|Yes| PRELOAD[onPreload]
+ PRELOAD --> WAIT_MSG[Wait for first message warm → suspend]
+ WAIT_MSG --> TURN0
+
+ IS_PRELOAD -->|No| TURN0
+
+ TURN0[Turn 0] --> CHAT_START[onChatStart continuation, preloaded]
+ CHAT_START --> TURN_START_0[onTurnStart]
+ TURN_START_0 --> RUN_0[run → streamText]
+ RUN_0 --> TURN_COMPLETE_0[onTurnComplete stopped, responseMessage]
+
+ TURN_COMPLETE_0 --> WAIT{Wait for next message}
+ WAIT -->|Message arrives| TURN_N[Turn N]
+ WAIT -->|Timeout| END_RUN([Run Ends])
+
+ TURN_N --> TURN_START_N[onTurnStart]
+ TURN_START_N --> RUN_N[run → streamText]
+ RUN_N --> TURN_COMPLETE_N[onTurnComplete]
+ TURN_COMPLETE_N --> WAIT
+```
+
+## Stream Architecture
+
+```mermaid
+graph LR
+ subgraph Output["Output Stream (chat)"]
+ direction TB
+ O1[UIMessageChunks text, reasoning, tools]
+ O2[data-* custom chunks]
+ O3[__trigger_turn_complete control chunk]
+ end
+
+ subgraph Input["Input Streams"]
+ direction TB
+ I1[chat-messages User messages]
+ I2[chat-stop Stop signal]
+ end
+
+ Frontend -->|sendInputStream| I1
+ Frontend -->|sendInputStream| I2
+ I1 -->|messagesInput.once/wait| Worker
+ I2 -->|stopInput.on| Worker
+ Worker -->|streams.pipe / chat.stream| Output
+ Subtask -->|chat.stream target:root| Output
+ Output -->|SSE /realtime/v1/streams| Frontend
+```