diff --git a/docs/docs/api/appkit/Variable.agents.md b/docs/docs/api/appkit/Variable.agents.md
index b5a4ec03..227a5bf3 100644
--- a/docs/docs/api/appkit/Variable.agents.md
+++ b/docs/docs/api/appkit/Variable.agents.md
@@ -6,7 +6,8 @@ const agents: ToPlugin<typeof AgentsPlugin, AgentsPluginConfig, string>;
 
 Plugin factory for the agents plugin. Reads `config/agents/*.md` by default,
 resolves toolkits/tools from registered plugins, exposes `appkit.agents.*`
-runtime API and mounts `/invocations`.
+runtime API and mounts `POST /invocations` and `POST /responses` (aliased
+non-streaming invoke endpoints) plus `POST /chat` (streaming, HITL-capable).
 
 ## Example
 
diff --git a/docs/docs/api/appkit/index.md b/docs/docs/api/appkit/index.md
index 66b82649..3fcf9d8a 100644
--- a/docs/docs/api/appkit/index.md
+++ b/docs/docs/api/appkit/index.md
@@ -110,7 +110,7 @@ surface with `@databricks/appkit/beta`. Not meant for application imports.
 
 | Variable | Description |
 | ------ | ------ |
-| [agents](Variable.agents.md) | Plugin factory for the agents plugin. Reads `config/agents/*.md` by default, resolves toolkits/tools from registered plugins, exposes `appkit.agents.*` runtime API and mounts `/invocations`. |
+| [agents](Variable.agents.md) | Plugin factory for the agents plugin. Reads `config/agents/*.md` by default, resolves toolkits/tools from registered plugins, exposes `appkit.agents.*` runtime API and mounts `POST /invocations` and `POST /responses` (aliased non-streaming invoke endpoints) plus `POST /chat` (streaming, HITL-capable). |
 | [READ\_ACTIONS](Variable.READ_ACTIONS.md) | Actions that only read data. |
 | [sql](Variable.sql.md) | SQL helper namespace |
 | [WRITE\_ACTIONS](Variable.WRITE_ACTIONS.md) | Actions that mutate data. |
diff --git a/docs/docs/plugins/agents.md b/docs/docs/plugins/agents.md
index 0ba2ab30..2ae56c86 100644
--- a/docs/docs/plugins/agents.md
+++ b/docs/docs/plugins/agents.md
@@ -6,7 +6,7 @@ This plugin is currently **beta**. APIs may change between minor releases. Impor
 :::
 <!-- AUTO-GENERATED: stability-banner-end -->
 
-The `agents` plugin turns a Databricks AppKit app into an AI-agent host. It loads agent definitions from markdown on disk (one folder per agent: `config/agents/<id>/agent.md`), from TypeScript (`createAgent(def)`), or both, and exposes them at `POST /invocations` alongside routes for chat, thread management, and cancellation.
+The `agents` plugin turns a Databricks AppKit app into an AI-agent host. It loads agent definitions from markdown on disk (one folder per agent: `config/agents/<id>/agent.md`), from TypeScript (`createAgent(def)`), or both, and exposes them at `POST /invocations` and `POST /responses` (non-streaming aliases of each other) alongside `POST /chat` (streaming) and routes for thread management, cancellation, and human-in-the-loop (HITL) approval.
 
 This page covers the full lifecycle. For the hand-written primitives (`tool()`, `mcpServer()`), see [tools](./server.md).
 
@@ -31,7 +31,7 @@ await createApp({
 });
 ```
 
-That alone gives you a live HTTP server with `POST /invocations` wired to a markdown-driven agent.
+That alone gives you a live HTTP server with `POST /invocations` (and its alias `POST /responses`) wired to a markdown-driven agent. Use `POST /chat` instead when you want the streaming, HITL-capable surface.
 
 ## Level 1: drop a markdown agent package
 
@@ -65,7 +65,11 @@ On startup the plugin:
 
 The agent starts with **no tools**. Tools are opt-in — declare them in frontmatter (Level 2 below) or opt into auto-inherit explicitly with `agents({ autoInheritTools: { file: true } })`. See "Auto-inherit posture" further down for what that costs and why it's off by default.
 
-Requests land at `POST /invocations` with an OpenAI Responses-compatible body. Every tool call runs through `asUser(req)` so SQL executes as the requesting user, file access respects Unity Catalog ACLs, and telemetry spans are created automatically.
+Requests land at `POST /invocations` (or its alias `POST /responses`) with an OpenAI Responses-compatible body. These endpoints run the agent to completion and return a single JSON response — no SSE. Streaming clients should use `POST /chat`. Every tool call runs through `asUser(req)` so SQL executes as the requesting user, file access respects Unity Catalog ACLs, and telemetry spans are created automatically.
+
+:::warning No HITL on `/invocations` and `/responses`
+The non-streaming invoke endpoints cannot relay a mid-call approval prompt back to the caller. When `approval.requireForDestructive` is enabled (the default) and the resolved agent has any tool annotated with a mutating effect (`effect: "write" | "update" | "destructive"`, or the legacy `destructive: true`), `POST /invocations` and `POST /responses` reject the request with HTTP 400 before the adapter runs. Move HITL-capable agents to `POST /chat`, or disable approval via `agents({ approval: { requireForDestructive: false } })` for autonomous back-office agents.
+:::
 
 ## Level 2: scope tools in frontmatter
 
@@ -370,7 +374,7 @@ The route enforces that the decider is the stream owner: an approve from a diffe
 
 The plugin enforces a handful of caps to protect a single-instance deployment from runaway prompts, misbehaving clients, or prompt-injected delegation cycles. Some are static (enforced by the request schema) and some are configurable via `agents({ limits: { ... } })`.
 
-**Static caps** (applied at `POST /chat` and `POST /invocations` request parsing):
+**Static caps** (applied at `POST /chat`, `POST /invocations`, and `POST /responses` request parsing):
 
 | Field | Cap | Why |
 |---|---|---|
diff --git a/packages/appkit/src/plugins/agents/agents.ts b/packages/appkit/src/plugins/agents/agents.ts
index 40217e54..c63ec094 100644
--- a/packages/appkit/src/plugins/agents/agents.ts
+++ b/packages/appkit/src/plugins/agents/agents.ts
@@ -10,6 +10,7 @@ import type {
   IAppRouter,
   Message,
   PluginPhase,
+  ResponseOutputMessage,
   ResponseStreamEvent,
   Thread,
   ToolAnnotations,
@@ -275,7 +276,7 @@ export class AgentsPlugin extends Plugin implements ToolProvider {
     const { agents, defaultAgentName } = await this.buildAgentRegistry();
     this.agents = agents;
     this.defaultAgentName = defaultAgentName;
-    this.mountInvocationsRoute();
+    this.mountInvokeRoutes();
     this.printRegistry();
   }
 
@@ -762,15 +763,19 @@ export class AgentsPlugin extends Plugin implements ToolProvider {
 
   // ----------------- Route mounting and handlers ---------------------------
 
-  private mountInvocationsRoute() {
+  /**
+   * Mount the non-streaming invoke endpoints outside the `/api/<plugin>`
+   * namespace. `/invocations` and `/responses` are aliases — both run the
+   * default agent to completion and return a single JSON response. Streaming
+   * lives on `POST /chat` (mounted in `injectRoutes`).
+   */
+  private mountInvokeRoutes() {
     if (!this.context) return;
-    this.context.addRoute(
-      "post",
-      "/invocations",
-      (req: express.Request, res: express.Response) => {
-        this._handleInvocations(req, res);
-      },
-    );
+    const handler = (req: express.Request, res: express.Response) => {
+      this._handleInvoke(req, res);
+    };
+    this.context.addRoute("post", "/invocations", handler);
+    this.context.addRoute("post", "/responses", handler);
   }
 
   injectRoutes(router: IAppRouter) {
@@ -896,10 +901,41 @@ export class AgentsPlugin extends Plugin implements ToolProvider {
     return this._streamAgent(req, res, registered, thread, userId);
   }
 
-  private async _handleInvocations(
-    req: express.Request,
-    res: express.Response,
-  ) {
+  /**
+   * Returns the names of tools in `registered.toolIndex` whose annotations
+   * satisfy `requiresApproval`. Used by the non-streaming invoke path
+   * (`/invocations`, `/responses`) to fail fast before the adapter runs:
+   * those endpoints have no channel back to the user mid-call, so an agent
+   * with approval-gated tools cannot be served. Returns `[]` when
+   * `approval.requireForDestructive` is false — operators who explicitly
+   * disabled HITL keep the invoke surface unrestricted.
+   * NOTE(review): only this agent's own tool index is scanned; confirm
+   * tools reachable through sub-agent delegation are covered elsewhere.
+   */
+  private collectApprovalRequiredToolNames(
+    registered: RegisteredAgent,
+  ): string[] {
+    if (!this.resolvedApprovalPolicy.requireForDestructive) return [];
+    const names: string[] = [];
+    for (const entry of registered.toolIndex.values()) {
+      if (requiresApproval(entry.def.annotations)) {
+        names.push(entry.def.name);
+      }
+    }
+    return names;
+  }
+
+  /**
+   * Shared handler for `POST /invocations` and `POST /responses`. Runs the
+   * default agent to completion and returns a single JSON response in the
+   * OpenAI Responses non-streaming shape. The two endpoints are aliases —
+   * streaming clients must use `POST /chat`.
+   *
+   * Rejects with HTTP 400 when the resolved agent has any approval-gated
+   * tool in scope: HITL requires a live SSE channel, which this surface
+   * does not provide. See {@link collectApprovalRequiredToolNames}.
+   */
+  private async _handleInvoke(req: express.Request, res: express.Response) {
     const parsed = invocationsRequestSchema.safeParse(req.body);
     if (!parsed.success) {
       res.status(400).json({
@@ -914,6 +950,24 @@ export class AgentsPlugin extends Plugin implements ToolProvider {
       res.status(400).json({ error: "No agent registered" });
       return;
     }
+
+    // Pre-flight HITL gate. The non-streaming invoke surface has no way to
+    // surface an approval prompt back to the caller and no way to receive
+    // a decision mid-run, so we reject up-front instead of having the
+    // approval gate auto-deny mid-stream (which would leave the caller
+    // with a confusing "denied by user" tool result in the final text).
+    const approvalGated = this.collectApprovalRequiredToolNames(registered);
+    if (approvalGated.length > 0) {
+      res.status(400).json({
+        error:
+          `Agent '${registered.name}' exposes ${approvalGated.length} approval-gated tool(s) ` +
+          `(${approvalGated.join(", ")}); /invocations and /responses are non-streaming and ` +
+          "cannot run HITL. Use POST /chat for HITL-capable agents, or disable approval via " +
+          "agents({ approval: { requireForDestructive: false } }).",
+      });
+      return;
+    }
+
     const userId = this.resolveUserId(req);
 
     // Match the rate-limit gate on /chat. Without this, a client can bypass
@@ -962,7 +1016,7 @@ export class AgentsPlugin extends Plugin implements ToolProvider {
       return;
     }
 
-    return this._streamAgent(req, res, registered, thread, userId);
+    return this._runAgentNonStreaming(req, res, registered, thread, userId);
   }
 
   private async _streamAgent(
@@ -1123,6 +1177,159 @@ export class AgentsPlugin extends Plugin implements ToolProvider {
     );
   }
 
+  /**
+   * Non-streaming counterpart to {@link _streamAgent} used by `/invocations`
+   * and `/responses`. Drives the adapter to completion, persists the
+   * assistant turn to the thread store (when it produced any text), and
+   * answers with one JSON envelope shaped like the OpenAI Responses
+   * non-streaming API. A failed run answers 500; an aborted one 499.
+   *
+   * Nothing is streamed: the caller is waiting on one HTTP response. The
+   * per-run approval policy is pinned to `requireForDestructive: false`.
+   * `_handleInvoke` already rejects up-front if any tool in scope would
+   * require approval, so this only matters for a tool that slips past the
+   * precheck (e.g. annotations mutated at runtime): it keeps the request
+   * from stalling on an approval prompt that no one can answer.
+   * NOTE(review): presumably such a tool then runs unapproved — confirm.
+   *
+   * The `RunState` shape is otherwise unchanged so {@link dispatchToolCall}
+   * — including sub-agent recursion via {@link runSubAgent} — keeps the
+   * same tool-call budget, abort signal, and timeout enforcement as the
+   * streaming path. An `AgentEventTranslator` and an `EventChannel` are
+   * still placed in the run state, but this method never reads from
+   * either, so events routed to them are not delivered to the caller.
+   * NOTE(review): the channel is never closed here — confirm that is safe.
+   */
+  private async _runAgentNonStreaming(
+    req: express.Request,
+    res: express.Response,
+    registered: RegisteredAgent,
+    thread: Thread,
+    userId: string,
+  ): Promise<void> {
+    const abortController = new AbortController();
+    const signal = abortController.signal;
+    const requestId = randomUUID();
+    this.trackStream(requestId, userId, abortController);
+
+    const tools = Array.from(registered.toolIndex.values()).map((e) => e.def);
+    const limits = this.resolvedLimits;
+
+    const runState: RunState = {
+      req,
+      userId,
+      requestId,
+      abortController,
+      signal,
+      // Force approval off for the non-streaming invoke surface. The
+      // precheck in `_handleInvoke` already rejects agents whose tool index
+      // holds an approval-gated tool; this is belt-and-braces.
+      approvalPolicy: { requireForDestructive: false, timeoutMs: 0 },
+      limits,
+      translator: new AgentEventTranslator(),
+      outboundEvents: new EventChannel<ResponseStreamEvent>(),
+      toolCallsUsed: { count: 0 },
+    };
+
+    const executeTool = (name: string, args: unknown): Promise<unknown> =>
+      this.dispatchToolCall(runState, registered.toolIndex, name, args, 0);
+
+    let fullContent = "";
+    try {
+      const pluginNames = this.context
+        ? this.context
+            .getPluginNames()
+            .filter((n) => n !== this.name && n !== "server")
+        : [];
+      const fullPrompt = composePromptForAgent(
+        registered,
+        this.config.baseSystemPrompt,
+        {
+          agentName: registered.name,
+          pluginNames,
+          toolNames: tools.map((t) => t.name),
+        },
+      );
+
+      const messagesWithSystem: Message[] = [
+        {
+          id: "system",
+          role: "system",
+          content: fullPrompt,
+          createdAt: new Date(),
+        },
+        ...thread.messages,
+      ];
+
+      const stream = registered.adapter.run(
+        {
+          messages: messagesWithSystem,
+          tools,
+          threadId: thread.id,
+          signal,
+        },
+        { executeTool, signal },
+      );
+
+      fullContent = await consumeAdapterStream(stream, { signal });
+
+      if (fullContent) {
+        await this.threadStore.addMessage(thread.id, userId, {
+          id: randomUUID(),
+          role: "assistant",
+          content: fullContent,
+          createdAt: new Date(),
+        });
+      }
+    } catch (error) {
+      if (signal.aborted) {
+        res.status(499).json({ error: "Request aborted" });
+        return;
+      }
+      logger.error("Agent invoke error: %O", error);
+      const message =
+        process.env.NODE_ENV === "production"
+          ? "Internal server error"
+          : error instanceof Error
+            ? error.message
+            : String(error);
+      res.status(500).json({ error: message });
+      return;
+    } finally {
+      this.approvalGate.abortStream(requestId);
+      this.untrackStream(requestId);
+      if (registered.ephemeral) {
+        try {
+          await this.threadStore.delete(thread.id, userId);
+        } catch (err) {
+          logger.warn(
+            "Failed to delete ephemeral thread %s: %O",
+            thread.id,
+            err,
+          );
+        }
+      }
+    }
+
+    const responseId = `resp_${randomUUID()}`;
+    const messageId = `msg_${randomUUID()}`;
+    const message: ResponseOutputMessage = {
+      type: "message",
+      id: messageId,
+      status: "completed",
+      role: "assistant",
+      content: [{ type: "output_text", text: fullContent }],
+    };
+    res.json({
+      id: responseId,
+      object: "response",
+      created_at: Math.floor(Date.now() / 1000),
+      status: "completed",
+      thread_id: thread.id,
+      output: [message],
+    });
+  }
+
   /**
    * Dispatch a single tool call from either the top-level adapter or a
    * sub-agent. Centralising this in one method is what makes the budget
@@ -1528,7 +1735,8 @@ function composePromptForAgent(
 /**
  * Plugin factory for the agents plugin. Reads `config/agents/*.md` by default,
  * resolves toolkits/tools from registered plugins, exposes `appkit.agents.*`
- * runtime API and mounts `/invocations`.
+ * runtime API and mounts `POST /invocations` and `POST /responses` (aliased
+ * non-streaming invoke endpoints) plus `POST /chat` (streaming, HITL-capable).
  *
  * @example
  * ```ts
diff --git a/packages/appkit/src/plugins/agents/schemas.ts b/packages/appkit/src/plugins/agents/schemas.ts
index 87268421..395a8eab 100644
--- a/packages/appkit/src/plugins/agents/schemas.ts
+++ b/packages/appkit/src/plugins/agents/schemas.ts
@@ -58,7 +58,6 @@ export const invocationsRequestSchema = z.object({
         `input array exceeds the ${MAX_INVOCATIONS_INPUT_ITEMS}-item limit`,
       ),
   ]),
-  stream: z.boolean().optional().default(true),
   model: z.string().optional(),
 });
 
diff --git a/packages/appkit/src/plugins/agents/tests/dos-limits.test.ts b/packages/appkit/src/plugins/agents/tests/dos-limits.test.ts
index e2bbcbe9..aecf75fb 100644
--- a/packages/appkit/src/plugins/agents/tests/dos-limits.test.ts
+++ b/packages/appkit/src/plugins/agents/tests/dos-limits.test.ts
@@ -258,12 +258,12 @@ describe("POST /chat — per-user concurrent-stream limit", () => {
     const { res, setHeader, json } = mockRes();
     await (
       plugin as unknown as {
-        _handleInvocations: (
+        _handleInvoke: (
           r: express.Request,
           w: express.Response,
         ) => Promise<void>;
       }
-    )._handleInvocations(mockReq({ input: "hi" }, "alice"), res);
+    )._handleInvoke(mockReq({ input: "hi" }, "alice"), res);
 
     expect(res.status).toHaveBeenCalledWith(429);
     expect(setHeader).toHaveBeenCalledWith("Retry-After", "5");
diff --git a/packages/appkit/src/plugins/agents/tests/route-handler-errors.test.ts b/packages/appkit/src/plugins/agents/tests/route-handler-errors.test.ts
index e654f5ca..9ea5cb2b 100644
--- a/packages/appkit/src/plugins/agents/tests/route-handler-errors.test.ts
+++ b/packages/appkit/src/plugins/agents/tests/route-handler-errors.test.ts
@@ -6,10 +6,16 @@ import { AgentsPlugin } from "../agents";
 /**
  * Surface-level guarantees on the agents plugin's HTTP route handlers when
  * downstream dependencies fail. Prior to PR #305 review finding #1+#2,
- * `_handleChat` and `_handleInvocations` awaited `threadStore` without a
- * try/catch — a backing-store failure (DB unreachable, permission error)
- * would propagate the rejection without writing a response and the SSE
- * client would hang until the upstream proxy timeout.
+ * `_handleChat` and `_handleInvoke` (then `_handleInvocations`) awaited
+ * `threadStore` without a try/catch — a backing-store failure (DB
+ * unreachable, permission error) would propagate the rejection without
+ * writing a response and the client would hang until the upstream proxy
+ * timeout.
+ *
+ * Also covers the HITL pre-flight gate on `/invocations` and `/responses`:
+ * the non-streaming invoke surface cannot run approval prompts mid-call,
+ * so an agent whose tool surface contains approval-gated tools must be
+ * rejected up-front with HTTP 400.
  */
 
 beforeEach(() => {
@@ -141,12 +147,12 @@ describe("POST /invocations — threadStore failure", () => {
     const { res, json } = mockRes();
     await (
       plugin as unknown as {
-        _handleInvocations: (
+        _handleInvoke: (
           r: express.Request,
           w: express.Response,
         ) => Promise<void>;
       }
-    )._handleInvocations(mockReq({ input: "hi" }), res);
+    )._handleInvoke(mockReq({ input: "hi" }), res);
 
     expect(res.status).toHaveBeenCalledWith(500);
     expect(json).toHaveBeenCalledWith({ error: "Thread operation failed" });
@@ -166,12 +172,12 @@ describe("POST /invocations — threadStore failure", () => {
     const { res, json } = mockRes();
     await (
       plugin as unknown as {
-        _handleInvocations: (
+        _handleInvoke: (
           r: express.Request,
           w: express.Response,
         ) => Promise<void>;
       }
-    )._handleInvocations(
+    )._handleInvoke(
       mockReq({
         input: [
           { role: "user", content: "first" },
@@ -185,3 +191,231 @@ describe("POST /invocations — threadStore failure", () => {
     expect(json).toHaveBeenCalledWith({ error: "Thread operation failed" });
   });
 });
+
+describe("POST /invocations & /responses — HITL pre-flight", () => {
+  function seedPluginWithTools(
+    toolAnnotations: Record<string, unknown>,
+    overrides: ConstructorParameters<typeof AgentsPlugin>[0] = { dir: false },
+  ): AgentsPlugin {
+    const plugin = new AgentsPlugin(overrides);
+    const toolIndex = new Map();
+    toolIndex.set("dangerous_tool", {
+      source: "function",
+      def: {
+        name: "dangerous_tool",
+        description: "writes things",
+        parameters: { type: "object", properties: {} },
+        annotations: toolAnnotations,
+      },
+    });
+    // biome-ignore lint/suspicious/noExplicitAny: seed private state
+    (plugin as any).agents.set("default", {
+      name: "default",
+      instructions: "hi",
+      adapter: { async *run() {} },
+      toolIndex,
+    });
+    // biome-ignore lint/suspicious/noExplicitAny: seed private state
+    (plugin as any).defaultAgentName = "default";
+    return plugin;
+  }
+
+  test("rejects with 400 when a tool has effect: destructive", async () => {
+    const plugin = seedPluginWithTools({ effect: "destructive" });
+    const { res, json } = mockRes();
+    await (
+      plugin as unknown as {
+        _handleInvoke: (
+          r: express.Request,
+          w: express.Response,
+        ) => Promise<void>;
+      }
+    )._handleInvoke(mockReq({ input: "hi" }), res);
+
+    expect(res.status).toHaveBeenCalledWith(400);
+    expect(json).toHaveBeenCalledWith(
+      expect.objectContaining({
+        error: expect.stringMatching(/dangerous_tool/),
+      }),
+    );
+  });
+
+  test("rejects with 400 when a tool has legacy destructive: true", async () => {
+    const plugin = seedPluginWithTools({ destructive: true });
+    const { res, json } = mockRes();
+    await (
+      plugin as unknown as {
+        _handleInvoke: (
+          r: express.Request,
+          w: express.Response,
+        ) => Promise<void>;
+      }
+    )._handleInvoke(mockReq({ input: "hi" }), res);
+
+    expect(res.status).toHaveBeenCalledWith(400);
+    expect(json).toHaveBeenCalledWith(
+      expect.objectContaining({
+        error: expect.stringMatching(/approval-gated tool/),
+      }),
+    );
+  });
+
+  test("rejects with 400 when a tool has effect: write or update", async () => {
+    for (const effect of ["write", "update"] as const) {
+      const plugin = seedPluginWithTools({ effect });
+      const { res } = mockRes();
+      await (
+        plugin as unknown as {
+          _handleInvoke: (
+            r: express.Request,
+            w: express.Response,
+          ) => Promise<void>;
+        }
+      )._handleInvoke(mockReq({ input: "hi" }), res);
+
+      expect(res.status).toHaveBeenCalledWith(400);
+    }
+  });
+
+  test("passes pre-flight when approval.requireForDestructive is disabled", async () => {
+    const plugin = seedPluginWithTools(
+      { effect: "destructive" },
+      { dir: false, approval: { requireForDestructive: false } },
+    );
+    // biome-ignore lint/suspicious/noExplicitAny: stub the downstream runner to avoid running the adapter
+    (plugin as any)._runAgentNonStreaming = vi.fn(async () => undefined);
+    // biome-ignore lint/suspicious/noExplicitAny: stub
+    (plugin as any).threadStore = {
+      create: vi.fn().mockResolvedValue({ id: "t-1", messages: [] }),
+      addMessage: vi.fn(),
+    };
+
+    const { res } = mockRes();
+    await (
+      plugin as unknown as {
+        _handleInvoke: (
+          r: express.Request,
+          w: express.Response,
+        ) => Promise<void>;
+      }
+    )._handleInvoke(mockReq({ input: "hi" }), res);
+
+    expect(res.status).not.toHaveBeenCalledWith(400);
+    // biome-ignore lint/suspicious/noExplicitAny: assert delegation
+    expect((plugin as any)._runAgentNonStreaming).toHaveBeenCalled();
+  });
+
+  test("passes pre-flight when the agent has only read-only tools", async () => {
+    const plugin = seedPluginWithTools({ effect: "read" });
+    // biome-ignore lint/suspicious/noExplicitAny: stub the runner
+    (plugin as any)._runAgentNonStreaming = vi.fn(async () => undefined);
+    // biome-ignore lint/suspicious/noExplicitAny: stub
+    (plugin as any).threadStore = {
+      create: vi.fn().mockResolvedValue({ id: "t-1", messages: [] }),
+      addMessage: vi.fn(),
+    };
+
+    const { res } = mockRes();
+    await (
+      plugin as unknown as {
+        _handleInvoke: (
+          r: express.Request,
+          w: express.Response,
+        ) => Promise<void>;
+      }
+    )._handleInvoke(mockReq({ input: "hi" }), res);
+
+    expect(res.status).not.toHaveBeenCalledWith(400);
+    // biome-ignore lint/suspicious/noExplicitAny: assert delegation
+    expect((plugin as any)._runAgentNonStreaming).toHaveBeenCalled();
+  });
+});
+
+describe("POST /invocations & /responses — successful invoke", () => {
+  test("returns OpenAI Responses-shaped JSON with aggregated assistant text", async () => {
+    const plugin = new AgentsPlugin({ dir: false });
+    // biome-ignore lint/suspicious/noExplicitAny: seed
+    (plugin as any).agents.set("default", {
+      name: "default",
+      instructions: "hi",
+      adapter: {
+        async *run() {
+          yield { type: "message_delta", content: "hello " };
+          yield { type: "message_delta", content: "world" };
+        },
+      },
+      toolIndex: new Map(),
+    });
+    // biome-ignore lint/suspicious/noExplicitAny: seed
+    (plugin as any).defaultAgentName = "default";
+    // biome-ignore lint/suspicious/noExplicitAny: stub
+    (plugin as any).threadStore = {
+      create: vi.fn().mockResolvedValue({ id: "t-new", messages: [] }),
+      addMessage: vi.fn(),
+      delete: vi.fn(),
+    };
+
+    const { res, json } = mockRes();
+    await (
+      plugin as unknown as {
+        _handleInvoke: (
+          r: express.Request,
+          w: express.Response,
+        ) => Promise<void>;
+      }
+    )._handleInvoke(mockReq({ input: "hi" }), res);
+
+    expect(res.status).not.toHaveBeenCalledWith(500);
+    expect(json).toHaveBeenCalledTimes(1);
+    const payload = json.mock.calls[0]?.[0] as {
+      id: string;
+      object: string;
+      status: string;
+      thread_id: string;
+      output: Array<{
+        type: string;
+        role: string;
+        content: Array<{ type: string; text: string }>;
+      }>;
+    };
+    expect(payload.object).toBe("response");
+    expect(payload.status).toBe("completed");
+    expect(payload.thread_id).toBe("t-new");
+    expect(payload.id).toMatch(/^resp_/);
+    expect(payload.output).toHaveLength(1);
+    expect(payload.output[0]?.type).toBe("message");
+    expect(payload.output[0]?.role).toBe("assistant");
+    expect(payload.output[0]?.content[0]).toEqual({
+      type: "output_text",
+      text: "hello world",
+    });
+  });
+});
+
+describe("/invocations and /responses are aliases", () => {
+  test("both routes are registered and bound to the same handler", () => {
+    const plugin = new AgentsPlugin({ dir: false });
+    const addRoute = vi.fn();
+    // biome-ignore lint/suspicious/noExplicitAny: inject minimal fake context
+    (plugin as any).context = { addRoute };
+    // biome-ignore lint/suspicious/noExplicitAny: invoke private mounter
+    (plugin as any).mountInvokeRoutes();
+
+    expect(addRoute).toHaveBeenCalledTimes(2);
+    const calls = addRoute.mock.calls.map((c: unknown[]) => ({
+      method: c[0],
+      path: c[1],
+      handler: c[2],
+    }));
+    const invocations = calls.find(
+      (c: { path: unknown }) => c.path === "/invocations",
+    );
+    const responses = calls.find(
+      (c: { path: unknown }) => c.path === "/responses",
+    );
+    expect(invocations?.method).toBe("post");
+    expect(responses?.method).toBe("post");
+    // The two routes are aliases — same handler reference is mounted on both.
+    expect(invocations?.handler).toBe(responses?.handler);
+  });
+});