docker · trungutt · Jun 16, 2026 · Jun 22, 2026
@@ -50,6 +50,7 @@ All endpoints are under the `/api` prefix.
 | `DELETE` | `/api/sessions/:id`                 | Delete a session                                        |
 | `PATCH`  | `/api/sessions/:id/title`           | Update session title                                    |
 | `PATCH`  | `/api/sessions/:id/permissions`     | Update session permissions                              |
+| `PATCH`  | `/api/sessions/:id/mode`            | Switch the session between `build` (default) and `plan` mode — see [Plan mode](#plan-mode) |
 | `POST`   | `/api/sessions/:id/resume`          | Resume a paused session (after tool confirmation)       |
 | `POST`   | `/api/sessions/:id/tools/toggle`    | Toggle auto-approve (YOLO) mode                         |
 | `POST`   | `/api/sessions/:id/elicitation`     | Respond to an MCP tool elicitation request              |
@@ -204,6 +205,30 @@ By default, tool calls require approval. In the API workflow:
 
 Toggle auto-approve with `POST /api/sessions/:id/tools/toggle` for automated workflows.
 
+## Plan mode {#plan-mode}
+
+Each session has an interaction `mode` that controls what the agent is allowed to do during a turn:
+
+- `build` (default) — the agent has its full toolset.
+- `plan` — the runtime hides every tool that isn't marked read-only via the MCP `readOnlyHint` tool annotation (`ReadOnlyHint` in the Go tool definition), and injects a per-turn system reminder telling the agent to draft a plan instead of acting. Use this when you want the agent to research and propose changes before the user authorises execution.
+
+The mode is part of the session's server-side state and is persisted alongside the rest of the session.
+
+**Setting the mode**
+
+- At create time: `POST /api/sessions` with `{ "mode": "plan" }` in the body. An empty or omitted `mode` defaults to `build`. Unknown values are rejected with `400`.
+- Mid-session: `PATCH /api/sessions/:id/mode` with `{ "mode": "plan" }` or `{ "mode": "build" }`. The new mode applies on the **next** turn — an in-flight turn finishes under the mode it started with. Responds with `{ "id": "...", "mode": "..." }`.
+
+The current mode is included in `GET /api/sessions/:id` and `GET /api/sessions/:id/snapshot` responses as the top-level `mode` field.
+
+**Inheritance**
+
+Sub-sessions created by delegation tools (`transfer_task`, `run_skill`, the `agent` background-agent builtin) inherit the parent's mode, so a plan-mode parent can't bypass the filter by delegating to a child that would otherwise default to `build`.
+
+**Harness-backed agents**
+
+Plan mode is not supported for agents that delegate the whole turn to an external coding harness (`agent.harness` set in the YAML): the harness manages its own toolset, so the runtime cannot enforce the read-only filter. Attempting to run a harness agent while the session is in plan mode produces an `error` event with `code: "unsupported_mode"` — switch back to `build` first, or pick a non-harness agent.
+
 ## Driving a running TUI with `--listen` {#listen}
 
 The same session API can be exposed by an **interactive run** so an external

@@ -135,13 +135,25 @@ type SessionResponse struct {
 	OutputTokens  int64                      `json:"output_tokens"`
 	WorkingDir    string                     `json:"working_dir,omitempty"`
 	Permissions   *session.PermissionsConfig `json:"permissions,omitempty"`
+	Mode          session.Mode               `json:"mode,omitempty"`
 }
 
 // UpdateSessionPermissionsRequest represents a request to update session permissions.
 type UpdateSessionPermissionsRequest struct {
 	Permissions *session.PermissionsConfig `json:"permissions"`
 }
 
+// UpdateSessionModeRequest is the JSON request payload used to change a session's mode.
+type UpdateSessionModeRequest struct {
+	Mode session.Mode `json:"mode"`
+}
+
+// UpdateSessionModeResponse is the JSON response payload for a mode update: the session ID and its mode.
+type UpdateSessionModeResponse struct {
+	ID   string       `json:"id"`
+	Mode session.Mode `json:"mode"`
+}
+
 // ResumeSessionRequest represents a request to resume a session
 type ResumeSessionRequest struct {
 	Confirmation string `json:"confirmation"`
@@ -304,6 +316,7 @@ type SessionSnapshotResponse struct {
 	Messages      []session.Message          `json:"messages"`
 	ToolsApproved bool                       `json:"tools_approved"`
 	Permissions   *session.PermissionsConfig `json:"permissions,omitempty"`
+	Mode          session.Mode               `json:"mode,omitempty"`
 	InputTokens   int64                      `json:"input_tokens"`
 	OutputTokens  int64                      `json:"output_tokens"`
 

@@ -169,6 +169,20 @@ func newSubSession(parent *session.Session, cfg SubSessionConfig, childAgent *ag
 		session.WithSendUserMessage(false),
 		session.WithParentID(parent.ID),
 		session.WithAttachedFiles(attachedFiles),
+		// Propagate the parent's interaction mode so that plan mode is
+		// not bypassable via delegation: transfer_task / handoff / the
+		// agent builtin are read-only and survive plan-mode tool
+		// filtering, but without this line the child session would
+		// default back to build mode and the child agent would get
+		// every mutating tool. Inheriting the parent's mode preserves
+		// the "hard tool removal" guarantee across the whole delegation
+		// tree (sub-skills, transferred tasks, background agents).
+		//
+		// LoadMode (not direct field access) because the parent's
+		// mode may be flipped concurrently by PATCH
+		// /sessions/:id/mode while the parent's turn is still
+		// running.
+		session.WithMode(parent.LoadMode()),
 	}
 	if cfg.PinAgent {
 		opts = append(opts, session.WithAgentName(cfg.AgentName))

@@ -148,6 +148,30 @@ func TestNewSubSession(t *testing.T) {
 		// We can verify the user message is still the default.
 		assert.Equal(t, "Please proceed.", s.GetLastUserMessageContent())
 	})
+
+	t.Run("inherits parent mode (build)", func(t *testing.T) {
+		// Default-mode parent should produce a build-mode child. This
+		// is the trivial case but documents the invariant.
+		buildParent := session.New(session.WithUserMessage("hello"))
+		s := newSubSession(buildParent, SubSessionConfig{Task: "t"}, childAgent)
+		assert.Equal(t, session.ModeBuild, s.Mode)
+	})
+
+	t.Run("inherits parent mode (plan)", func(t *testing.T) {
+		// Regression test for the plan-mode delegation bypass: a
+		// plan-mode parent must produce plan-mode children, so that
+		// downstream filterToolsForSession strips mutating tools from
+		// the child's toolset. Without WithMode(parent.LoadMode()) in
+		// newSubSession the child would default back to build and a
+		// plan-mode agent could route around the filter via
+		// transfer_task / run_skill / the agent builtin.
+		planParent := session.New(
+			session.WithUserMessage("hello"),
+			session.WithMode(session.ModePlan),
+		)
+		s := newSubSession(planParent, SubSessionConfig{Task: "t"}, childAgent)
+		assert.Equal(t, session.ModePlan, s.Mode)
+	})
 }
 
 func TestSubSessionConfig_DefaultValues(t *testing.T) {

@@ -234,6 +234,14 @@ const (
 	ErrorCodeToolFailed      = "tool_failed"
 	ErrorCodeHookBlocked     = "hook_blocked"
 	ErrorCodeLoopDetected    = "loop_detected"
+	// ErrorCodeUnsupportedMode signals that the session's current Mode
+	// (e.g. plan) is incompatible with the agent that's about to run.
+	// Today this only fires when a plan-mode session tries to run a
+	// harness-backed agent: the runtime can't enforce plan mode's
+	// read-only tool filter for harness agents because the harness
+	// owns its toolset, so the turn is refused instead of running with
+	// a partial (advisory-only) guarantee.
+	ErrorCodeUnsupportedMode = "unsupported_mode"
 )
 
 type ErrorEvent struct {

@@ -24,6 +24,22 @@ func (r *LocalRuntime) runHarnessAgent(ctx context.Context, sess *session.Sessio
 	ctx, span := r.startSpan(ctx, "runtime.harness", trace.WithAttributes(traceAttributesForHarness(sess, a)...))
 	defer span.End()
 
+	// Plan mode's hard guarantee — every non-read-only tool is stripped
+	// from the model's toolset — relies on the runtime owning the
+	// toolset. Harness agents delegate the whole turn (tools included)
+	// to an external library, so we can't enforce the filter here.
+	// Rather than degrade plan mode to an advisory prompt (which the
+	// reminder text explicitly contradicts), refuse the turn so the
+	// user can either switch to build mode or pick a non-harness
+	// agent.
+	if sess.LoadMode() == session.ModePlan {
+		msg := fmt.Sprintf("plan mode is not supported for harness-backed agents (%q): the harness manages its own toolset, so the read-only tool filter cannot be enforced. Switch back to build mode to run this agent.", a.Name())
+		events.Emit(ErrorWithCode(ErrorCodeUnsupportedMode, msg))
+		r.notifyError(ctx, a, sess.ID, msg)
+		span.SetStatus(codes.Error, "plan mode unsupported for harness agent")
+		return turnEndReasonError
+	}
+
 	provider, err := codingharness.NewProvider(a.Harness())
 	if err != nil {
 		msg := fmt.Sprintf("failed to configure harness: %v", err)
@@ -46,6 +62,10 @@ func (r *LocalRuntime) runHarnessAgent(ctx context.Context, sess *session.Sessio
 	}()
 
 	turnStartMsgs := r.executeTurnStartHooks(ctx, sess, a, events)
+	// No plan-mode reminder spliced here: plan mode is refused for
+	// harness agents above, so the session was in build mode (or
+	// empty, which normalises to build) when the check at the top of
+	// this function ran; a later switch to plan is refused next turn.
 	messages := sess.GetMessages(a, append(baseExtra, turnStartMsgs...)...)
 	stop, msg, rewritten := r.executeBeforeLLMCallHooks(ctx, sess, a, modelID, 1, messages)
 	if stop {

@@ -167,6 +167,49 @@ printf '%s\n' '{"type":"result","result":"Hello world"}'
 	assert.Equal(t, []string{"Hello", " world"}, chunks)
 }
 
+// TestHarnessAgentRefusesPlanMode pins the plan-mode-vs-harness invariant.
+// In the normal LLM loop the runtime owns the toolset and can strip
+// non-read-only tools, but a harness-backed agent delegates the whole
+// turn (tools included) to an external library, so the runtime refuses
+// the turn instead of degrading plan mode to an advisory-only prompt.
+//
+// The test asserts that an ErrorEvent with code unsupported_mode and a
+// "plan mode" message is emitted, and that no assistant content is stored.
+func TestHarnessAgentRefusesPlanMode(t *testing.T) {
+	if stdruntime.GOOS == "windows" {
+		t.Skip("shell script shim test")
+	}
+
+	binDir := t.TempDir()
+	// The shim deliberately prints output that would normally surface as
+	// an assistant message; the harness must never run in plan mode, so
+	// none of this output may reach the session.
+	writeHarnessScript(t, binDir, "codex", `#!/bin/sh
+printf '%s\n' '{"type":"item.completed","item":{"type":"agent_message","text":"this should not appear"}}'
+`)
+	t.Setenv("PATH", binDir+string(os.PathListSeparator)+os.Getenv("PATH"))
+
+	rt := newHarnessRuntime(t, "codex")
+	sess := session.New(
+		session.WithUserMessage("do the task"),
+		session.WithMode(session.ModePlan),
+	)
+	events := collectRuntimeEvents(t, rt, sess)
+
+	var errEvent *ErrorEvent
+	for _, ev := range events {
+		if e, ok := ev.(*ErrorEvent); ok {
+			errEvent = e
+			break
+		}
+	}
+	require.NotNil(t, errEvent, "expected ErrorEvent rejecting plan mode for harness agent")
+	assert.Equal(t, ErrorCodeUnsupportedMode, errEvent.Code)
+	assert.Contains(t, errEvent.Error, "plan mode")
+	// The last assistant message must be empty: the harness never ran.
+	assert.Empty(t, sess.GetLastAssistantMessageContent())
+}
+
 func writeHarnessScript(t *testing.T, dir, name, content string) {
 	t.Helper()
 	require.NoError(t, os.WriteFile(filepath.Join(dir, name), []byte(content), 0o755))

@@ -266,7 +266,7 @@ func (r *LocalRuntime) runStreamLoop(ctx context.Context, sess *session.Session,
 		sink.Emit(ErrorWithCode(ErrorCodeToolFailed, fmt.Sprintf("failed to get tools: %v", err)))
 		return
 	}
-	agentTools = filterExcludedTools(agentTools, sess.ExcludedTools)
+	agentTools = filterToolsForSession(agentTools, sess)
 
 	sink.Emit(ToolsetInfo(len(agentTools), false, a.Name()))
 
@@ -348,7 +348,7 @@ func (r *LocalRuntime) runStreamLoop(ctx context.Context, sess *session.Session,
 			sink.Emit(ErrorWithCode(ErrorCodeToolFailed, fmt.Sprintf("failed to get tools: %v", err)))
 			return
 		}
-		agentTools = filterExcludedTools(agentTools, sess.ExcludedTools)
+		agentTools = filterToolsForSession(agentTools, sess)
 
 		// Emit updated tool count. After a ToolListChanged MCP notification
 		// the cache is invalidated, so getTools above re-fetches from the
@@ -554,7 +554,13 @@ func (r *LocalRuntime) runTurn(
 	// files) refresh every turn while session-level context (cwd, OS,
 	// arch) stays stable — all without bloating the stored history.
 	turnStartMsgs := r.executeTurnStartHooks(ctx, sess, a, events)
-	messages := sess.GetMessages(a, slices.Concat(ls.sessionStartMsgs, ls.userPromptMsgs, turnStartMsgs)...)
+	// Plan-mode reminder rides alongside the turn_start hook output so it
+	// participates in the same per-turn splice (and the cache_control marker
+	// that GetMessages applies to the last extra). It is appended last so its
+	// instruction is the most recent system context the model sees before the
+	// user prompt — minimising the chance the model ignores it.
+	planReminder := planModeReminderMessages(sess)
+	messages := sess.GetMessages(a, slices.Concat(ls.sessionStartMsgs, ls.userPromptMsgs, turnStartMsgs, planReminder)...)
 	slog.DebugContext(ctx, "Retrieved messages for processing", "agent", a.Name(), "message_count", len(messages))
 
 	// before_llm_call hooks fire just before the model is invoked.
@@ -990,6 +996,35 @@ func filterExcludedTools(agentTools []tools.Tool, excluded []string) []tools.Too
 	return filtered
 }
 
+// filterToolsForSession applies every session-level tool filter. It first
+// applies the explicit sess.ExcludedTools name list (used by skill
+// sub-sessions) and then, only when the session is in plan mode, keeps just
+// the tools whose definition advertises ReadOnlyHint. The MCP spec's
+// ReadOnlyHint is the canonical "this tool has no side effects" signal, so
+// plan mode covers user-added MCP tools without any per-tool config.
+func filterToolsForSession(agentTools []tools.Tool, sess *session.Session) []tools.Tool {
+	out := filterExcludedTools(agentTools, sess.ExcludedTools)
+	// Read the mode through LoadMode, not the field: PATCH /sessions/:id/mode
+	// may flip Mode concurrently with the runtime stream goroutine.
+	if sess.LoadMode() == session.ModePlan {
+		out = filterToReadOnlyTools(out)
+	}
+	return out
+}
+
+// filterToReadOnlyTools returns a new slice holding, in their original order,
+// only the tools whose Annotations.ReadOnlyHint is true. Plan mode uses it so
+// write/execute tools stay hidden even if the model ignores the reminder.
+func filterToReadOnlyTools(agentTools []tools.Tool) []tools.Tool {
+	filtered := make([]tools.Tool, 0, len(agentTools))
+	for _, t := range agentTools {
+		if t.Annotations.ReadOnlyHint {
+			filtered = append(filtered, t)
+		}
+	}
+	return filtered
+}
+
 // reprobe re-runs ensureToolSetsAreStarted after a batch of tool calls.
 // If new tools became available (by name-set diff), it emits a ToolsetInfo
 // event to update the TUI immediately. The new tools will be picked up by
@@ -1010,7 +1045,7 @@ func (r *LocalRuntime) reprobe(
 		slog.WarnContext(ctx, "reprobe: getTools failed", "agent", a.Name(), "error", err)
 		return
 	}
-	updated = filterExcludedTools(updated, sess.ExcludedTools)
+	updated = filterToolsForSession(updated, sess)
 
 	// Emit any pending warnings that getTools just generated.
 	r.emitAgentWarnings(a, events)

@@ -0,0 +1,48 @@
+package runtime
+
+import (
+	"github.com/docker/docker-agent/pkg/chat"
+	"github.com/docker/docker-agent/pkg/session"
+)
+
+// planModeReminder is the per-turn system instruction injected when a session
+// is in plan mode. Plan mode is enforced in two layers: the runtime hides
+// every non-read-only tool from the model (see filterToolsForSession in
+// loop.go), and this reminder tells the model how to behave. Hiding the tools
+// is the hard guarantee; the reminder is the explanation, so the model
+// produces a useful plan instead of just bouncing off missing tools.
+const planModeReminder = `<system-reminder>
+You are currently in PLAN MODE.
+
+In this mode you research the codebase, ask clarifying questions, and write a
+clear, actionable plan for the user. You MUST NOT make any changes to the
+system:
+
+- No edits to files (no write, edit, create, or delete).
+- No shell commands or background jobs.
+- No state-changing tool calls of any kind.
+
+Only read-only tools have been made available to you for this turn. If you try
+to call a tool that isn't in your list, the user has explicitly disabled it
+for planning.
+
+End the turn by presenting the plan in your final message and asking the user
+to review it. The user will switch you to BUILD MODE when they want execution
+to begin.
+</system-reminder>`
+
+// planModeReminderMessages returns a single system-role message carrying
+// planModeReminder when sess is in plan mode, for the caller to splice into
+// the per-turn context. It returns nil when sess is nil or in any other
+// mode, so callers can use it unconditionally.
+// The mode is read via LoadMode so the check stays consistent with
+// concurrent PATCH /sessions/:id/mode writes coming through SessionManager.
+func planModeReminderMessages(sess *session.Session) []chat.Message {
+	if sess == nil || sess.LoadMode() != session.ModePlan {
+		return nil
+	}
+	return []chat.Message{{
+		Role:    chat.MessageRoleSystem,
+		Content: planModeReminder,
+	}}
+}