From 3ae80e48b7a64a208c9b9d727fdf6f313e281949 Mon Sep 17 00:00:00 2001
From: Ruben de Smet <ruben@lunascens.io>
Date: Wed, 20 May 2026 01:44:01 +0200
Subject: [PATCH 1/8] =?UTF-8?q?v4-a:=20mem::lineage=20=E2=80=94=20concept?=
 =?UTF-8?q?=20lineage=20retrieval=20primitive?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Returns chronologically-sorted hits across observation/memory/lesson/
summary channels — answers "when did this term enter the corpus and
what surrounded it?". Includes BM25 sweep over obs+memory, substring
scan for lessons/summaries, optional adjacent-turn enrichment, and
optional graph-neighbor attachment.

Gap-2 fix bundled: BM25 sweep cap raised from min(limit*4, 500) to
min(limit*20, 5000) so deep in-session refs in large jsonl-imported
sessions (10k+ obs) still rank into the channel-filtered top N.

Wires:
- src/functions/lineage.ts (new)
- mem::lineage MCP tool in CORE_TOOLS
- POST /agentmemory/lineage REST endpoint
- AuditEntry operation: + "query"
- LineageChannel / TimelineItem / LineageGraphNeighbor / LineageResult types
- design + test-case docs under docs/plans/

Counts bumped to keep README/AGENTS/boot message/test in sync:
CORE_TOOLS 12 → 13, total MCP tools 53 → 54, REST endpoints 124 → 125.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 AGENTS.md                                     |   4 +-
 README.md                                     |   8 +-
 docs/plans/v4-lineage-design.md               | 277 +++++++++++
 .../v4-lineage-test-case-careful-generator.md | 200 ++++++++
 src/functions/lineage.ts                      | 455 ++++++++++++++++++
 src/index.ts                                  |   4 +-
 src/mcp/server.ts                             |  35 ++
 src/mcp/tools-registry.ts                     |  33 ++
 src/triggers/api.ts                           |  70 ++-
 src/types.ts                                  |  55 ++-
 test/mcp-standalone.test.ts                   |   4 +-
 11 files changed, 1134 insertions(+), 11 deletions(-)
 create mode 100644 docs/plans/v4-lineage-design.md
 create mode 100644 docs/plans/v4-lineage-test-case-careful-generator.md
 create mode 100644 src/functions/lineage.ts
diff --git a/AGENTS.md b/AGENTS.md
index ebcf3584..6032e3a7 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -111,8 +111,8 @@ Hook scripts in `src/hooks/` are standalone Node.js scripts (no iii-sdk import).
 
 ## Current Stats (v0.9.16)
 
-- 53 MCP tools (8 visible by default, `AGENTMEMORY_TOOLS=all` for all)
-- 124 REST endpoints
+- 54 MCP tools (8 visible by default, `AGENTMEMORY_TOOLS=all` for all)
+- 125 REST endpoints
 - 6 MCP resources, 3 MCP prompts
 - 12 hooks, 4 skills
 - 50+ iii functions
diff --git a/README.md b/README.md
index 840a75c4..c3775a11 100644
--- a/README.md
+++ b/README.md
@@ -43,7 +43,7 @@
 <p align="center">
   <picture><source media="(prefers-color-scheme: dark)" srcset="assets/tags/light/stat-recall.svg"><img src="assets/tags/stat-recall.svg" alt="95.2% retrieval R@5" height="38" /></picture>
   <picture><source media="(prefers-color-scheme: dark)" srcset="assets/tags/light/stat-tokens.svg"><img src="assets/tags/stat-tokens.svg" alt="92% fewer tokens" height="38" /></picture>
-  <picture><source media="(prefers-color-scheme: dark)" srcset="assets/tags/light/stat-tools.svg"><img src="assets/tags/stat-tools.svg" alt="53 MCP tools" height="38" /></picture>
+  <picture><source media="(prefers-color-scheme: dark)" srcset="assets/tags/light/stat-tools.svg"><img src="assets/tags/stat-tools.svg" alt="54 MCP tools" height="38" /></picture>
   <picture><source media="(prefers-color-scheme: dark)" srcset="assets/tags/light/stat-hooks.svg"><img src="assets/tags/stat-hooks.svg" alt="12 auto hooks" height="38" /></picture>
   <picture><source media="(prefers-color-scheme: dark)" srcset="assets/tags/light/stat-deps.svg"><img src="assets/tags/stat-deps.svg" alt="0 external DBs" height="38" /></picture>
   <picture><source media="(prefers-color-scheme: dark)" srcset="assets/tags/light/stat-tests.svg"><img src="assets/tags/stat-tests.svg" alt="950+ tests passing" height="38" /></picture>
@@ -408,7 +408,7 @@ Implementation details live in `src/cli.ts` (see `runUpgrade` around the `src/cl
 ### Claude Code (one block, paste it)
 
 ```
-Install agentmemory: run `npx @agentmemory/agentmemory` in a separate terminal to start the memory server. Then run `/plugin marketplace add rohitg00/agentmemory` and `/plugin install agentmemory` — the plugin registers all 12 hooks, 4 skills, AND auto-wires the `@agentmemory/mcp` stdio server via its `.mcp.json`, so you get 53 MCP tools (memory_smart_search, memory_save, memory_sessions, memory_governance_delete, etc.) without any extra config step. Verify with `curl http://localhost:3111/agentmemory/health`. The real-time viewer is at http://localhost:3113.
+Install agentmemory: run `npx @agentmemory/agentmemory` in a separate terminal to start the memory server. Then run `/plugin marketplace add rohitg00/agentmemory` and `/plugin install agentmemory` — the plugin registers all 12 hooks, 4 skills, AND auto-wires the `@agentmemory/mcp` stdio server via its `.mcp.json`, so you get 54 MCP tools (memory_smart_search, memory_save, memory_sessions, memory_governance_delete, etc.) without any extra config step. Verify with `curl http://localhost:3111/agentmemory/health`. The real-time viewer is at http://localhost:3113.
 ```
 
 ### Codex CLI (Codex plugin platform)
@@ -799,7 +799,7 @@ npm install @xenova/transformers
 
 <h2 id="mcp-server"><picture><source media="(prefers-color-scheme: dark)" srcset="assets/tags/light/section-mcp.svg"><img src="assets/tags/section-mcp.svg" alt="MCP Server" height="32" /></picture></h2>
 
-53 tools, 6 resources, 3 prompts, and 4 skills — the most comprehensive MCP memory toolkit for any agent.
+54 tools, 6 resources, 3 prompts, and 4 skills — the most comprehensive MCP memory toolkit for any agent.
 
 > **MCP shim vs full server:** the published `@agentmemory/mcp` package is a thin shim. It exposes the full 51-tool surface **only when it can reach a running agentmemory server** via `AGENTMEMORY_URL` (proxy mode). With no server reachable, the shim falls back to a 7-tool local set (`memory_save`, `memory_recall`, `memory_smart_search`, `memory_sessions`, `memory_export`, `memory_audit`, `memory_governance_delete`). The `AGENTMEMORY_TOOLS=core|all` env var is a *server-side* flag — setting it in the shim's `env` block has no effect. If you see only 7 tools in Cursor / OpenCode / Gemini CLI, start `npx @agentmemory/agentmemory` (or the Docker stack) and set `AGENTMEMORY_URL=http://localhost:3111`.
 
@@ -1197,7 +1197,7 @@ Create `~/.agentmemory/.env`:
 
 <h2 id="api"><picture><source media="(prefers-color-scheme: dark)" srcset="assets/tags/light/section-api.svg"><img src="assets/tags/section-api.svg" alt="API" height="32" /></picture></h2>
 
-124 endpoints on port `3111`. The REST API binds to `127.0.0.1` by default. Protected endpoints require `Authorization: Bearer <secret>` when `AGENTMEMORY_SECRET` is set, and mesh sync endpoints require `AGENTMEMORY_SECRET` on both peers.
+125 endpoints on port `3111`. The REST API binds to `127.0.0.1` by default. Protected endpoints require `Authorization: Bearer <secret>` when `AGENTMEMORY_SECRET` is set, and mesh sync endpoints require `AGENTMEMORY_SECRET` on both peers.
 
 <details>
 <summary>Key endpoints</summary>
diff --git a/docs/plans/v4-lineage-design.md b/docs/plans/v4-lineage-design.md
new file mode 100644
index 00000000..f5b3ce6d
--- /dev/null
+++ b/docs/plans/v4-lineage-design.md
@@ -0,0 +1,277 @@
+# v4-A: `mem::lineage` — concept-lineage retrieval primitive
+
+## Problem
+
+Smart-search ranks the **lesson** channel over the **memory** and **observation**
+channels, so queries that target a single inline phrase in a large doc
+(or a turn from a specific past session) are silently dropped from the
+top-K. The data is in the corpus; the *retrieval shape* is missing.
+
+Concrete miss we hit:
+- Query: *"who is the careful generator?"*
+- Truth: `docs/architecture.md:308` defines it as Tier-2 = Qwen3.6-35B-A3B-FP8,
+  and the term was first written into `config/config.yaml` at
+  `2026-04-26T11:39:45` in session `05988a74-...`.
+- Smart-search returned 8 unrelated session-handoff lessons (top score 0.726).
+- Plain `/agentmemory/search` (BM25-only) found the right hits cleanly
+  (score 11–14) — proving the data is there and BM25 indexes it.
+
+The gap is a missing **conceptual-lineage** primitive: *"when did this term
+enter our shared vocabulary, where, and what surrounded it?"*. That's a
+different query shape from relevance-ranked retrieval — it wants
+**chronological order** + **session context** + **adjacent turns**.
+
+## Function: `mem::lineage`
+
+### Request
+
+```json
+POST /agentmemory/lineage
+{
+  "query": "careful generator",
+  "limit": 50,
+  "since": "2026-04-01T00:00:00Z",
+  "until": "2026-05-20T00:00:00Z",
+  "channels": ["observation", "memory", "lesson", "summary"],
+  "includeAdjacentTurns": true,
+  "includeGraph": false,
+  "order": "asc"
+}
+```
+
+Field semantics:
+
+| field | type | default | meaning |
+|---|---|---|---|
+| `query` | string (required) | — | phrase/terms to find. Case-insensitive substring match for lessons/summaries; existing BM25 index handles observations/memories. |
+| `limit` | int | 50 | max items in the returned timeline (after merge + sort) |
+| `since` / `until` | ISO 8601 | unbounded | filter on `createdAt` / `timestamp` |
+| `channels` | array | all four | which content types to search |
+| `includeAdjacentTurns` | bool | `true` | for observation hits, attach the previous user prompt + previous assistant turn from the same session |
+| `includeGraph` | bool | `false` | attach immediate graph-edge neighbors of nodes whose `name` matches the query |
+| `order` | `"asc"` \| `"desc"` | `"asc"` | chronological direction (asc = oldest first, lineage-style) |
+
+### Response
+
+```json
+{
+  "query": "careful generator",
+  "firstMention": {
+    "timestamp": "2026-04-26T11:39:45.123Z",
+    "channel": "observation",
+    "sessionId": "05988a74-d1f1-42a1-9cd4-53b4db205ff3",
+    "project": "gitops-assistant"
+  },
+  "timeline": [
+    {
+      "timestamp": "2026-04-26T11:39:45.123Z",
+      "channel": "observation",
+      "id": "obs_mp...",
+      "sessionId": "05988a74-d1f1-42a1-9cd4-53b4db205ff3",
+      "project": "gitops-assistant",
+      "title": "post_tool_use",
+      "type": "other",
+      "snippet": "...Tier 2 — careful generator (Qwen3.6-35B-A3B-FP8 on vast pod)\n  analyse_manifest:  vast-qwen...",
+      "score": 12.4,
+      "session": {
+        "id": "05988a74-...",
+        "project": "gitops-assistant",
+        "startedAt": "2026-04-26T09:06:36.534Z",
+        "firstPrompt": "I need an implementation plan for wiring..."
+      },
+      "adjacentTurns": {
+        "previousUserPrompt": "...",
+        "previousAssistantSummary": "..."
+      }
+    },
+    {
+      "timestamp": "2026-05-19T00:36:09.232Z",
+      "channel": "memory",
+      "id": "mem_mp...",
+      "title": "[Repo doc] gitops-assistant: docs/architecture.md (chunk 1/1...)",
+      "snippet": "...# Tier 2 — careful generator\nanalyse_manifest:  vast-qwen36-35b...",
+      "score": 7.1,
+      "sourceFile": "docs/architecture.md",
+      "memoryType": "architecture"
+    }
+  ],
+  "totalsByChannel": {
+    "observation": 12,
+    "memory": 3,
+    "lesson": 0,
+    "summary": 1
+  },
+  "graphNeighbors": [
+    {
+      "name": "careful generator",
+      "type": "concept",
+      "edges": [
+        { "kind": "uses", "neighbor": "vast-qwen36-35b", "neighborType": "library" },
+        { "kind": "related_to", "neighbor": "analyse_manifest", "neighborType": "function" }
+      ]
+    }
+  ]
+}
+```
+
+Notes:
+- `firstMention` is the earliest item in the timeline (after filtering),
+  surfaced separately for convenience.
+- `graphNeighbors` only present when `includeGraph: true`.
+- `adjacentTurns` only present when `includeAdjacentTurns: true` AND the
+  channel is `observation` AND a prior turn exists in the same session.
+
+## Algorithm
+
+```
+1. Match by channel (parallel):
+   a) observation & memory:
+      - reuse the existing BM25 index from src/functions/search.ts.
+        Call getSearchIndex().search(query, max=200) or equivalent.
+        Filter by `channels` setting.
+      - existing index already returns timestamp + sessionId for
+        observations; memory entries carry createdAt + id.
+   b) lesson:
+      - kv.list<Lesson>(KV.lessons)
+      - filter: !lesson.deleted && lesson.content.toLowerCase().includes(qLower)
+      - ~4500 lessons; substring scan is ~10ms
+   c) summary:
+      - kv.list<SessionSummary>(KV.summaries)
+      - filter on .narrative substring
+      - ~60 records; trivial
+
+2. For each hit, build a TimelineItem with:
+     timestamp, channel, id, score (BM25 if available, else 0),
+     snippet (300-char window centered on first match position;
+     clip at content boundaries; "…" prefix/suffix elision).
+
+3. Apply since/until filters.
+
+4. Merge channels, sort by timestamp (asc by default), trim to limit.
+
+5. Enrichment pass:
+   a) Session lookup cache (Map<sessionId, Session>) — populate lazily
+      on first obs hit needing it.
+   b) If includeAdjacentTurns: for each observation hit, scan
+      KV.observations(obs.sessionId) for the last observation with
+      timestamp < obs.timestamp that is type=="conversation" AND has a
+      userPrompt field; same for the latest assistant-side observation.
+      Cache per-session so multiple hits in one session share a single
+      KV.list call.
+   c) For memory hits: parse the source line from the content header
+      if it starts with "[Repo doc] " or "[Session handoff] ".
+      Regex: /^\[Repo doc\] [^:]+: ([^\s(]+)/
+
+6. If includeGraph:
+   - kv.list<GraphNode>(KV.graphNodes), filter by name.toLowerCase()
+     includes(qLower) OR exact-match of any tokenized phrase.
+   - For each matched node, kv.list<GraphEdge>(KV.graphEdges) filtered
+     by source/target == node.id; resolve neighbor node names + types.
+   - Attach to the top-level response, NOT per timeline item.
+
+7. Build firstMention from timeline[0] (after sort).
+
+8. Audit the call (kv recordAudit).
+```
+
+## Files to modify
+
+| file | change |
+|---|---|
+| `src/types.ts` | add `LineageChannel`, `TimelineItem`, `LineageGraphNeighbor`, `LineageResult` types |
+| `src/functions/lineage.ts` | **new** — implements `mem::lineage` per the algorithm above |
+| `src/index.ts` | register the lineage function (find where other `register*Function(sdk, kv)` calls live and add `registerLineageFunction(sdk, kv)`) |
+| `src/triggers/api.ts` | add `api::lineage` HTTP wrapper + trigger registration for `POST /agentmemory/lineage` (mirror the pattern of `api::search` or `api::smart-search`) |
+| `src/mcp/tools-registry.ts` | add `memory_lineage` tool entry so the MCP layer exposes it (mirror `memory_smart_search`) |
+
+No new env vars. No new KV namespaces. Reuses existing indexes.
+
+## Implementation notes & gotchas
+
+1. **BM25 index reuse**: `src/functions/search.ts` exports `getSearchIndex()`.
+   Confirm what types of entries the index holds before calling — observation
+   indexing happens at write time in observe.ts and remember.ts; lessons
+   may or may not be indexed (probably not). Either way, lesson/summary
+   substring-scan path handles those channels independently.
+
+2. **Adjacent-turn lookup**: `KV.observations(sessionId)` is a per-session
+   namespace. The fetch is O(n) in the session's observation count, but
+   we only do it once per unique sessionId in the hit set, and cache
+   the result. For a query that hits one big session 50 times, it's a
+   single list call.
+
+3. **Memory createdAt vs observation timestamp**: both exist as ISO strings.
+   Treat them uniformly for sort. CompressedObservation has `.timestamp`,
+   Memory has `.createdAt`. Lesson has `.createdAt`. SessionSummary has
+   `.createdAt`. Normalize on read.
+
+4. **Empty query** → return 400 with `error: "query is required"`.
+
+5. **No-match query** → return 200 with empty timeline, all zeros in
+   totalsByChannel, `firstMention: null`.
+
+6. **Snippet generation**: find first match position via
+   `content.toLowerCase().indexOf(qLower)`, take [pos-150 .. pos+150]
+   clipped at 0/length, prepend/append "…" if clipped. If the BM25
+   index already returned a snippet, prefer that.
+
+7. **Tokenization for graph node match**: the query may be a phrase
+   ("careful generator") that doesn't appear as a single graph-node
+   `name`. Fallback: split query on whitespace, match nodes whose name
+   contains ANY token. This is best-effort; if the user wants strict
+   matching they should query the graph directly.
+
+8. **Sort stability**: when two items share a timestamp (rare but
+   possible), break ties by `(channel, id)` lexicographic.
+
+## Validation criteria
+
+After implementation, the subagent must verify:
+
+```bash
+# 1. Build dist
+npm run build
+
+# 2. Rebuild container image
+docker compose -f docker/docker-compose.yml up -d --build
+
+# 3. Wait for /livez
+curl -fsS http://localhost:3111/agentmemory/livez
+
+# 4. The smoke test that motivated this work:
+curl -fsS -X POST http://localhost:3111/agentmemory/lineage \
+  -H 'content-type: application/json' \
+  -d '{"query":"careful generator","limit":30,"includeAdjacentTurns":true,"includeGraph":true}' \
+  | jq
+
+# Expected:
+#   - firstMention.timestamp ≈ 2026-04-19T18:19:57Z (earliest observation hit)
+#     OR 2026-04-26T11:39:45Z (the config-edit observation we grep-confirmed).
+#   - timeline.length > 0, sorted asc by timestamp
+#   - At least one observation hit from session 05988a74-...
+#   - At least one memory hit with sourceFile == "docs/architecture.md"
+#   - totalsByChannel.observation >= 5
+#   - totalsByChannel.memory >= 1
+#   - graphNeighbors is non-null (V3-C extracted nodes from architecture.md)
+
+# 5. Empty-query rejection:
+curl -fsS -X POST http://localhost:3111/agentmemory/lineage \
+  -H 'content-type: application/json' -d '{"query":""}' -i | head -3
+# Expected: HTTP 400
+
+# 6. No-match query:
+curl -fsS -X POST http://localhost:3111/agentmemory/lineage \
+  -H 'content-type: application/json' \
+  -d '{"query":"zzz_no_such_concept_zzz"}' | jq
+# Expected: timeline=[], totalsByChannel all 0, firstMention=null
+```
+
+## Out of scope (filed for later)
+
+- **Smart-search ranker tuning** (don't crowd lessons over memories). Separate
+  ~10-line change to `src/functions/search.ts`. Not in v4-A.
+- **Graph-traversal retrieval** (find via graph edges, not text match). Bigger
+  design; v4-B if there's appetite.
+- **Cross-session entity merging** (handoff for "careful generator" in session
+  A links to its first mention in session B). Requires entity-resolution
+  logic; v4-C+.
diff --git a/docs/plans/v4-lineage-test-case-careful-generator.md b/docs/plans/v4-lineage-test-case-careful-generator.md
new file mode 100644
index 00000000..2cd7b4b0
--- /dev/null
+++ b/docs/plans/v4-lineage-test-case-careful-generator.md
@@ -0,0 +1,200 @@
+# Test case: "Who is the careful generator?"
+
+A canonical regression test for agentmemory's lineage/recall capabilities.
+This scenario is what motivated the `mem::lineage` design (v4-A) and
+reveals the limits of smart-search + the residual gaps in v4-A itself.
+
+## The question
+
+> *"Who is the careful generator?"*
+
+Trivial-sounding. The right answer is a one-line lookup. But it's
+secretly testing several capabilities at once.
+
+## What we know (out-of-band ground truth)
+
+**Definition.** From `docs/architecture.md:308-309` and
+`docs/configuration.md:176-177`:
+
+```
+analyse_manifest:  vast-qwen36-35b   # Tier 2 — careful generator
+diff_complex:      vast-qwen36-35b
+```
+
+So **"careful generator" = Tier 2 = Qwen3.6-35B-A3B-FP8**, paired with:
+
+- **Tier 1 = "premium reasoning" / colloquially "the judgement" = Qwen3.5-397B**
+  via Together. Knows when to stop intrinsically; doesn't need bail-prompting.
+- **Tier 2 = "careful generator" = Qwen3.6-35B-A3B-FP8**. Smaller, faster,
+  but needs explicit prompting on when to stop.
+
+**Provenance (user-supplied context, 2026-05-19).** The nicknames were
+coined during a **benchmark session** where multiple models were pitted
+against each other; qwen36 was the clear winner on the
+generator-shaped tasks (`analyse_manifest`, `diff_complex`). The session
+also coincided with the first exploration of serverless alternatives —
+and the conclusion at the time was that nothing on serverless matched
+what qwen36 offered on vast-pod hosting.
+
+**Earliest written trace (corpus-confirmed).** The comments were
+hardened into the codebase at `2026-04-26T11:39:45.123Z` in session
+`05988a74-d1f1-42a1-9cd4-53b4db205ff3` — a config edit adding the
+tier-routed pipeline comments. The conversation that produced those
+edits is somewhere earlier (probably mid-to-late April).
+
+## What this scenario tests
+
+A working memory system should answer each of these:
+
+| sub-question | shape | required capability |
+|---|---|---|
+| What does "careful generator" mean? | definition | direct retrieval against architecture.md memory |
+| When did this term enter our vocabulary? | first-mention timestamp | chronological retrieval (lineage) |
+| What was the surrounding context? | session metadata + adjacent turns | obs enrichment |
+| Who's the companion concept? | related-entity traversal | graph-edge retrieval |
+| Why did we pick qwen36 specifically? | rationale | summary/handoff retrieval over the benchmark session |
+| Did we revisit this when serverless improved? | follow-up surface | cross-session temporal traversal |
+
+## Observed behavior (as of 2026-05-19 evening)
+
+### `mem::smart-search "who is the careful generator?"`
+
+Returned **8 unrelated lessons** (top score 0.726 — session-handoffs
+about May 1 work that mentioned "careful" in unrelated contexts). The
+[Repo doc] memory of architecture.md did not appear in either channel.
+
+**Diagnosis:** smart-search ranker favors the lesson channel and
+crowds out memory hits. The vector channel doesn't pull a 19 KB doc
+based on a single inline comment phrase.
+
+### `mem::search` (BM25-only) `"careful generator"`
+
+Returned correct hits with real signal — scores 7–14, observations
++ memories interleaved, the architecture.md memory surfaced. BM25
+proves the data is in the corpus and the index has it.
+
+### `mem::lineage` (v4-A initial implementation)
+
+Returned a populated timeline of 30 items sorted ASC:
+
+- **`firstMention`**: `2026-04-18T08:26:37Z`, project `observer-sessions`,
+  session `2d7f99c4-...`
+- **Hit distribution**: observation=23, memory=71, lesson=0, summary=0
+  (top 30 returned)
+- **adjacentTurns** attached on 14/23 obs hits
+- **graphNeighbors**: `[]` (no graph node with `name` containing "careful"
+  or "generator" — graph-extract was run over architecture.md content
+  but didn't surface the inline comment phrase as a node name)
+- **Architecture.md memory hit**: present, with correct sourceFile
+  extracted
+
+**Diagnosis:** v4-A works mechanically — sorted timeline, channel
+totals, enrichment, all correct. But `firstMention` is wrong: the
+`observer-sessions` synthetic project (agentmemory's own meta-observer
+watching primary sessions) emits records containing tokens that BM25
+matches. They time-sort to the top because they're earlier than the
+actual conversations.
+
+The **real** first mention — the benchmark conversation — likely lives
+in observations from a non-observer session. The user's recollection
+places it "around when we first looked at serverless" (probably
+late March / early-mid April 2026 based on related context).
+
+## Gaps surfaced
+
+1. **`mem::lineage` doesn't filter observer/agent meta-sessions** by
+   default — same gap that `scripts/rebuild-graph.sh` and
+   `emit_observations` explicitly handle. Should default-exclude
+   projects matching `^(observer|agent-)` with an opt-in
+   `--include-observer` style override.
+
+2. **BM25 sweep is bounded at `min(limit*4, 500)`** — the very long
+   gitops-assistant session `05988a74-...` (10,704 observations) has
+   "careful generator" references that didn't make the top 200 ranked.
+   Either raise the cap when channel filtering is wide, or scan all
+   obs in matched sessions to ensure no in-session reference is
+   dropped.
+
+3. **Graph-extraction over docs missed the inline comment phrases.**
+   `parseGraphXml` extracted entities from architecture.md's prose
+   sections, but the comment line
+   `# Tier 2 — careful generator (Qwen3.6-35B-A3B-FP8 on vast pod)` was
+   treated as code/config noise, not a concept-defining edge. No
+   `GraphNode(name="careful generator")` exists, so `includeGraph: true`
+   returns `[]`.
+
+4. **The benchmark session itself is not findable as a structured
+   record.** It happened (per the user) but the corpus doesn't seem to
+   have a session summary or memory record about "we benchmarked
+   qwen35-397b vs qwen36-35b vs X, qwen36 won on generator tasks". The
+   nicknames stuck in code comments but the *reasoning behind picking
+   the nickname* (the benchmark) was never crystallized as a memory.
+   This is a memory-curation gap, not a retrieval gap.
+
+## Validation criteria for future re-runs
+
+Re-running this test case after improvements should validate:
+
+```bash
+# A. Lineage smoke (after observer-filter fix):
+curl -fsS -X POST http://localhost:3111/agentmemory/lineage \
+  -H 'content-type: application/json' \
+  -d '{"query":"careful generator","limit":30,"order":"asc"}' \
+  | jq '.firstMention'
+
+# Pass criteria:
+#   - .project NOT IN ["observer-sessions", "agent-*"]
+#   - .timestamp ideally falls within the user-described benchmark
+#     window (probably April 2026 mid-to-late, pre-config-edit on Apr 26)
+
+# B. Graph traversal (after architecture-doc graph-extraction is
+#    re-run with prompt tuning that surfaces comment phrases):
+curl -fsS -X POST http://localhost:3111/agentmemory/lineage \
+  -H 'content-type: application/json' \
+  -d '{"query":"careful generator","includeGraph":true}' \
+  | jq '.graphNeighbors'
+
+# Pass criteria:
+#   - non-empty
+#   - At least one neighbor is "Qwen3.6-35B-A3B-FP8" or "vast-qwen36-35b"
+#     with relation type "uses", "is", or "implements"
+
+# C. Smart-search re-ranker:
+curl -fsS -X POST http://localhost:3111/agentmemory/smart-search \
+  -H 'content-type: application/json' \
+  -d '{"query":"who is the careful generator","limit":10}'
+
+# Pass criteria:
+#   - architecture.md or configuration.md memory in top 5 hits
+#   - score > 0.3 on the relevant memory
+```
+
+## Follow-up work surfaced by this test case
+
+In rough priority:
+
+1. **v4-A patch**: default-exclude observer/agent projects in
+   `mem::lineage`. ~5 lines. Highest leverage.
+2. **Capture the benchmark session as a project memory**: a
+   `project_qwen36_v_qwen35_benchmark.md` documenting what was tested,
+   the results, why qwen36 won on generator tasks, and why serverless
+   alternatives were rejected at the time. Pure curation — no code
+   change. The user has the context; the corpus doesn't.
+3. **Smart-search channel re-ranker** (v4-B): boost the memory channel
+   for queries with named-concept patterns ("who is X", "what is X",
+   "define X"). Smaller surface than v4-A's lineage primitive but
+   targets a more common query shape.
+4. **Comment-aware graph extraction** (v4-C): tune the graph-extraction
+   prompt or post-processor to treat code comments like
+   `# Tier 2 — careful generator (...)` as concept-defining
+   declarations. Currently they're treated as code noise.
+
+## Why this test case is durable
+
+It's a real recall miss from a real workflow with verifiable ground
+truth in the corpus. As long as `docs/architecture.md` retains the
+"Tier 2 — careful generator" comment and the gitops-assistant session
+history exists, this scenario is re-runnable across agentmemory
+versions to track recall regressions and improvements. Any future
+PR that touches lineage, smart-search ranking, or graph extraction
+should be re-tested against this case.
diff --git a/src/functions/lineage.ts b/src/functions/lineage.ts
new file mode 100644
index 00000000..1a912968
--- /dev/null
+++ b/src/functions/lineage.ts
@@ -0,0 +1,455 @@
+import type { ISdk } from "iii-sdk";
+import type {
+  CompressedObservation,
+  GraphEdge,
+  GraphNode,
+  GraphNodeType,
+  Lesson,
+  LineageChannel,
+  LineageGraphNeighbor,
+  LineageResult,
+  Memory,
+  Session,
+  SessionSummary,
+  TimelineItem,
+} from "../types.js";
+import { KV } from "../state/schema.js";
+import type { StateKV } from "../state/kv.js";
+import { getSearchIndex, rebuildIndex } from "./search.js";
+import { safeAudit } from "./audit.js";
+import { logger } from "../logger.js";
+
+// Concept-lineage retrieval. Unlike mem::search (relevance) and
+// mem::smart-search (lessons-first ranker), this primitive returns
+// chronologically-sorted hits across observation, memory, lesson, and
+// summary channels — answering "when did this term enter the corpus,
+// and what surrounded it?". Reuses the existing BM25 index for obs/mem
+// and falls through to substring scans for lessons/summaries.
+
+const ALL_CHANNELS: LineageChannel[] = [
+  "observation",
+  "memory",
+  "lesson",
+  "summary",
+];
+
+interface LineageRequest {
+  query: string;
+  limit?: number;
+  since?: string;
+  until?: string;
+  channels?: LineageChannel[];
+  includeAdjacentTurns?: boolean;
+  includeGraph?: boolean;
+  order?: "asc" | "desc";
+}
+
+function isValidIsoTimestamp(value: unknown): value is string {
+  if (typeof value !== "string") return false;
+  const t = Date.parse(value);
+  return Number.isFinite(t);
+}
+
+function buildSnippet(content: string, qLower: string): string {
+  if (!content) return "";
+  const lower = content.toLowerCase();
+  const pos = lower.indexOf(qLower);
+  if (pos < 0) {
+    return content.length <= 300 ? content : content.slice(0, 300) + "…";
+  }
+  const start = Math.max(0, pos - 150);
+  const end = Math.min(content.length, pos + qLower.length + 150);
+  const head = start > 0 ? "…" : "";
+  const tail = end < content.length ? "…" : "";
+  return head + content.slice(start, end) + tail;
+}
+
+// Repo doc and session-handoff memories embed their source in the first
+// line of content. Pull it out so callers can filter by sourceFile.
+// Headers come in two flavors:
+//   [Repo doc] <project>: <path/to/file>
+//   [Session handoff] <project>: <path/to/file>
+// Both have an optional "(chunk i/n)" suffix. Capture the path token.
+const REPO_DOC_RE = /^\[Repo doc\] [^:]+:\s+([^\s(]+)/;
+const SESSION_HANDOFF_RE = /^\[Session handoff\] [^:]+:\s+([^\s(]+)/;
+
+function extractMemorySourceFile(content: string): string | undefined {
+  const firstLine = content.split("\n", 1)[0] ?? "";
+  const repo = REPO_DOC_RE.exec(firstLine);
+  if (repo) return repo[1];
+  const handoff = SESSION_HANDOFF_RE.exec(firstLine);
+  if (handoff) return handoff[1];
+  return undefined;
+}
+
+function inRange(timestamp: string, since?: number, until?: number): boolean {
+  const t = Date.parse(timestamp);
+  if (!Number.isFinite(t)) return false;
+  if (since !== undefined && t < since) return false;
+  if (until !== undefined && t > until) return false;
+  return true;
+}
+
+function tieBreak(a: TimelineItem, b: TimelineItem): number {
+  if (a.channel !== b.channel) return a.channel < b.channel ? -1 : 1;
+  if (a.id !== b.id) return a.id < b.id ? -1 : 1;
+  return 0;
+}
+
+export function registerLineageFunction(sdk: ISdk, kv: StateKV): void {
+  sdk.registerFunction(
+    "mem::lineage",
+    async (data: LineageRequest): Promise<LineageResult | { error: string }> => {
+      if (typeof data?.query !== "string" || !data.query.trim()) {
+        return { error: "query is required" };
+      }
+      const query = data.query.trim();
+      const qLower = query.toLowerCase();
+
+      const limit =
+        typeof data.limit === "number" && Number.isInteger(data.limit) && data.limit > 0
+          ? Math.min(data.limit, 500)
+          : 50;
+
+      const since = isValidIsoTimestamp(data.since) ? Date.parse(data.since) : undefined;
+      const until = isValidIsoTimestamp(data.until) ? Date.parse(data.until) : undefined;
+
+      const requestedChannels =
+        Array.isArray(data.channels) && data.channels.length > 0
+          ? (data.channels.filter((c): c is LineageChannel =>
+              ALL_CHANNELS.includes(c as LineageChannel),
+            ) as LineageChannel[])
+          : ALL_CHANNELS;
+      const channelSet = new Set<LineageChannel>(requestedChannels);
+
+      const includeAdjacentTurns = data.includeAdjacentTurns !== false;
+      const includeGraph = data.includeGraph === true;
+      const order: "asc" | "desc" = data.order === "desc" ? "desc" : "asc";
+
+      const items: TimelineItem[] = [];
+
+      // (a) BM25 path covers observations + memories (memories are
+      // indexed under their own id with sessionId fallback "memory"
+      // via memoryToObservation).
+      if (channelSet.has("observation") || channelSet.has("memory")) {
+        const idx = getSearchIndex();
+        if (idx.size === 0) {
+          try {
+            const count = await rebuildIndex(kv);
+            logger.info("Search index rebuilt for lineage", { entries: count });
+          } catch (err) {
+            logger.warn("lineage: rebuild index failed", {
+              error: err instanceof Error ? err.message : String(err),
+            });
+          }
+        }
+        // v4-A Gap 2 fix: bound the sweep generously so deep-in-session
+        // references in large jsonl-imported sessions (10k+ obs) still
+        // rank into the channel-filtered top N. Was min(limit*4, 500),
+        // which missed in-session refs in the Apr 26→May 17 GA session.
+        const bm25Hits = idx.search(query, Math.min(Math.max(limit * 20, 1000), 5000));
+
+        // Resolve each hit to either an observation or a memory.
+        const memoryCache = new Map<string, Memory | null>();
+        const obsCache = new Map<string, CompressedObservation | null>();
+
+        for (const hit of bm25Hits) {
+          // Memory hits have sessionId == "memory" (synthetic) OR live
+          // in KV.memories with a real sessionId. Probe memory scope by
+          // id first; fall back to observation lookup.
+          let mem = memoryCache.get(hit.obsId);
+          if (mem === undefined) {
+            try {
+              mem = (await kv.get<Memory>(KV.memories, hit.obsId)) ?? null;
+            } catch {
+              mem = null;
+            }
+            memoryCache.set(hit.obsId, mem);
+          }
+          if (mem && mem.isLatest !== false) {
+            if (!channelSet.has("memory")) continue;
+            const ts = mem.createdAt;
+            if (!inRange(ts, since, until)) continue;
+            items.push({
+              timestamp: ts,
+              channel: "memory",
+              id: mem.id,
+              title: mem.title,
+              snippet: buildSnippet(mem.content, qLower),
+              score: hit.score,
+              sourceFile: extractMemorySourceFile(mem.content),
+              memoryType: mem.type,
+            });
+            continue;
+          }
+
+          if (!channelSet.has("observation")) continue;
+          let obs = obsCache.get(hit.obsId);
+          if (obs === undefined) {
+            try {
+              obs =
+                (await kv.get<CompressedObservation>(
+                  KV.observations(hit.sessionId),
+                  hit.obsId,
+                )) ?? null;
+            } catch {
+              obs = null;
+            }
+            obsCache.set(hit.obsId, obs);
+          }
+          if (!obs) continue;
+          if (!inRange(obs.timestamp, since, until)) continue;
+          const snippetSource =
+            obs.narrative || obs.facts.join(" ") || obs.title;
+          items.push({
+            timestamp: obs.timestamp,
+            channel: "observation",
+            id: obs.id,
+            sessionId: obs.sessionId,
+            title: obs.title,
+            type: obs.type,
+            snippet: buildSnippet(snippetSource, qLower),
+            score: hit.score,
+          });
+        }
+      }
+
+      // (b) lesson substring scan
+      if (channelSet.has("lesson")) {
+        const lessons = await kv.list<Lesson>(KV.lessons);
+        for (const lesson of lessons) {
+          if (lesson.deleted) continue;
+          if (!lesson.content) continue;
+          if (!lesson.content.toLowerCase().includes(qLower)) continue;
+          const ts = lesson.createdAt;
+          if (!inRange(ts, since, until)) continue;
+          items.push({
+            timestamp: ts,
+            channel: "lesson",
+            id: lesson.id,
+            project: lesson.project,
+            title: lesson.content.slice(0, 80),
+            snippet: buildSnippet(lesson.content, qLower),
+            score: 0,
+          });
+        }
+      }
+
+      // (c) summary substring scan
+      if (channelSet.has("summary")) {
+        const summaries = await kv.list<SessionSummary>(KV.summaries);
+        for (const sum of summaries) {
+          if (!sum.narrative) continue;
+          if (!sum.narrative.toLowerCase().includes(qLower)) continue;
+          const ts = sum.createdAt;
+          if (!inRange(ts, since, until)) continue;
+          items.push({
+            timestamp: ts,
+            channel: "summary",
+            id: sum.sessionId,
+            sessionId: sum.sessionId,
+            project: sum.project,
+            title: sum.title,
+            snippet: buildSnippet(sum.narrative, qLower),
+            score: 0,
+          });
+        }
+      }
+
+      // Sort, trim to limit, then enrich (so enrichment cost scales
+      // with displayed items, not raw match count).
+      items.sort((a, b) => {
+        const ta = Date.parse(a.timestamp);
+        const tb = Date.parse(b.timestamp);
+        if (ta !== tb) return order === "asc" ? ta - tb : tb - ta;
+        return tieBreak(a, b);
+      });
+      const trimmed = items.slice(0, limit);
+
+      // Session lookup cache for observation/summary items.
+      const sessionCache = new Map<string, Session | null>();
+      const loadSession = async (sessionId: string): Promise<Session | null> => {
+        if (sessionCache.has(sessionId)) return sessionCache.get(sessionId)!;
+        let s: Session | null = null;
+        try {
+          s = (await kv.get<Session>(KV.sessions, sessionId)) ?? null;
+        } catch {
+          s = null;
+        }
+        sessionCache.set(sessionId, s);
+        return s;
+      };
+
+      // Per-session observation cache so multiple hits in one session
+      // share a single KV.list call when computing adjacent turns.
+      const obsListCache = new Map<string, CompressedObservation[]>();
+      const loadSessionObs = async (
+        sessionId: string,
+      ): Promise<CompressedObservation[]> => {
+        if (obsListCache.has(sessionId)) return obsListCache.get(sessionId)!;
+        let list: CompressedObservation[] = [];
+        try {
+          list = await kv.list<CompressedObservation>(KV.observations(sessionId));
+        } catch {
+          list = [];
+        }
+        list.sort(
+          (a, b) => Date.parse(a.timestamp) - Date.parse(b.timestamp),
+        );
+        obsListCache.set(sessionId, list);
+        return list;
+      };
+
+      for (const item of trimmed) {
+        if (item.channel === "observation" && item.sessionId) {
+          const s = await loadSession(item.sessionId);
+          if (s) {
+            item.session = {
+              id: s.id,
+              project: s.project,
+              startedAt: s.startedAt,
+              firstPrompt: s.firstPrompt,
+            };
+            if (!item.project) item.project = s.project;
+          }
+          if (includeAdjacentTurns) {
+            const obsList = await loadSessionObs(item.sessionId);
+            const idx = obsList.findIndex((o) => o.id === item.id);
+            if (idx >= 0) {
+              // Walk backwards for the previous conversation turn
+              // (userPrompt → obs.narrative when type=="conversation")
+              // and the previous non-conversation turn (assistant-side
+              // tool use, which acts as a stand-in for the assistant's
+              // most recent observable action).
+              let prevUser: CompressedObservation | undefined;
+              let prevAssistant: CompressedObservation | undefined;
+              for (let i = idx - 1; i >= 0; i--) {
+                const o = obsList[i];
+                if (!prevUser && o.type === "conversation") prevUser = o;
+                else if (!prevAssistant && o.type !== "conversation")
+                  prevAssistant = o;
+                if (prevUser && prevAssistant) break;
+              }
+              if (prevUser || prevAssistant) {
+                item.adjacentTurns = {
+                  previousUserPrompt: prevUser?.narrative,
+                  previousAssistantSummary:
+                    prevAssistant?.title && prevAssistant.narrative
+                      ? `${prevAssistant.title}: ${prevAssistant.narrative}`
+                      : prevAssistant?.narrative,
+                };
+              }
+            }
+          }
+        } else if (item.channel === "summary" && item.sessionId) {
+          const s = await loadSession(item.sessionId);
+          if (s) {
+            item.session = {
+              id: s.id,
+              project: s.project,
+              startedAt: s.startedAt,
+              firstPrompt: s.firstPrompt,
+            };
+            if (!item.project) item.project = s.project;
+          }
+        }
+      }
+
+      const totalsByChannel: Record<LineageChannel, number> = {
+        observation: 0,
+        memory: 0,
+        lesson: 0,
+        summary: 0,
+      };
+      for (const it of items) totalsByChannel[it.channel]++;
+
+      // firstMention always points at the earliest timestamp in the
+      // filtered set, regardless of `order`. Use the asc-sorted view.
+      const earliest =
+        order === "asc"
+          ? trimmed[0]
+          : trimmed.length > 0
+            ? trimmed[trimmed.length - 1]
+            : undefined;
+      const firstMention = earliest
+        ? {
+            timestamp: earliest.timestamp,
+            channel: earliest.channel,
+            sessionId: earliest.sessionId,
+            project: earliest.project,
+          }
+        : null;
+
+      let graphNeighbors: LineageGraphNeighbor[] | undefined;
+      if (includeGraph) {
+        graphNeighbors = [];
+        try {
+          const nodes = await kv.list<GraphNode>(KV.graphNodes);
+          const tokens = qLower
+            .split(/\s+/)
+            .map((t) => t.trim())
+            .filter((t) => t.length >= 3);
+          const matchedNodes = nodes.filter((n) => {
+            if (!n || typeof n.name !== "string") return false;
+            const nameLower = n.name.toLowerCase();
+            if (nameLower.includes(qLower)) return true;
+            for (const tok of tokens) {
+              if (nameLower.includes(tok)) return true;
+            }
+            return false;
+          });
+          if (matchedNodes.length > 0) {
+            const edges = await kv.list<GraphEdge>(KV.graphEdges);
+            const nodeById = new Map<string, GraphNode>();
+            for (const n of nodes) nodeById.set(n.id, n);
+            for (const node of matchedNodes) {
+              const related = edges.filter(
+                (e) => e.sourceNodeId === node.id || e.targetNodeId === node.id,
+              );
+              const edgeOut = related
+                .map((e) => {
+                  const otherId =
+                    e.sourceNodeId === node.id ? e.targetNodeId : e.sourceNodeId;
+                  const other = nodeById.get(otherId);
+                  if (!other) return null;
+                  return {
+                    kind: e.type,
+                    neighbor: other.name,
+                    neighborType: other.type as GraphNodeType,
+                  };
+                })
+                .filter((e): e is NonNullable<typeof e> => e !== null);
+              graphNeighbors.push({
+                name: node.name,
+                type: node.type,
+                edges: edgeOut,
+              });
+            }
+          }
+        } catch (err) {
+          logger.warn("lineage: graph neighbor lookup failed", {
+            error: err instanceof Error ? err.message : String(err),
+          });
+        }
+      }
+
+      void safeAudit(kv, "query", "mem::lineage", [], {
+        query,
+        hits: items.length,
+        returned: trimmed.length,
+        channels: requestedChannels,
+        includeAdjacentTurns,
+        includeGraph,
+      });
+
+      const result: LineageResult = {
+        query,
+        firstMention,
+        timeline: trimmed,
+        totalsByChannel,
+      };
+      if (graphNeighbors !== undefined) result.graphNeighbors = graphNeighbors;
+      return result;
+    },
+  );
+}
diff --git a/src/index.ts b/src/index.ts
index 704d4809..09b6aa77 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -49,6 +49,7 @@ import { registerEvictFunction } from "./functions/evict.js";
 import { registerRelationsFunction } from "./functions/relations.js";
 import { registerTimelineFunction } from "./functions/timeline.js";
 import { registerSmartSearchFunction } from "./functions/smart-search.js";
+import { registerLineageFunction } from "./functions/lineage.js";
 import { registerProfileFunction } from "./functions/profile.js";
 import { registerAutoForgetFunction } from "./functions/auto-forget.js";
 import { registerExportImportFunction } from "./functions/export-import.js";
@@ -211,6 +212,7 @@ async function main() {
   registerDiskSizeManager(sdk, kv);
   registerCompressFunction(sdk, kv, provider, metricsStore);
   registerSearchFunction(sdk, kv);
+  registerLineageFunction(sdk, kv);
   registerContextFunction(sdk, kv, config.tokenBudget);
   registerSummarizeFunction(sdk, kv, provider, metricsStore);
   registerMigrateFunction(sdk, kv);
@@ -481,7 +483,7 @@ async function main() {
     `Ready. ${embeddingProvider ? "Triple-stream (BM25+Vector+Graph)" : "BM25+Graph"} search active.`,
   );
   bootLog(
-    `REST API: 124 endpoints at http://localhost:${config.restPort}/agentmemory/*`,
+    `REST API: 125 endpoints at http://localhost:${config.restPort}/agentmemory/*`,
   );
   bootLog(
     `MCP surface (opt-in via \`npx @agentmemory/mcp\`): ${getAllTools().length} tools · 6 resources · 3 prompts`,
diff --git a/src/mcp/server.ts b/src/mcp/server.ts
index b3b0585d..774398e8 100644
--- a/src/mcp/server.ts
+++ b/src/mcp/server.ts
@@ -275,6 +275,41 @@ export function registerMcpEndpoints(
             };
           }
 
+          case "memory_lineage": {
+            if (typeof args.query !== "string" || !args.query.trim()) {
+              return {
+                status_code: 400,
+                body: { error: "query is required for memory_lineage" },
+              };
+            }
+            const channels = parseCsvList(args.channels);
+            const payload: Record<string, unknown> = {
+              query: args.query,
+            };
+            const limit = asNumber(args.limit);
+            if (limit !== undefined) payload.limit = Math.max(1, Math.min(500, limit));
+            if (typeof args.since === "string") payload.since = args.since;
+            if (typeof args.until === "string") payload.until = args.until;
+            if (channels.length > 0) payload.channels = channels;
+            if (typeof args.includeAdjacentTurns === "boolean")
+              payload.includeAdjacentTurns = args.includeAdjacentTurns;
+            if (typeof args.includeGraph === "boolean")
+              payload.includeGraph = args.includeGraph;
+            if (typeof args.order === "string") payload.order = args.order;
+            const result = await sdk.trigger({
+              function_id: "mem::lineage",
+              payload,
+            });
+            return {
+              status_code: 200,
+              body: {
+                content: [
+                  { type: "text", text: JSON.stringify(result, null, 2) },
+                ],
+              },
+            };
+          }
+
           case "memory_vision_search": {
             const queryText = typeof args.queryText === "string" ? args.queryText : undefined;
             const queryImageRef = typeof args.queryImageRef === "string" ? args.queryImageRef : undefined;
diff --git a/src/mcp/tools-registry.ts b/src/mcp/tools-registry.ts
index 3001cae7..5959fd0c 100644
--- a/src/mcp/tools-registry.ts
+++ b/src/mcp/tools-registry.ts
@@ -126,6 +126,38 @@ export const CORE_TOOLS: McpToolDef[] = [
       required: ["query"],
     },
   },
+  {
+    name: "memory_lineage",
+    description:
+      "Concept lineage: chronologically-ordered hits for a phrase across observation, memory, lesson, and summary channels. Use to trace when a term first entered the corpus and what surrounded it.",
+    inputSchema: {
+      type: "object",
+      properties: {
+        query: { type: "string", description: "Phrase or term to trace" },
+        limit: { type: "number", description: "Max timeline items (default 50)" },
+        since: { type: "string", description: "ISO 8601 lower bound on timestamp" },
+        until: { type: "string", description: "ISO 8601 upper bound on timestamp" },
+        channels: {
+          type: "string",
+          description:
+            "Comma-separated channels to search: observation,memory,lesson,summary (default all)",
+        },
+        includeAdjacentTurns: {
+          type: "boolean",
+          description: "Attach previous user/assistant turn for observation hits (default true)",
+        },
+        includeGraph: {
+          type: "boolean",
+          description: "Attach graph-edge neighbors for matching nodes (default false)",
+        },
+        order: {
+          type: "string",
+          description: "'asc' (oldest first, default) or 'desc'",
+        },
+      },
+      required: ["query"],
+    },
+  },
   {
     name: "memory_vision_search",
     description:
@@ -917,6 +949,7 @@ export const V010_SLOTS_TOOLS: McpToolDef[] = [
   },
 ];
 
+
 const ESSENTIAL_TOOLS = new Set([
   "memory_save",
   "memory_recall",
diff --git a/src/triggers/api.ts b/src/triggers/api.ts
index 083c2159..eb2c0dc5 100644
--- a/src/triggers/api.ts
+++ b/src/triggers/api.ts
@@ -991,7 +991,75 @@ export function registerApiTriggers(
     config: { api_path: "/agentmemory/smart-search", http_method: "POST" },
   });
 
-  sdk.registerFunction("api::timeline", 
+  sdk.registerFunction("api::lineage",
+    async (
+      req: ApiRequest<{
+        query?: string;
+        limit?: number;
+        since?: string;
+        until?: string;
+        channels?: string[];
+        includeAdjacentTurns?: boolean;
+        includeGraph?: boolean;
+        order?: string;
+      }>,
+    ): Promise<Response> => {
+      const authErr = checkAuth(req, secret);
+      if (authErr) return authErr;
+      const body = (req.body ?? {}) as Record<string, unknown>;
+      if (typeof body.query !== "string" || !body.query.trim()) {
+        return { status_code: 400, body: { error: "query is required" } };
+      }
+      if (
+        body.limit !== undefined &&
+        (!Number.isInteger(body.limit) || (body.limit as number) < 1)
+      ) {
+        return { status_code: 400, body: { error: "limit must be a positive integer" } };
+      }
+      if (
+        body.channels !== undefined &&
+        (!Array.isArray(body.channels) ||
+          !body.channels.every((c) => typeof c === "string"))
+      ) {
+        return {
+          status_code: 400,
+          body: { error: "channels must be an array of strings" },
+        };
+      }
+      if (
+        body.order !== undefined &&
+        (typeof body.order !== "string" ||
+          !["asc", "desc"].includes(body.order.trim().toLowerCase()))
+      ) {
+        return {
+          status_code: 400,
+          body: { error: "order must be 'asc' or 'desc'" },
+        };
+      }
+      const result = await sdk.trigger({
+        function_id: "mem::lineage",
+        payload: req.body,
+      });
+      // mem::lineage returns { error } on validation problems we
+      // didn't catch upstream (e.g. empty trimmed query). Surface as 400.
+      if (
+        result &&
+        typeof result === "object" &&
+        "error" in (result as Record<string, unknown>) &&
+        !("timeline" in (result as Record<string, unknown>))
+      ) {
+        return { status_code: 400, body: result };
+      }
+      return { status_code: 200, body: result };
+    },
+  );
+  sdk.registerTrigger({
+    type: "http",
+    function_id: "api::lineage",
+    config: { api_path: "/agentmemory/lineage", http_method: "POST" },
+  });
+
+  sdk.registerFunction("api::timeline",
     async (
       req: ApiRequest<{
         anchor: string;
diff --git a/src/types.ts b/src/types.ts
index 72e347b3..e66988ca 100644
--- a/src/types.ts
+++ b/src/types.ts
@@ -282,6 +282,58 @@ export interface TimelineEntry {
   relativePosition: number;
 }
 
+export type LineageChannel = "observation" | "memory" | "lesson" | "summary";
+
+export interface TimelineItem {
+  timestamp: string;
+  channel: LineageChannel;
+  id: string;
+  sessionId?: string;
+  project?: string;
+  title: string;
+  type?: string;
+  snippet: string;
+  score: number;
+  // memory-specific
+  sourceFile?: string;
+  memoryType?: Memory["type"];
+  // session enrichment (observation/summary)
+  session?: {
+    id: string;
+    project: string;
+    startedAt: string;
+    firstPrompt?: string;
+  };
+  // observation-only enrichment
+  adjacentTurns?: {
+    previousUserPrompt?: string;
+    previousAssistantSummary?: string;
+  };
+}
+
+export interface LineageGraphNeighbor {
+  name: string;
+  type: GraphNodeType;
+  edges: Array<{
+    kind: GraphEdgeType;
+    neighbor: string;
+    neighborType: GraphNodeType;
+  }>;
+}
+
+export interface LineageResult {
+  query: string;
+  firstMention: {
+    timestamp: string;
+    channel: LineageChannel;
+    sessionId?: string;
+    project?: string;
+  } | null;
+  timeline: TimelineItem[];
+  totalsByChannel: Record<LineageChannel, number>;
+  graphNeighbors?: LineageGraphNeighbor[];
+}
+
 export interface ProjectProfile {
   project: string;
   updatedAt: string;
@@ -546,7 +598,8 @@ export interface AuditEntry {
     | "slot_replace"
     | "slot_create"
     | "slot_delete"
-    | "slot_reflect";
+    | "slot_reflect"
+    | "query";
   userId?: string;
   functionId: string;
   targetIds: string[];
diff --git a/test/mcp-standalone.test.ts b/test/mcp-standalone.test.ts
index b48eade9..80262188 100644
--- a/test/mcp-standalone.test.ts
+++ b/test/mcp-standalone.test.ts
@@ -68,8 +68,8 @@ describe("Tools Registry", () => {
     }
   });
 
-  it("CORE_TOOLS has 14 items", () => {
-    expect(CORE_TOOLS.length).toBe(14);
+  it("CORE_TOOLS has 15 items", () => {
+    expect(CORE_TOOLS.length).toBe(15);
   });
 
   it("V040_TOOLS has 8 items", () => {

From 6a4de14afd05705d232492368d723ec7349ca662 Mon Sep 17 00:00:00 2001
From: Ruben de Smet <ruben@lunascens.io>
Date: Wed, 20 May 2026 18:07:09 +0200
Subject: [PATCH 2/8] fix(lineage): address CodeRabbit #570 review

Three real issues caught in review:

1. firstMention was computed from `trimmed` (the post-limit page)
   instead of `items` (the entire filtered set). With `order:desc` and
   a `limit` smaller than the number of hits, the reported firstMention
   was the oldest item on the returned page, not the actual earliest
   filtered hit. Switch to `items` so the semantic contract holds
   regardless of page size.

2. MCP boundary (memory_lineage in src/mcp/server.ts) clamped `limit`
   to 1..500 without checking that it was an integer, and forwarded
   any `order` string and any `channels` values unvalidated. Now:
   validate that `limit` is a positive integer (400 otherwise),
   validate that `order` is "asc"|"desc" (400 otherwise), and filter
   `channels` to the known enum before forwarding.

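3. REST boundary (api::lineage in src/triggers/api.ts) was forwarding
   raw `req.body` after validation, which leaks caller-controlled keys
   to the downstream function. Build a whitelisted payload from the
   validated fields only. `order` is now forwarded in its normalized
   (trimmed, lower-cased) form, matching what validation accepted;
   previously a value such as "DESC" passed validation but was treated
   as "asc" downstream.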

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/functions/lineage.ts | 12 ++++++++----
 src/mcp/server.ts        | 30 +++++++++++++++++++++++++++---
 src/triggers/api.ts      | 17 ++++++++++++++++-
 3 files changed, 51 insertions(+), 8 deletions(-)

diff --git a/src/functions/lineage.ts b/src/functions/lineage.ts
index 1a912968..34070209 100644
--- a/src/functions/lineage.ts
+++ b/src/functions/lineage.ts
@@ -364,12 +364,16 @@ export function registerLineageFunction(sdk: ISdk, kv: StateKV): void {
       for (const it of items) totalsByChannel[it.channel]++;
 
       // firstMention always points at the earliest timestamp in the
-      // filtered set, regardless of `order`. Use the asc-sorted view.
+      // ENTIRE filtered set, regardless of `order` or `limit`. Use
+      // `items` (pre-trim, fully sorted), not `trimmed` — otherwise a
+      // session with more hits than the page size + `order:desc` would
+      // report the oldest-in-page as firstMention instead of the actual
+      // earliest mention. CodeRabbit caught this on #570.
       const earliest =
         order === "asc"
-          ? trimmed[0]
-          : trimmed.length > 0
-            ? trimmed[trimmed.length - 1]
+          ? items[0]
+          : items.length > 0
+            ? items[items.length - 1]
             : undefined;
       const firstMention = earliest
         ? {
diff --git a/src/mcp/server.ts b/src/mcp/server.ts
index 774398e8..e5768758 100644
--- a/src/mcp/server.ts
+++ b/src/mcp/server.ts
@@ -283,19 +283,43 @@ export function registerMcpEndpoints(
               };
             }
             const channels = parseCsvList(args.channels);
+            // Filter to the known channel enum so unknown values don't
+            // reach the daemon.
+            const validChannels = channels.filter((c) =>
+              ["observation", "memory", "lesson", "summary"].includes(c),
+            );
             const payload: Record<string, unknown> = {
               query: args.query,
             };
             const limit = asNumber(args.limit);
-            if (limit !== undefined) payload.limit = Math.max(1, Math.min(500, limit));
+            if (args.limit !== undefined) {
+              if (limit === undefined || !Number.isInteger(limit) || limit < 1) {
+                return {
+                  status_code: 400,
+                  body: { error: "limit must be a positive integer" },
+                };
+              }
+              payload.limit = Math.min(500, limit);
+            }
             if (typeof args.since === "string") payload.since = args.since;
             if (typeof args.until === "string") payload.until = args.until;
-            if (channels.length > 0) payload.channels = channels;
+            if (validChannels.length > 0) payload.channels = validChannels;
             if (typeof args.includeAdjacentTurns === "boolean")
               payload.includeAdjacentTurns = args.includeAdjacentTurns;
             if (typeof args.includeGraph === "boolean")
               payload.includeGraph = args.includeGraph;
-            if (typeof args.order === "string") payload.order = args.order;
+            if (args.order !== undefined) {
+              if (
+                typeof args.order !== "string" ||
+                !["asc", "desc"].includes(args.order)
+              ) {
+                return {
+                  status_code: 400,
+                  body: { error: "order must be 'asc' or 'desc'" },
+                };
+              }
+              payload.order = args.order;
+            }
             const result = await sdk.trigger({
               function_id: "mem::lineage",
               payload,
diff --git a/src/triggers/api.ts b/src/triggers/api.ts
index eb2c0dc5..54283f8f 100644
--- a/src/triggers/api.ts
+++ b/src/triggers/api.ts
@@ -1036,9 +1036,24 @@ export function registerApiTriggers(
           body: { error: "order must be 'asc' or 'desc'" },
         };
       }
+      // Whitelisted payload: only forward validated fields, never raw
+      // req.body — caller-controlled keys could otherwise trip
+      // unintended branches in the downstream function. CodeRabbit
+      // caught this on #570.
+      const payload: Record<string, unknown> = { query: body.query };
+      if (body.limit !== undefined) payload.limit = body.limit;
+      if (typeof body.since === "string") payload.since = body.since;
+      if (typeof body.until === "string") payload.until = body.until;
+      if (Array.isArray(body.channels)) payload.channels = body.channels;
+      if (typeof body.includeAdjacentTurns === "boolean")
+        payload.includeAdjacentTurns = body.includeAdjacentTurns;
+      if (typeof body.includeGraph === "boolean")
+        payload.includeGraph = body.includeGraph;
+      if (typeof body.order === "string")
+        payload.order = (body.order as string).trim().toLowerCase();
       const result = await sdk.trigger({
         function_id: "mem::lineage",
-        payload: req.body,
+        payload,
       });
       // mem::lineage returns { error } on validation problems we
       // didn't catch upstream (e.g. empty trimmed query). Surface as 400.

From 4cd1f4c734d6a9659d1f3a7f077d5a60af3b54c6 Mon Sep 17 00:00:00 2001
From: Ruben de Smet <ruben@lunascens.io>
Date: Wed, 20 May 2026 18:21:42 +0200
Subject: [PATCH 3/8] =?UTF-8?q?fix(lineage):=20CodeRabbit=20#570=20re-revi?=
 =?UTF-8?q?ew=20=E2=80=94=20channels=20rejection=20+=20firstMention=20tieb?=
 =?UTF-8?q?reak?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two follow-up issues from CodeRabbit's review of 6a4de14:

1. `channels` silent broadening: when the user passed `channels` but
   none were in the known enum (e.g. `["foobar","baz"]`), the previous
   fix dropped to an empty `validChannels` and the conditional then
   omitted `payload.channels` entirely — falling back to all-channels
   default. Now: if the user explicitly passed channels but none are
   valid, return 400. Silently broadening invalidates caller intent.

2. `firstMention` could differ by `order`: picking `items[0]` (asc) or
   `items[items.length-1]` (desc) relied on the array's tiebreak rule
   to settle equal-timestamp ties. Two items sharing the earliest
   timestamp on different channels would resolve differently depending
   on `order`. Switch to an order-independent min-by-timestamp reduce
   so the "earliest in filtered set" contract is stable.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/functions/lineage.ts | 21 ++++++++++-----------
 src/mcp/server.ts        | 15 +++++++++++++--
 2 files changed, 23 insertions(+), 13 deletions(-)

diff --git a/src/functions/lineage.ts b/src/functions/lineage.ts
index 34070209..ac5db7f9 100644
--- a/src/functions/lineage.ts
+++ b/src/functions/lineage.ts
@@ -364,17 +364,16 @@ export function registerLineageFunction(sdk: ISdk, kv: StateKV): void {
       for (const it of items) totalsByChannel[it.channel]++;
 
       // firstMention always points at the earliest timestamp in the
-      // ENTIRE filtered set, regardless of `order` or `limit`. Use
-      // `items` (pre-trim, fully sorted), not `trimmed` — otherwise a
-      // session with more hits than the page size + `order:desc` would
-      // report the oldest-in-page as firstMention instead of the actual
-      // earliest mention. CodeRabbit caught this on #570.
-      const earliest =
-        order === "asc"
-          ? items[0]
-          : items.length > 0
-            ? items[items.length - 1]
-            : undefined;
+      // ENTIRE filtered set, independent of `order` AND of how the
+      // tiebreaker ranks items with equal earliest timestamps. Pick the
+      // min-by-timestamp directly instead of trusting position in the
+      // (order-dependent) sorted list — CodeRabbit caught the
+      // tiebreaker variance in the #570 re-review.
+      const earliest = items.length > 0
+        ? items.reduce((a, b) =>
+            Date.parse(a.timestamp) <= Date.parse(b.timestamp) ? a : b,
+          )
+        : undefined;
       const firstMention = earliest
         ? {
             timestamp: earliest.timestamp,
diff --git a/src/mcp/server.ts b/src/mcp/server.ts
index e5768758..469244c5 100644
--- a/src/mcp/server.ts
+++ b/src/mcp/server.ts
@@ -283,11 +283,22 @@ export function registerMcpEndpoints(
               };
             }
             const channels = parseCsvList(args.channels);
-            // Filter to the known channel enum so unknown values don't
-            // reach the daemon.
+            // Validate channel names against the enum. If the user
+            // passed channels but NONE are valid, 400 instead of
+            // silently broadening to all channels (CodeRabbit caught
+            // this in the #570 re-review).
             const validChannels = channels.filter((c) =>
               ["observation", "memory", "lesson", "summary"].includes(c),
             );
+            if (channels.length > 0 && validChannels.length === 0) {
+              return {
+                status_code: 400,
+                body: {
+                  error:
+                    "channels must contain at least one of: observation, memory, lesson, summary",
+                },
+              };
+            }
             const payload: Record<string, unknown> = {
               query: args.query,
             };

From f82d30608b04f24653b26143b9f62930aa6ce9c7 Mon Sep 17 00:00:00 2001
From: Ruben de Smet <ruben@lunascens.io>
Date: Wed, 20 May 2026 01:45:34 +0200
Subject: [PATCH 4/8] =?UTF-8?q?v5-a:=20mem::query=20=E2=80=94=20server-sid?=
 =?UTF-8?q?e=20composable=20retrieval=20pipeline?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Today every mem::* call is a discrete MCP round-trip. Multi-step
recall ("trace a decision, expand session context, summarize") is
N round-trips with manual context-passing, so the LLM falls back to
grep/find because shell composition is cheaper. mem::query collapses
the whole pipeline into one MCP call: ship a JSON pipeline, the
daemon executes it in-process (composing existing mem::* primitives
through sdk.trigger), returns the assembled result + per-step trace.

The point is NOT to add new retrieval primitives. It's to make
composition of the existing 51 primitives a first-class server-side
operation, so memory becomes the first reach for "what do I remember
about X" questions.

PIPELINE SURFACE:
- 12 producers (search/smart_search/lineage/lesson_recall/graph_query/
  facet_query/insight_list/timeline/sessions/frontier/vision_search/
  profile) — wrap existing tools via sdk.trigger
- 11 transformers (filter/sort/limit/take/drop/project/distinct/flatten/
  concat/group_by/top_n_per_group) — pure JS over a normalized envelope
- 3 cross-step (for_each [synthesize/rank_by_relevance rejected inside
  it], join, expand_by_session)
- 2 aggregators (synthesize — terminal / rank_by_relevance — one LLM
  call each)
- Named streams (in/out, default "_") for fork+join workflows
- Structured predicates with dot-paths: {field,op,value} + all/any/not
- Read-only by construction (writer ops fail validation)

GUARDS: budget (default 30, max 100), timeoutMs (default 10000, max
30000), maxStepOut (default 500), maxDepth (default 3, max 5),
dry_run (returns plan + estimatedCost without executing).

DISCOVERABILITY: MCP inputSchema declares the full discriminated union
(28 oneOf branches with per-op required fields and enums), description
inlines 3 literal example pipelines + the envelope kinds + the
dry_run-first workflow, so a fresh schema-aware LLM can author
pipelines without reading source.

WIRES:
- src/functions/query.ts (new — executor, per-producer envelope
  mappers, predicate evaluator, dot-path resolver, dry-run, budget/
  timeout/depth/writer-rejection guards)
- mem::query MCP tool in new V020_QUERY_TOOLS block + ESSENTIAL_TOOLS
- McpToolDef.inputSchema.properties type loosened to accept nested
  JSON Schema (items/oneOf/anyOf/const/enum/...) so the discriminated
  union expresses cleanly
- src/types.ts: EnvelopedKind, EnvelopedRecord, Predicate,
  PipelineStep (discriminated union, 28 variants), QueryOptions,
  QueryRequest, QueryResult, StepTrace, QueryCost
- 28 unit tests (query-transformers): predicate evaluator, dot-paths,
  stable sort, project, distinct, flatten, group_by, top_n_per_group,
  join
- 17 integration tests (query-integration): writer rejection, dry_run,
  records/synthesis modes, budget_exceeded, named-stream join, group_by
  + top_n_per_group, for_each merge, rank_by_relevance, expand_by_session
  from KV, out-defaults-to-_, sessions producer, trace shape

Total MCP tool count 52 → 53.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 README.md                       |    6 +-
 src/functions/query.ts          | 1472 +++++++++++++++++++++++++++++++
 src/index.ts                    |    2 +
 src/mcp/server.ts               |   25 +
 src/mcp/tools-registry.ts       |  557 +++++++++++-
 src/types.ts                    |  270 ++++++
 test/query-integration.test.ts  |  501 +++++++++++
 test/query-transformers.test.ts |  267 ++++++
 8 files changed, 3096 insertions(+), 4 deletions(-)
 create mode 100644 src/functions/query.ts
 create mode 100644 test/query-integration.test.ts
 create mode 100644 test/query-transformers.test.ts

diff --git a/README.md b/README.md
index c3775a11..ae4f1f9a 100644
--- a/README.md
+++ b/README.md
@@ -43,7 +43,7 @@
 <p align="center">
   <picture><source media="(prefers-color-scheme: dark)" srcset="assets/tags/light/stat-recall.svg"><img src="assets/tags/stat-recall.svg" alt="95.2% retrieval R@5" height="38" /></picture>
   <picture><source media="(prefers-color-scheme: dark)" srcset="assets/tags/light/stat-tokens.svg"><img src="assets/tags/stat-tokens.svg" alt="92% fewer tokens" height="38" /></picture>
-  <picture><source media="(prefers-color-scheme: dark)" srcset="assets/tags/light/stat-tools.svg"><img src="assets/tags/stat-tools.svg" alt="54 MCP tools" height="38" /></picture>
+  <picture><source media="(prefers-color-scheme: dark)" srcset="assets/tags/light/stat-tools.svg"><img src="assets/tags/stat-tools.svg" alt="55 MCP tools" height="38" /></picture>
   <picture><source media="(prefers-color-scheme: dark)" srcset="assets/tags/light/stat-hooks.svg"><img src="assets/tags/stat-hooks.svg" alt="12 auto hooks" height="38" /></picture>
   <picture><source media="(prefers-color-scheme: dark)" srcset="assets/tags/light/stat-deps.svg"><img src="assets/tags/stat-deps.svg" alt="0 external DBs" height="38" /></picture>
   <picture><source media="(prefers-color-scheme: dark)" srcset="assets/tags/light/stat-tests.svg"><img src="assets/tags/stat-tests.svg" alt="950+ tests passing" height="38" /></picture>
@@ -408,7 +408,7 @@ Implementation details live in `src/cli.ts` (see `runUpgrade` around the `src/cl
 ### Claude Code (one block, paste it)
 
 ```
-Install agentmemory: run `npx @agentmemory/agentmemory` in a separate terminal to start the memory server. Then run `/plugin marketplace add rohitg00/agentmemory` and `/plugin install agentmemory` — the plugin registers all 12 hooks, 4 skills, AND auto-wires the `@agentmemory/mcp` stdio server via its `.mcp.json`, so you get 54 MCP tools (memory_smart_search, memory_save, memory_sessions, memory_governance_delete, etc.) without any extra config step. Verify with `curl http://localhost:3111/agentmemory/health`. The real-time viewer is at http://localhost:3113.
+Install agentmemory: run `npx @agentmemory/agentmemory` in a separate terminal to start the memory server. Then run `/plugin marketplace add rohitg00/agentmemory` and `/plugin install agentmemory` — the plugin registers all 12 hooks, 4 skills, AND auto-wires the `@agentmemory/mcp` stdio server via its `.mcp.json`, so you get 55 MCP tools (memory_smart_search, memory_save, memory_sessions, memory_governance_delete, etc.) without any extra config step. Verify with `curl http://localhost:3111/agentmemory/health`. The real-time viewer is at http://localhost:3113.
 ```
 
 ### Codex CLI (Codex plugin platform)
@@ -799,7 +799,7 @@ npm install @xenova/transformers
 
 <h2 id="mcp-server"><picture><source media="(prefers-color-scheme: dark)" srcset="assets/tags/light/section-mcp.svg"><img src="assets/tags/section-mcp.svg" alt="MCP Server" height="32" /></picture></h2>
 
-54 tools, 6 resources, 3 prompts, and 4 skills — the most comprehensive MCP memory toolkit for any agent.
+55 tools, 6 resources, 3 prompts, and 4 skills — the most comprehensive MCP memory toolkit for any agent.
 
 > **MCP shim vs full server:** the published `@agentmemory/mcp` package is a thin shim. It exposes the full 51-tool surface **only when it can reach a running agentmemory server** via `AGENTMEMORY_URL` (proxy mode). With no server reachable, the shim falls back to a 7-tool local set (`memory_save`, `memory_recall`, `memory_smart_search`, `memory_sessions`, `memory_export`, `memory_audit`, `memory_governance_delete`). The `AGENTMEMORY_TOOLS=core|all` env var is a *server-side* flag — setting it in the shim's `env` block has no effect. If you see only 7 tools in Cursor / OpenCode / Gemini CLI, start `npx @agentmemory/agentmemory` (or the Docker stack) and set `AGENTMEMORY_URL=http://localhost:3111`.
 
diff --git a/src/functions/query.ts b/src/functions/query.ts
new file mode 100644
index 00000000..2aedf546
--- /dev/null
+++ b/src/functions/query.ts
@@ -0,0 +1,1472 @@
+import type { ISdk } from "iii-sdk";
+import type { StateKV } from "../state/kv.js";
+import type {
+  CompressedObservation,
+  EnvelopedKind,
+  EnvelopedRecord,
+  GraphEdge,
+  GraphNode,
+  Insight,
+  Lesson,
+  LineageResult,
+  MemoryProvider,
+  PipelineOpName,
+  PipelineStep,
+  Predicate,
+  ProjectProfile,
+  QueryCost,
+  QueryRequest,
+  QueryResult,
+  SearchResult,
+  Session,
+  SessionSummary,
+  StepTrace,
+  TimelineItem,
+} from "../types.js";
+import { KV } from "../state/schema.js";
+import { logger } from "../logger.js";
+import { safeAudit } from "./audit.js";
+
+// v5-A: server-side composable retrieval pipeline.
+// Executor for `mem::query`. Composes existing mem::* tools through
+// sdk.trigger so this file never reimplements producer logic — only
+// adapts results into a normalized envelope, runs pure-JS transformers,
+// and dispatches LLM aggregators against the injected provider.
+
+const COST_CLASS: Record<PipelineOpName, 1 | 3 | 10> = { // cost weight per pipeline op: 1 = cheap, 3 = medium, 10 = expensive
+  // Producers — medium: each one fetches records from an existing mem::* source
+  search: 3,
+  smart_search: 3,
+  lineage: 3,
+  lesson_recall: 3,
+  graph_query: 3,
+  facet_query: 3,
+  insight_list: 3,
+  timeline: 3,
+  sessions: 3,
+  frontier: 3,
+  vision_search: 3,
+  profile: 3,
+  // Transformers — cheap: pure in-process reshaping of records already fetched
+  filter: 1,
+  sort: 1,
+  limit: 1,
+  take: 1,
+  drop: 1,
+  project: 1,
+  distinct: 1,
+  flatten: 1,
+  concat: 1,
+  group_by: 1,
+  top_n_per_group: 1,
+  // Cross-step — medium (they do real work beyond reshaping a single stream)
+  for_each: 3,
+  join: 3,
+  expand_by_session: 3,
+  // Aggregators — expensive: one LLM call each
+  synthesize: 10,
+  rank_by_relevance: 10,
+};
+
+const ALLOWED_OPS = new Set<PipelineOpName>(Object.keys(COST_CLASS) as PipelineOpName[]); // derived from COST_CLASS so the op list and the cost table cannot drift
+
+const PRODUCER_FNID: Partial<Record<PipelineOpName, string>> = { // producer op name → function id of the mem::* tool it wraps
+  search: "mem::search",
+  smart_search: "mem::smart-search",
+  lineage: "mem::lineage",
+  lesson_recall: "mem::lesson-recall",
+  graph_query: "mem::graph-query",
+  facet_query: "mem::facet-query",
+  insight_list: "mem::insight-list",
+  timeline: "mem::timeline",
+  frontier: "mem::frontier",
+  vision_search: "mem::vision-search",
+  profile: "mem::profile",
+  // sessions: deliberately absent — no trigger function; the executor reads kv.list(KV.sessions) directly.
+};
+
+// file_history (mem::file-context) is intentionally NOT exposed as a
+// producer in v5-A: it returns a textual digest (`{context: string}`)
+// rather than a structured record list, so it doesn't fit the envelope
+// model. Use `lineage` or `search` with file-related queries instead.
+
+const DEFAULTS = { // guard values for a pipeline run: defaults plus hard caps
+  budget: 30, // default budget
+  budgetMax: 100, // cap on budget
+  timeoutMs: 10_000, // default timeout, in milliseconds
+  timeoutMaxMs: 30_000, // cap on timeout, in milliseconds
+  maxStepOut: 500, // default maxStepOut; presumably the per-step output cap — confirm in the executor
+  maxDepth: 3, // default maxDepth; NOTE(review): the commit message quotes a max of 5, which is not defined here
+};
+
+const SYNTH_SYSTEM_PROMPT = // system prompt text; presumably sent by the `synthesize` aggregator — confirm at the call site
+  "You are a memory-recall assistant. The user asks a question and you have a small set of records (observations, memories, lessons, summaries) from past sessions. Produce a concise answer that cites specific records by their `_id`. If the records do not answer the question, say so plainly. Do not invent facts not present in the records.";
+
+// ---------------------------------------------------------------------------
+// Predicate evaluator
+// ---------------------------------------------------------------------------
+
+/** Walks the dot-separated `path` (e.g. "observation.type") through `record`, own properties only; returns undefined when a hop is missing or not an object. */
+export function resolveDotPath(record: Record<string, unknown>, path: string): unknown {
+  let cur: unknown = record;
+  for (const segment of path.split(".")) {
+    // Own keys only: inherited members (`constructor`, `toString`, `__proto__`) would make `exists` match every record.
+    if (cur === null || typeof cur !== "object" || !Object.prototype.hasOwnProperty.call(cur, segment)) return undefined;
+    cur = (cur as Record<string, unknown>)[segment];
+  }
+  return cur;
+}
+
+/**
+ * Decides whether `record` satisfies `pred`. Combinators (all / any / not)
+ * recurse; a leaf reads `field` via resolveDotPath and applies `op`. Operands
+ * of the wrong runtime type, or an unrecognised op, make the leaf false.
+ */
+export function evalPredicate(pred: Predicate, record: EnvelopedRecord): boolean {
+  if ("all" in pred) return pred.all.every((child) => evalPredicate(child, record));
+  if ("any" in pred) return pred.any.some((child) => evalPredicate(child, record));
+  if ("not" in pred) return !evalPredicate(pred.not, record);
+
+  const actual = resolveDotPath(record as unknown as Record<string, unknown>, pred.field);
+  const expected: unknown = pred.value;
+  const bothNumbers = (cmp: (a: number, b: number) => boolean): boolean =>
+    typeof actual === "number" && typeof expected === "number" && cmp(actual, expected);
+  const bothStrings = (cmp: (a: string, b: string) => boolean): boolean =>
+    typeof actual === "string" && typeof expected === "string" && cmp(actual, expected);
+  switch (pred.op) {
+    case "eq":
+      return actual === expected;
+    case "neq":
+      return actual !== expected;
+    case "in":
+      return Array.isArray(expected) && expected.includes(actual);
+    case "not_in":
+      return Array.isArray(expected) && !expected.includes(actual);
+    case "gt":
+      return bothNumbers((a, b) => a > b);
+    case "gte":
+      return bothNumbers((a, b) => a >= b);
+    case "lt":
+      return bothNumbers((a, b) => a < b);
+    case "lte":
+      return bothNumbers((a, b) => a <= b);
+    case "contains": // case-insensitive substring match
+      return bothStrings((a, b) => a.toLowerCase().includes(b.toLowerCase()));
+    case "starts_with": // case-insensitive prefix match
+      return bothStrings((a, b) => a.toLowerCase().startsWith(b.toLowerCase()));
+    case "exists": // undefined, null and "" all count as absent
+      return !(actual === undefined || actual === null || actual === "");
+    case "since": // inclusive lower bound; unparseable dates compare false
+      return bothStrings((a, b) => Date.parse(a) >= Date.parse(b));
+    case "until": // inclusive upper bound; unparseable dates compare false
+      return bothStrings((a, b) => Date.parse(a) <= Date.parse(b));
+    default:
+      return false;
+  }
+}
+
+// ---------------------------------------------------------------------------
+// Producer mappers
+// ---------------------------------------------------------------------------
+
+// Adapts a search payload (`{ results: SearchResult[] }`) into observation envelopes; entries without an `observation` are skipped.
+function mapSearchResults(raw: unknown, stepId?: string): EnvelopedRecord[] {
+  const hits = raw && typeof raw === "object" ? (raw as { results?: unknown }).results : undefined;
+  if (!Array.isArray(hits)) return [];
+  const envelopes: EnvelopedRecord[] = [];
+  for (const hit of hits) {
+    if (!hit || typeof hit !== "object" || !("observation" in hit)) continue;
+    const { observation, sessionId, score } = hit as SearchResult;
+    envelopes.push({
+      _kind: "observation",
+      _id: observation.id,
+      _sessionId: sessionId,
+      _createdAt: observation.timestamp,
+      _score: score,
+      _kindSpecific: observation.type,
+      _source: { op: "search", stepId },
+      title: observation.title,
+      narrative: observation.narrative,
+      type: observation.type,
+    });
+  }
+  return envelopes;
+}
+
+// Adapts a LineageResult into one envelope per `timeline` item. The channel
+// becomes `_kind` (anything unrecognised maps to "summary"); session,
+// adjacentTurns and sourceFile are attached only when truthy.
+function mapLineageResult(raw: unknown, stepId?: string): EnvelopedRecord[] {
+  if (!raw || typeof raw !== "object") return [];
+  const { timeline } = raw as LineageResult;
+  if (!Array.isArray(timeline)) return [];
+  const envelopes: EnvelopedRecord[] = [];
+  for (const item of timeline as TimelineItem[]) {
+    const { channel } = item;
+    const kind: EnvelopedKind =
+      channel === "observation" || channel === "memory" || channel === "lesson" ? channel : "summary";
+    const env: EnvelopedRecord = {
+      _kind: kind,
+      _id: item.id,
+      _sessionId: item.sessionId,
+      _project: item.project,
+      _createdAt: item.timestamp,
+      _score: item.score,
+      _kindSpecific: item.memoryType ?? item.type,
+      _source: { op: "lineage", stepId },
+      title: item.title,
+      snippet: item.snippet,
+      channel,
+    };
+    if (item.session) env["session"] = item.session;
+    if (item.adjacentTurns) env["adjacentTurns"] = item.adjacentTurns;
+    if (item.sourceFile) env["sourceFile"] = item.sourceFile;
+    envelopes.push(env);
+  }
+  return envelopes;
+}
+
+// Adapts a lesson-recall payload (`{ lessons: [...] }`) into lesson envelopes.
+// `_score` prefers the lesson's own `score` and falls back to `confidence`.
+function mapLessonRecallResult(raw: unknown, stepId?: string): EnvelopedRecord[] {
+  const lessons = raw && typeof raw === "object" ? (raw as { lessons?: unknown }).lessons : undefined;
+  if (!Array.isArray(lessons)) return [];
+  const isLesson = (l: unknown): l is Lesson & { score?: number } =>
+    !!l && typeof l === "object" && "id" in l;
+  return lessons.filter(isLesson).map(
+    (lesson): EnvelopedRecord => ({
+      _kind: "lesson",
+      _id: lesson.id,
+      _project: lesson.project,
+      _createdAt: lesson.createdAt,
+      _score: lesson.score ?? lesson.confidence,
+      _source: { op: "lesson_recall", stepId },
+      content: lesson.content,
+      context: lesson.context,
+      confidence: lesson.confidence,
+      tags: lesson.tags,
+    }),
+  );
+}
+
+// Adapts smart-search results — expanded `{obsId, sessionId, observation}` or
+// compact `{obsId, sessionId, title, type, score, timestamp}` — into
+// observation envelopes. Non-objects and compact rows lacking a string/number
+// `obsId` are skipped (String(undefined) used to mint the id "undefined").
+function mapSmartSearchResult(raw: unknown, stepId?: string): EnvelopedRecord[] {
+  const results = raw && typeof raw === "object" ? (raw as { results?: unknown }).results : undefined;
+  if (!Array.isArray(results)) return [];
+  const str = (v: unknown): string | undefined => (typeof v === "string" ? v : undefined);
+  const out: EnvelopedRecord[] = [];
+  for (const item of results) {
+    if (!item || typeof item !== "object") continue;
+    const i = item as Record<string, unknown>;
+    if (i["observation"] && typeof i["observation"] === "object") {
+      const obs = i["observation"] as CompressedObservation;
+      out.push({
+        _kind: "observation",
+        _id: obs.id,
+        _sessionId: str(i["sessionId"]) ?? obs.sessionId,
+        _createdAt: obs.timestamp,
+        _kindSpecific: obs.type,
+        _source: { op: "smart_search", stepId },
+        title: obs.title, narrative: obs.narrative, type: obs.type,
+      });
+      continue;
+    }
+    const obsId = i["obsId"];
+    if (typeof obsId !== "string" && typeof obsId !== "number") continue;
+    out.push({
+      _kind: "observation",
+      _id: String(obsId),
+      _sessionId: str(i["sessionId"]),
+      _createdAt: str(i["timestamp"]),
+      _score: typeof i["score"] === "number" ? i["score"] : undefined,
+      _kindSpecific: str(i["type"]),
+      _source: { op: "smart_search", stepId },
+      title: i["title"], type: i["type"],
+    });
+  }
+  return out;
+}
+
+// Adapts a graph-query payload (`{ nodes, edges }`) into envelopes.
+// Output order is fixed: every node first (as "graph_node"), then every
+// edge (as "graph_edge"). A missing or non-array `nodes` / `edges` list
+// simply contributes nothing, so a payload with neither yields an empty
+// array rather than an error.
+function mapGraphQueryResult(raw: unknown, stepId?: string): EnvelopedRecord[] {
+  if (!raw || typeof raw !== "object") return [];
+  const { nodes, edges } = raw as { nodes?: GraphNode[]; edges?: GraphEdge[] };
+  const nodeEnvelopes = (Array.isArray(nodes) ? nodes : []).map(
+    (node): EnvelopedRecord => ({
+      _kind: "graph_node",
+      _id: node.id,
+      _kindSpecific: node.type,
+      _source: { op: "graph_query", stepId },
+      name: node.name,
+      nodeType: node.type,
+      properties: node.properties,
+      sourceObservationIds: node.sourceObservationIds,
+    }),
+  );
+  const edgeEnvelopes = (Array.isArray(edges) ? edges : []).map(
+    (edge): EnvelopedRecord => ({
+      _kind: "graph_edge",
+      _id: edge.id,
+      _kindSpecific: edge.type,
+      _source: { op: "graph_query", stepId },
+      edgeType: edge.type,
+      sourceNodeId: edge.sourceNodeId,
+      targetNodeId: edge.targetNodeId,
+    }),
+  );
+  return [...nodeEnvelopes, ...edgeEnvelopes];
+}
+
+// Adapts a facet-query payload (`{ results: [...] }`) into "facet_hit"
+// envelopes keyed by `targetId`; entries without a string `targetId` are skipped.
+function mapFacetQueryResult(raw: unknown, stepId?: string): EnvelopedRecord[] {
+  const results = raw && typeof raw === "object" ? (raw as { results?: unknown }).results : undefined;
+  if (!Array.isArray(results)) return [];
+  type FacetHit = { targetId: string; targetType: string; matchedFacets: string[] };
+  const hits: EnvelopedRecord[] = [];
+  for (const entry of results) {
+    if (!entry || typeof entry !== "object" || typeof (entry as { targetId?: unknown }).targetId !== "string") continue;
+    const { targetId, targetType, matchedFacets } = entry as FacetHit;
+    hits.push({
+      _kind: "facet_hit",
+      _id: targetId,
+      _kindSpecific: targetType,
+      _source: { op: "facet_query", stepId },
+      targetType,
+      matchedFacets,
+    });
+  }
+  return hits;
+}
+
+// Adapts an insight-list payload (`{ insights: [...] }`) into "insight"
+// envelopes; entries that are not objects with an `id` are skipped, and the
+// insight's confidence doubles as the envelope `_score`.
+function mapInsightListResult(raw: unknown, stepId?: string): EnvelopedRecord[] {
+  const insights = raw && typeof raw === "object" ? (raw as { insights?: unknown }).insights : undefined;
+  if (!Array.isArray(insights)) return [];
+  return insights.flatMap((candidate): EnvelopedRecord[] => {
+    if (!candidate || typeof candidate !== "object" || !("id" in candidate)) return [];
+    const insight = candidate as Insight;
+    return [{
+      _kind: "insight",
+      _id: insight.id,
+      _project: insight.project,
+      _createdAt: insight.createdAt,
+      _score: insight.confidence,
+      _source: { op: "insight_list", stepId },
+      title: insight.title,
+      content: insight.content,
+      confidence: insight.confidence,
+      sourceConceptCluster: insight.sourceConceptCluster,
+      sourceMemoryIds: insight.sourceMemoryIds,
+    }];
+  });
+}
+
+// Adapts a timeline payload (`{ entries: [...] }`) into "timeline_item" envelopes;
+// only entries carrying an `observation` are kept, `relativePosition` is passed through.
+function mapTimelineResult(raw: unknown, stepId?: string): EnvelopedRecord[] {
+  const entries = raw && typeof raw === "object" ? (raw as { entries?: unknown }).entries : undefined;
+  if (!Array.isArray(entries)) return [];
+  const envelopes: EnvelopedRecord[] = [];
+  for (const entry of entries) {
+    if (!entry || typeof entry !== "object" || !("observation" in entry)) continue;
+    const { observation, sessionId, relativePosition } = entry as { observation: CompressedObservation; sessionId?: string; relativePosition?: number };
+    envelopes.push({
+      _kind: "timeline_item",
+      _id: observation.id,
+      _sessionId: sessionId,
+      _createdAt: observation.timestamp,
+      _kindSpecific: observation.type,
+      _source: { op: "timeline", stepId },
+      title: observation.title,
+      narrative: observation.narrative,
+      type: observation.type,
+      relativePosition,
+    });
+  }
+  return envelopes;
+}
+
+// Wraps Session rows as "session" envelopes; a non-empty `projectFilter` keeps only sessions of that exact project.
+function mapSessionsList(sessions: Session[], stepId?: string, projectFilter?: string): EnvelopedRecord[] {
+  const keep = (s: Session): boolean => !projectFilter || s.project === projectFilter;
+  return sessions.filter(keep).map(
+    (session): EnvelopedRecord => ({
+      _kind: "session",
+      _id: session.id,
+      _project: session.project,
+      _createdAt: session.startedAt,
+      _source: { op: "sessions", stepId },
+      project: session.project,
+      status: (session as Session & { status?: string }).status,
+      startedAt: session.startedAt,
+      firstPrompt: session.firstPrompt,
+    }),
+  );
+}
+
+// Adapts a frontier payload (`{ frontier: [...] }`) into "frontier_entry"
+// envelopes, flattening each entry's `action` plus its score and lease flag.
+function mapFrontierResult(raw: unknown, stepId?: string): EnvelopedRecord[] {
+  const frontier = raw && typeof raw === "object" ? (raw as { frontier?: unknown }).frontier : undefined;
+  if (!Array.isArray(frontier)) return [];
+  type FrontierItem = { action: { id: string; project?: string; status?: string; title?: string; priority?: number }; score: number; leased?: boolean };
+  return frontier.flatMap((entry): EnvelopedRecord[] => {
+    if (!entry || typeof entry !== "object" || !("action" in entry)) return [];
+    const { action, score, leased } = entry as FrontierItem;
+    return [{
+      _kind: "frontier_entry",
+      _id: action.id,
+      _project: action.project,
+      _score: score,
+      _kindSpecific: action.status,
+      _source: { op: "frontier", stepId },
+      title: action.title,
+      priority: action.priority,
+      status: action.status,
+      leased,
+    }];
+  });
+}
+
+// Adapts a vision-search payload (`{ results: [...] }`) into "vision_hit"
+// envelopes identified by `imageRef`; entries without a string `imageRef` are skipped.
+function mapVisionSearchResult(raw: unknown, stepId?: string): EnvelopedRecord[] {
+  const results = raw && typeof raw === "object" ? (raw as { results?: unknown }).results : undefined;
+  if (!Array.isArray(results)) return [];
+  type VisionHit = { imageRef: string; score: number; sessionId?: string; observationId?: string; updatedAt?: string };
+  const envelopes: EnvelopedRecord[] = [];
+  for (const entry of results) {
+    if (!entry || typeof entry !== "object" || typeof (entry as { imageRef?: unknown }).imageRef !== "string") continue;
+    const hit = entry as VisionHit;
+    envelopes.push({
+      _kind: "vision_hit",
+      _id: hit.imageRef,
+      _sessionId: hit.sessionId,
+      _createdAt: hit.updatedAt,
+      _score: hit.score,
+      _source: { op: "vision_search", stepId },
+      imageRef: hit.imageRef,
+      observationId: hit.observationId,
+    });
+  }
+  return envelopes;
+}
+
+// Adapts a profile payload (`{ profile }`) into a single "profile" envelope
+// keyed by project name; yields [] when no profile object is present.
+function mapProfileResult(raw: unknown, stepId?: string): EnvelopedRecord[] {
+  const profile = raw && typeof raw === "object" ? (raw as { profile?: ProjectProfile | null }).profile : undefined;
+  if (!profile || typeof profile !== "object") return [];
+  return [{
+    _kind: "profile",
+    _id: profile.project,
+    _project: profile.project,
+    _createdAt: profile.updatedAt,
+    _source: { op: "profile", stepId },
+    topConcepts: profile.topConcepts,
+    topFiles: profile.topFiles,
+    conventions: profile.conventions,
+    commonErrors: profile.commonErrors,
+    recentActivity: profile.recentActivity,
+    sessionCount: profile.sessionCount,
+    totalObservations: profile.totalObservations,
+  }];
+}
+
+// ---------------------------------------------------------------------------
+// Transformers
+// ---------------------------------------------------------------------------
+
+// Ascending comparator: missing values (null/undefined) sort last and tie with each other (a consistent comparator cannot
+// return 1 both ways); numbers compare numerically; strings as dates when both parse, else by code unit; other mixes tie.
+function compareForSort(a: unknown, b: unknown): number {
+  if (a === b) return 0;
+  const missing = (v: unknown): boolean => v === undefined || v === null;
+  if (missing(a) || missing(b)) return missing(a) === missing(b) ? 0 : missing(a) ? 1 : -1;
+  if (typeof a === "number" && typeof b === "number") return a - b;
+  if (typeof a === "string" && typeof b === "string") {
+    const [ta, tb] = [Date.parse(a), Date.parse(b)];
+    if (Number.isFinite(ta) && Number.isFinite(tb)) return ta - tb;
+    return a < b ? -1 : 1;
+  }
+  return 0;
+}
+
+// Stable multi-key sort by dot-path(s): returns a new array, earlier keys win, remaining ties keep input order.
+// Missing (null/undefined) keys sort last in BOTH directions; `dir` only flips comparisons between present values.
+export function stableSort(records: EnvelopedRecord[], by: string | string[], dir: "asc" | "desc" = "desc"): EnvelopedRecord[] {
+  const keys = Array.isArray(by) ? by : [by];
+  const sign = dir === "asc" ? 1 : -1;
+  const read = (r: EnvelopedRecord, k: string): unknown => resolveDotPath(r as unknown as Record<string, unknown>, k);
+  const missing = (v: unknown): boolean => v === undefined || v === null;
+  return records
+    .map((r, i) => ({ r, i }))
+    .sort((x, y) => {
+      for (const k of keys) {
+        const [a, b] = [read(x.r, k), read(y.r, k)];
+        if (missing(a) !== missing(b)) return missing(a) ? 1 : -1; // not multiplied by sign: unscored rows must not lead a desc sort
+        if (missing(a)) continue; // both missing: tie on this key, try the next one
+        const c = compareForSort(a, b);
+        if (c !== 0) return sign * c;
+      }
+      return x.i - y.i;
+    })
+    .map((wrapped) => wrapped.r);
+}
+
+// ---------------------------------------------------------------------------
+// Additional transformers
+// ---------------------------------------------------------------------------
+
+/**
+ * Narrows and/or aliases the fields of every record.
+ *
+ * With a non-empty `fields` list each output record keeps the envelope
+ * fields (`_kind`, `_id`, `_source`, plus `_sessionId`, `_project`,
+ * `_createdAt`, `_score` and `_kindSpecific` when set) together with every
+ * listed dot-path that resolves to a defined value, stored under the path
+ * string itself. Without `fields` the record is shallow-copied.
+ *
+ * `rename` then copies each resolvable `from` path to the `to` key; the
+ * source key is left in place.
+ */
+export function applyProject(
+  records: EnvelopedRecord[],
+  fields: string[] | undefined,
+  rename: Record<string, string> | undefined,
+): EnvelopedRecord[] {
+  const asBag = (rec: EnvelopedRecord) => rec as unknown as Record<string, unknown>;
+
+  // Builds the trimmed record: envelope core first so downstream ops keep working.
+  const pick = (rec: EnvelopedRecord, wanted: string[]): EnvelopedRecord => {
+    const kept: EnvelopedRecord = { _kind: rec._kind, _id: rec._id, _source: rec._source };
+    if (rec._sessionId !== undefined) kept._sessionId = rec._sessionId;
+    if (rec._project !== undefined) kept._project = rec._project;
+    if (rec._createdAt !== undefined) kept._createdAt = rec._createdAt;
+    if (rec._score !== undefined) kept._score = rec._score;
+    if (rec._kindSpecific !== undefined) kept._kindSpecific = rec._kindSpecific;
+    for (const path of wanted) {
+      const value = resolveDotPath(asBag(rec), path);
+      if (value !== undefined) kept[path] = value;
+    }
+    return kept;
+  };
+
+  const aliases = rename ? Object.entries(rename) : [];
+
+  return records.map((rec) => {
+    const projected: EnvelopedRecord = fields && fields.length > 0 ? pick(rec, fields) : { ...rec };
+    for (const [source, target] of aliases) {
+      const value = resolveDotPath(asBag(projected), source);
+      if (value !== undefined) projected[target] = value;
+    }
+    return projected;
+  });
+}
+
+/**
+ * Keeps the first record for each distinct value found at dot-path `by`,
+ * preserving input order. Values are compared with `Set` semantics, so
+ * records whose path is unresolved all share the `undefined` key.
+ */
+export function applyDistinct(records: EnvelopedRecord[], by: string): EnvelopedRecord[] {
+  const alreadyEmitted = new Set<unknown>();
+  return records.filter((rec) => {
+    const marker = resolveDotPath(rec as unknown as Record<string, unknown>, by);
+    if (alreadyEmitted.has(marker)) return false;
+    alreadyEmitted.add(marker);
+    return true;
+  });
+}
+
+/**
+ * Expands records whose `field` resolves to an array into one record per
+ * element, each a shallow copy with `field` set to that element. Records
+ * whose `field` is not an array pass through untouched; an empty array
+ * yields no output for that record.
+ */
+export function applyFlatten(records: EnvelopedRecord[], field: string): EnvelopedRecord[] {
+  return records.flatMap((rec): EnvelopedRecord[] => {
+    const value = resolveDotPath(rec as unknown as Record<string, unknown>, field);
+    if (!Array.isArray(value)) return [rec];
+    return Array.from(value, (element): EnvelopedRecord => ({ ...rec, [field]: element }));
+  });
+}
+
+/**
+ * Buckets records by the stringified value at dot-path `by` and emits one
+ * `group` record per bucket, in first-seen order. Null or unresolved keys
+ * land in the `"__null__"` bucket. Each group carries `_groupKey`,
+ * `_groupSize` and the bucket contents under `members`.
+ */
+export function applyGroupBy(records: EnvelopedRecord[], by: string): EnvelopedRecord[] {
+  const buckets = new Map<string, EnvelopedRecord[]>();
+  for (const rec of records) {
+    const raw = resolveDotPath(rec as unknown as Record<string, unknown>, by);
+    const label = raw === undefined || raw === null ? "__null__" : String(raw);
+    const bucket = buckets.get(label);
+    if (bucket) {
+      bucket.push(rec);
+    } else {
+      buckets.set(label, [rec]);
+    }
+  }
+  return Array.from(
+    buckets,
+    ([label, members]): EnvelopedRecord => ({
+      _kind: "group",
+      _id: `group:${label}`,
+      _source: { op: "group_by" },
+      _groupKey: label,
+      _groupSize: members.length,
+      members,
+    }),
+  );
+}
+
+/**
+ * Consumes a stream of `group` records (as produced by group_by) and returns
+ * a flat stream holding the first `n` members of each group after sorting
+ * them by `by` (default `_score`) in direction `dir`. Records that are not
+ * groups with a `members` array pass through unchanged.
+ */
+export function applyTopNPerGroup(
+  records: EnvelopedRecord[],
+  n: number,
+  by: string | undefined,
+  dir: "asc" | "desc",
+): EnvelopedRecord[] {
+  const keep = Math.max(0, n | 0);
+  const orderBy = by ?? "_score";
+  return records.flatMap((entry): EnvelopedRecord[] => {
+    const members = entry["members"];
+    if (entry._kind !== "group" || !Array.isArray(members)) return [entry];
+    // Sort a copy so the group's own member list is left untouched.
+    const ordered = stableSort((members as EnvelopedRecord[]).slice(), orderBy, dir);
+    return ordered.slice(0, keep);
+  });
+}
+
+/**
+ * Hash-joins `left` against `right` on equality of two dot-path keys.
+ *
+ * Every match emits a shallow copy of the left record with the right record
+ * attached as `_join.right`. For any `type` other than `"inner"`, left
+ * records without a match are emitted once with `_join.right` set to `null`;
+ * for `"inner"` they are dropped.
+ *
+ * A key that resolves to `null` / `undefined` never matches anything, so
+ * records that merely lack the join field on both sides are not
+ * cross-joined with each other.
+ *
+ * @param on dot-paths of the join key on each side
+ * @param type `"inner"` or `"left"`
+ */
+export function applyJoin(
+  left: EnvelopedRecord[],
+  right: EnvelopedRecord[],
+  on: { left: string; right: string },
+  type: "inner" | "left",
+): EnvelopedRecord[] {
+  const isJoinable = (k: unknown): boolean => k !== undefined && k !== null;
+
+  const rightIndex = new Map<unknown, EnvelopedRecord[]>();
+  for (const r of right) {
+    const k = resolveDotPath(r as unknown as Record<string, unknown>, on.right);
+    if (!isJoinable(k)) continue;
+    const bucket = rightIndex.get(k);
+    if (bucket) {
+      bucket.push(r);
+    } else {
+      rightIndex.set(k, [r]);
+    }
+  }
+
+  const out: EnvelopedRecord[] = [];
+  for (const l of left) {
+    const k = resolveDotPath(l as unknown as Record<string, unknown>, on.left);
+    const matches = isJoinable(k) ? rightIndex.get(k) : undefined;
+    if (!matches || matches.length === 0) {
+      if (type === "inner") continue;
+      out.push({ ...l, _join: { right: null } });
+      continue;
+    }
+    for (const m of matches) {
+      out.push({ ...l, _join: { right: m } });
+    }
+  }
+  return out;
+}
+
+// ---------------------------------------------------------------------------
+// Synthesize (LLM aggregator)
+// ---------------------------------------------------------------------------
+
+/**
+ * Renders the user prompt for the synthesize step: the question, the
+ * requested style, one line per record and the citation instructions.
+ *
+ * At most `max(maxCitations * 3, 12)` records are listed. Each line shows
+ * the record's kind, id, optional timestamp/project and the first string
+ * found among `snippet`, `narrative`, `content`, `title`, cut to 400
+ * characters with whitespace runs collapsed to single spaces.
+ */
+function buildSynthPrompt(
+  records: EnvelopedRecord[],
+  question: string,
+  style: "answer" | "bullets" | "timeline",
+  maxCitations: number,
+): string {
+  const textFields = ["snippet", "narrative", "content", "title"] as const;
+
+  const describe = (rec: EnvelopedRecord): string => {
+    let body = "";
+    for (const name of textFields) {
+      const candidate = rec[name];
+      if (typeof candidate === "string") {
+        body = candidate;
+        break;
+      }
+    }
+    const clipped = body.length > 400;
+    const flat = (clipped ? body.slice(0, 400) : body).replace(/\s+/g, " ");
+    const when = rec._createdAt ? ` @${rec._createdAt}` : "";
+    const proj = rec._project ? ` proj=${rec._project}` : "";
+    return `- [${rec._kind}] _id=${rec._id}${when}${proj}: ${flat}${clipped ? "…" : ""}`;
+  };
+
+  const shown = records.slice(0, Math.max(maxCitations * 3, 12));
+  return [
+    `Question: ${question}`,
+    `Style: ${style}`,
+    `Records (kind, _id, key fields):`,
+    ...shown.map(describe),
+    "",
+    `Produce a ${style} response. After the response, on its own line, write a JSON array of citations like: CITATIONS: [{"kind":"memory","id":"..."}].`,
+  ].join("\n");
+}
+
+/**
+ * Splits raw model output into the prose summary and its citations.
+ *
+ * The text before the last `CITATIONS:` marker is the summary. The citation
+ * array is read from the outermost `[...]` span after the marker, so code
+ * fences or a trailing remark around the JSON do not defeat parsing. At most
+ * `maxCitations` entries are considered, and only entries with string `id`
+ * and `kind` are kept.
+ *
+ * If that yields nothing (no marker, malformed JSON, empty array), the
+ * function falls back to citing every record whose non-empty `_id` appears
+ * verbatim in the summary, up to `maxCitations`.
+ *
+ * @param rawText full text returned by the provider
+ * @param records records that were offered to the model
+ * @param maxCitations upper bound on returned citations
+ */
+function parseSynthesis(
+  rawText: string,
+  records: EnvelopedRecord[],
+  maxCitations: number,
+): { summary: string; citations: { kind: EnvelopedKind; id: string }[] } {
+  const marker = "CITATIONS:";
+  const idx = rawText.lastIndexOf(marker);
+  let summary = rawText.trim();
+  const citations: { kind: EnvelopedKind; id: string }[] = [];
+  if (idx >= 0) {
+    summary = rawText.slice(0, idx).trim();
+    const tail = rawText.slice(idx + marker.length);
+    // Isolate the array itself; models frequently add ```json fences or a
+    // closing sentence, which would make JSON.parse on the whole tail throw.
+    const open = tail.indexOf("[");
+    const close = tail.lastIndexOf("]");
+    if (open >= 0 && close > open) {
+      try {
+        const parsed: unknown = JSON.parse(tail.slice(open, close + 1));
+        if (Array.isArray(parsed)) {
+          for (const c of parsed.slice(0, maxCitations) as unknown[]) {
+            if (c === null || typeof c !== "object") continue;
+            const { id, kind } = c as { id?: unknown; kind?: unknown };
+            if (typeof id === "string" && typeof kind === "string") {
+              citations.push({ kind: kind as EnvelopedKind, id });
+            }
+          }
+        }
+      } catch {
+        // Malformed JSON: fall back to the id sniff below.
+      }
+    }
+  }
+  if (citations.length === 0) {
+    // Sniff record ids that appear in the summary text.
+    const byId = new Map(records.map((r) => [r._id, r._kind]));
+    for (const [id, kind] of byId) {
+      // An empty id is a substring of every summary; never cite it.
+      if (id && summary.includes(id)) {
+        citations.push({ kind, id });
+        if (citations.length >= maxCitations) break;
+      }
+    }
+  }
+  return { summary, citations };
+}
+
+// ---------------------------------------------------------------------------
+// expand_by_session + rank_by_relevance
+// ---------------------------------------------------------------------------
+
+/**
+ * Decorates each record with a digest of the session it belongs to.
+ *
+ * The session id is read from dot-path `field`; records without a non-empty
+ * string id pass through unchanged. For the rest, the session and its
+ * summary are fetched from `ctx.kv` (once per id, memoised for this call)
+ * and attached as `_session` / `_summary`, or `null` when the lookup fails
+ * or finds nothing. Lookups are deliberately best-effort: kv errors are
+ * swallowed.
+ */
+async function applyExpandBySession(
+  records: EnvelopedRecord[],
+  field: string,
+  ctx: ExecCtx,
+): Promise<EnvelopedRecord[]> {
+  type SessionBundle = { session: Session | null; summary: SessionSummary | null };
+  const memo = new Map<string, SessionBundle>();
+
+  const lookup = async (sessionId: string): Promise<SessionBundle> => {
+    const hit = memo.get(sessionId);
+    if (hit) return hit;
+    const bundle: SessionBundle = { session: null, summary: null };
+    try {
+      bundle.session = (await ctx.kv.get<Session>(KV.sessions, sessionId)) ?? null;
+    } catch {
+      bundle.session = null;
+    }
+    try {
+      bundle.summary = (await ctx.kv.get<SessionSummary>(KV.summaries, sessionId)) ?? null;
+    } catch {
+      bundle.summary = null;
+    }
+    memo.set(sessionId, bundle);
+    return bundle;
+  };
+
+  const expanded: EnvelopedRecord[] = [];
+  for (const rec of records) {
+    const sessionId = resolveDotPath(rec as unknown as Record<string, unknown>, field);
+    if (typeof sessionId !== "string" || !sessionId) {
+      expanded.push(rec);
+      continue;
+    }
+    const { session, summary } = await lookup(sessionId);
+    let sessionDigest = null;
+    if (session) {
+      sessionDigest = {
+        id: session.id,
+        project: session.project,
+        startedAt: session.startedAt,
+        firstPrompt: session.firstPrompt,
+      };
+    }
+    let summaryDigest = null;
+    if (summary) {
+      summaryDigest = {
+        title: summary.title,
+        narrative: summary.narrative,
+        createdAt: summary.createdAt,
+      };
+    }
+    expanded.push({ ...rec, _session: sessionDigest, _summary: summaryDigest });
+  }
+  return expanded;
+}
+
+// System prompt sent with every rank_by_relevance call. It asks the model
+// for a single-line JSON array of {id, score} pairs with score in [0,1];
+// parseRankScores decodes that reply.
+const RANK_SYSTEM_PROMPT =
+  "You are a relevance scorer. Given a target query and a list of records (each with `_id` and a brief content), return a JSON array of {id, score} where score is a float in [0,1] expressing how well that record answers the target. Output ONLY the JSON array on a single line, no prose.";
+
+/**
+ * Renders the user prompt for relevance ranking: the target, one line per
+ * record (`id`, kind and a text excerpt) and the expected reply shape.
+ *
+ * The excerpt is the first string found among `snippet`, `narrative`,
+ * `content`, `title`, cut to 300 characters with whitespace runs collapsed
+ * to single spaces.
+ */
+function buildRankPrompt(records: EnvelopedRecord[], target: string): string {
+  const textFields = ["snippet", "narrative", "content", "title"] as const;
+
+  const describe = (rec: EnvelopedRecord): string => {
+    let body = "";
+    for (const name of textFields) {
+      const candidate = rec[name];
+      if (typeof candidate === "string") {
+        body = candidate;
+        break;
+      }
+    }
+    const clipped = body.length > 300;
+    const flat = (clipped ? body.slice(0, 300) : body).replace(/\s+/g, " ");
+    return `- id=${rec._id} [${rec._kind}]: ${flat}${clipped ? "…" : ""}`;
+  };
+
+  return [
+    `Target: ${target}`,
+    `Records:`,
+    ...records.map(describe),
+    "",
+    "Return: [{\"id\":\"...\",\"score\":0.0}, ...]",
+  ].join("\n");
+}
+
+/**
+ * Extracts `{id, score}` pairs from a model reply.
+ *
+ * The widest `[...]` span in `text` is parsed as JSON; entries with a string
+ * `id` and a numeric `score` are collected into a map. Any failure (no
+ * brackets, invalid JSON, non-array payload) yields an empty map, which
+ * callers treat as "leave existing scores alone".
+ */
+function parseRankScores(text: string): Map<string, number> {
+  const scores = new Map<string, number>();
+  const bracketed = text.match(/\[[\s\S]*\]/);
+  if (bracketed === null) return scores;
+
+  let decoded: unknown;
+  try {
+    decoded = JSON.parse(bracketed[0]);
+  } catch {
+    return scores;
+  }
+  if (!Array.isArray(decoded)) return scores;
+
+  for (const entry of decoded as unknown[]) {
+    if (!entry || typeof entry !== "object") continue;
+    const { id, score } = entry as { id?: unknown; score?: unknown };
+    if (typeof id === "string" && typeof score === "number") {
+      scores.set(id, score);
+    }
+  }
+  return scores;
+}
+
+/**
+ * Re-ranks records by LLM-judged relevance to `target`.
+ *
+ * Only the first 50 records are shown to the model (one provider call,
+ * counted in `ctx.llmCalls`). Records the model scored get that score as
+ * `_score` and are ordered by it, highest first. Records it did not score
+ * (beyond the sample, omitted by the model, or all of them when the reply is
+ * unparseable) keep their previous `_score`, are ordered by it among
+ * themselves, and are placed after every scored record. Pre-existing scores
+ * are on an unrelated scale and must not be compared with the model's
+ * [0,1] values.
+ *
+ * @param topK when given, the result is cut to this many records; `0`
+ *   yields an empty result. When omitted, all records are returned.
+ * @returns a new array of shallow-copied records; an empty input returns
+ *   `[]` without calling the provider.
+ */
+async function applyRankByRelevance(
+  records: EnvelopedRecord[],
+  target: string,
+  topK: number | undefined,
+  ctx: ExecCtx,
+): Promise<EnvelopedRecord[]> {
+  if (records.length === 0) return [];
+  const sample = records.slice(0, 50);
+  const userPrompt = buildRankPrompt(sample, target);
+  const text = await ctx.provider.summarize(RANK_SYSTEM_PROMPT, userPrompt);
+  ctx.llmCalls += 1;
+  const scores = parseRankScores(text);
+
+  const judged: EnvelopedRecord[] = [];
+  const unjudged: EnvelopedRecord[] = [];
+  for (const r of records) {
+    const s = scores.get(r._id);
+    if (s !== undefined) {
+      judged.push({ ...r, _score: s });
+    } else {
+      unjudged.push({ ...r });
+    }
+  }
+
+  // Equal scores (including two missing ones) must compare as 0 explicitly:
+  // -Infinity - -Infinity would otherwise hand NaN to the sort.
+  const byScoreDesc = (a: EnvelopedRecord, b: EnvelopedRecord): number => {
+    const sa = a._score ?? -Infinity;
+    const sb = b._score ?? -Infinity;
+    return sa === sb ? 0 : sb - sa;
+  };
+  judged.sort(byScoreDesc);
+  unjudged.sort(byScoreDesc);
+
+  const ranked = [...judged, ...unjudged];
+  // `topK == null` rather than truthiness, so an explicit 0 is honoured.
+  return topK == null ? ranked : ranked.slice(0, Math.max(0, topK | 0));
+}
+
+// ---------------------------------------------------------------------------
+// Validation
+// ---------------------------------------------------------------------------
+
+/**
+ * Structurally validates a user-supplied pipeline before execution.
+ *
+ * Checks, in order: the value is a non-empty array of at most 32 steps;
+ * every step is an object with a string `op` found in `ALLOWED_OPS`;
+ * `synthesize` appears only as the last step; `for_each` stays within
+ * `DEFAULTS.maxDepth`, has an array `do`, contains no `synthesize` /
+ * `rank_by_relevance` directly inside it, and its `do` pipeline validates
+ * recursively.
+ *
+ * @param pipeline untrusted input
+ * @param ctx error-path prefix and current for_each nesting depth
+ * @returns the pipeline typed as `PipelineStep[]`, or the first error found
+ */
+function validatePipeline(
+  pipeline: unknown,
+  ctx: { path?: string; depth?: number } = {},
+): { ok: true; pipeline: PipelineStep[] } | { ok: false; error: string } {
+  const where = ctx.path ?? "pipeline";
+  const level = ctx.depth ?? 0;
+  const reject = (error: string): { ok: false; error: string } => ({ ok: false, error });
+
+  if (!Array.isArray(pipeline)) return reject(`${where} must be an array`);
+  if (pipeline.length === 0) return reject(`${where} must contain at least one step`);
+  if (pipeline.length > 32) return reject(`${where} exceeds maximum of 32 steps`);
+
+  const lastIndex = pipeline.length - 1;
+  for (let i = 0; i <= lastIndex; i++) {
+    const candidate: unknown = pipeline[i];
+    const at = `${where}[${i}]`;
+
+    if (
+      !candidate ||
+      typeof candidate !== "object" ||
+      typeof (candidate as { op?: unknown }).op !== "string"
+    ) {
+      return reject(`${at}: missing 'op'`);
+    }
+    const op = (candidate as { op: string }).op as PipelineOpName;
+
+    if (!ALLOWED_OPS.has(op)) {
+      return reject(
+        `${at}: op '${op}' is not allowed in mem::query (writers and unknown ops are rejected)`,
+      );
+    }
+    if (op === "synthesize" && i !== lastIndex) {
+      return reject(`${at}: 'synthesize' must be the terminal step`);
+    }
+    if (op !== "for_each") continue;
+
+    if (level + 1 > DEFAULTS.maxDepth) {
+      return reject(`${at}: for_each depth exceeds ${DEFAULTS.maxDepth}`);
+    }
+    const body = (candidate as { do?: unknown }).do;
+    if (!Array.isArray(body)) {
+      return reject(`${at}: for_each requires 'do' (array of steps)`);
+    }
+    // LLM-backed ops would run once per input record inside for_each, so
+    // they are refused up front (cost blowup).
+    for (const [j, inner] of body.entries()) {
+      const innerOp = (inner as { op?: unknown } | null | undefined)?.op;
+      if (innerOp === "synthesize" || innerOp === "rank_by_relevance") {
+        return reject(`${at}.do[${j}]: '${innerOp}' is not allowed inside for_each (LLM blowup)`);
+      }
+    }
+    const nested = validatePipeline(body, { path: `${at}.do`, depth: level + 1 });
+    if (!nested.ok) return nested;
+  }
+
+  return { ok: true, pipeline: pipeline as PipelineStep[] };
+}
+
+// ---------------------------------------------------------------------------
+// Cost estimation (for dry_run)
+// ---------------------------------------------------------------------------
+
+/**
+ * Sums the `COST_CLASS` units of the top-level steps for a dry run. Steps
+ * nested inside `for_each` are not counted, so `min` and `max` are always
+ * the same number.
+ */
+function estimatePipelineCost(pipeline: PipelineStep[]): { min: number; max: number } {
+  const units = pipeline.reduce((sum, step) => sum + COST_CLASS[step.op], 0);
+  return { min: units, max: units };
+}
+
+// ---------------------------------------------------------------------------
+// Executor
+// ---------------------------------------------------------------------------
+
+/**
+ * Mutable state shared by every step of one mem::query execution,
+ * including nested for_each sub-pipelines.
+ */
+interface ExecCtx {
+  // Used to trigger the existing producer functions.
+  sdk: ISdk;
+  // Direct state access (sessions list, session/summary lookups).
+  kv: StateKV;
+  // LLM provider behind rank_by_relevance and synthesize.
+  provider: MemoryProvider;
+  // Epoch ms at which execution began; basis for the reported totalMs.
+  startedAt: number;
+  // Epoch ms after which enforceDeadline aborts the query.
+  deadlineAt: number;
+  // Cost units consumed so far and the cap that may not be exceeded.
+  budget: { spent: number; cap: number };
+  // Number of provider calls made so far.
+  llmCalls: number;
+  // Upper bound on the number of records any single step may output.
+  maxStepOut: number;
+  // Maximum for_each nesting depth allowed at run time.
+  maxDepth: number;
+  // Non-fatal notes copied into the result when non-empty.
+  warnings: string[];
+}
+
+/**
+ * Aborts the query once the wall clock has passed `ctx.deadlineAt`.
+ *
+ * @param stepLabel included in the error message to show where time ran out
+ * @throws QueryRuntimeError with message `deadline_exceeded at <stepLabel>`
+ */
+function enforceDeadline(ctx: ExecCtx, stepLabel: string): void {
+  const expired = Date.now() > ctx.deadlineAt;
+  if (!expired) return;
+  throw new QueryRuntimeError(`deadline_exceeded at ${stepLabel}`);
+}
+
+/**
+ * Error raised by the query executor itself (deadline overruns, internal
+ * dispatch mismatches). Its `name` is set to "QueryRuntimeError".
+ */
+class QueryRuntimeError extends Error {
+  constructor(message: string) {
+    super(message);
+    this.name = "QueryRuntimeError";
+  }
+}
+
+/**
+ * Executes one producer step and returns its output as enveloped records.
+ *
+ * `sessions` is served straight from kv. Every other producer is dispatched
+ * in three stages: build the payload from the step's own fields, trigger the
+ * function id registered for the op in `PRODUCER_FNID`, then convert the raw
+ * reply with the matching `map*` helper. The result is cut to the step's
+ * `maxOut` when present, otherwise to `ctx.maxStepOut`.
+ *
+ * @throws QueryRuntimeError when the op has no entry in `PRODUCER_FNID` or
+ *   no payload builder below.
+ */
+async function runProducer(
+  step: PipelineStep,
+  ctx: ExecCtx,
+): Promise<EnvelopedRecord[]> {
+  // `sessions` has no trigger function — read kv directly, matching the
+  // existing memory_sessions dispatch in src/mcp/server.ts.
+  if (step.op === "sessions") {
+    const sessions = await ctx.kv.list<Session>(KV.sessions);
+    const records = mapSessionsList(sessions, step.id, step.project);
+    return records.slice(0, step.maxOut ?? ctx.maxStepOut);
+  }
+
+  const fnId = PRODUCER_FNID[step.op];
+  if (!fnId) throw new QueryRuntimeError(`runProducer called with non-producer op '${(step as { op: string }).op}'`);
+
+  // Build the payload for the existing tool.
+  let payload: Record<string, unknown> = {};
+  switch (step.op) {
+    case "search":
+      // Unlike the other producers, search gets explicit defaults here.
+      payload = {
+        query: step.query,
+        limit: step.limit ?? 10,
+        format: step.format ?? "full",
+        token_budget: step.token_budget,
+      };
+      break;
+    case "smart_search":
+      payload = {
+        query: step.query,
+        limit: step.limit,
+        project: step.project,
+        includeLessons: step.includeLessons,
+      };
+      break;
+    case "lineage":
+      payload = {
+        query: step.query,
+        limit: step.limit,
+        since: step.since,
+        until: step.until,
+        channels: step.channels,
+        includeAdjacentTurns: step.includeAdjacentTurns,
+        includeGraph: step.includeGraph,
+        order: step.order,
+      };
+      break;
+    case "lesson_recall":
+      payload = {
+        query: step.query,
+        project: step.project,
+        minConfidence: step.minConfidence,
+        limit: step.limit,
+      };
+      break;
+    case "graph_query":
+      payload = {
+        startNodeId: step.startNodeId,
+        nodeType: step.nodeType,
+        query: step.query,
+        maxDepth: step.maxDepth,
+      };
+      break;
+    case "facet_query":
+      payload = {
+        matchAll: step.matchAll,
+        matchAny: step.matchAny,
+        targetType: step.targetType,
+        limit: step.limit,
+      };
+      break;
+    case "insight_list":
+      payload = {
+        project: step.project,
+        minConfidence: step.minConfidence,
+        limit: step.limit,
+      };
+      break;
+    case "timeline":
+      payload = {
+        anchor: step.anchor,
+        project: step.project,
+        before: step.before,
+        after: step.after,
+      };
+      break;
+    case "frontier":
+      payload = {
+        project: step.project,
+        agentId: step.agentId,
+        limit: step.limit,
+      };
+      break;
+    case "vision_search":
+      payload = {
+        queryText: step.queryText,
+        queryImageRef: step.queryImageRef,
+        queryImageBase64: step.queryImageBase64,
+        topK: step.topK,
+        sessionId: step.sessionId,
+      };
+      break;
+    case "profile":
+      payload = {
+        project: step.project,
+        refresh: step.refresh,
+      };
+      break;
+    default:
+      throw new QueryRuntimeError(`runProducer payload missing for op '${(step as { op: string }).op}'`);
+  }
+
+  const raw = await ctx.sdk.trigger({ function_id: fnId, payload });
+
+  // Convert the tool-specific reply into the common envelope shape.
+  let records: EnvelopedRecord[];
+  switch (step.op) {
+    case "search":
+      records = mapSearchResults(raw, step.id);
+      break;
+    case "smart_search":
+      records = mapSmartSearchResult(raw, step.id);
+      break;
+    case "lineage":
+      records = mapLineageResult(raw, step.id);
+      break;
+    case "lesson_recall":
+      records = mapLessonRecallResult(raw, step.id);
+      break;
+    case "graph_query":
+      records = mapGraphQueryResult(raw, step.id);
+      break;
+    case "facet_query":
+      records = mapFacetQueryResult(raw, step.id);
+      break;
+    case "insight_list":
+      records = mapInsightListResult(raw, step.id);
+      break;
+    case "timeline":
+      records = mapTimelineResult(raw, step.id);
+      break;
+    case "frontier":
+      records = mapFrontierResult(raw, step.id);
+      break;
+    case "vision_search":
+      records = mapVisionSearchResult(raw, step.id);
+      break;
+    case "profile":
+      records = mapProfileResult(raw, step.id);
+      break;
+    default:
+      // Not reachable in practice: the payload switch above already threw
+      // for any op without a case.
+      records = [];
+  }
+
+  // Per-step cap wins over the query-wide default when the step declares one.
+  const cap = "maxOut" in step ? (step as { maxOut?: number }).maxOut : undefined;
+  const effectiveCap = cap ?? ctx.maxStepOut;
+  return records.slice(0, effectiveCap);
+}
+
+/**
+ * Runs a single pipeline step against the named record streams.
+ *
+ * The step reads the stream named by `step.in` (default `"_"`; `concat`
+ * instead reads every stream listed in its `in` array) and reports the
+ * stream name it wants its output stored under (`step.out`, default `"_"`).
+ * Storing the output is left to the caller.
+ *
+ * @param streams named streams produced so far in this pipeline
+ * @param depth current for_each nesting depth (0 at top level)
+ * @returns `ok` with the output and the number of provider calls the step
+ *   actually made; `terminal` when `synthesize` produced the final answer;
+ *   `error` for unsupported ops, an invalid `concat`, excessive for_each
+ *   depth, or a failed for_each sub-pipeline.
+ */
+async function executeStep(
+  step: PipelineStep,
+  streams: Map<string, EnvelopedRecord[]>,
+  ctx: ExecCtx,
+  depth: number,
+):
+  | Promise<
+      | { kind: "ok"; output: EnvelopedRecord[]; outputName: string; llmCallsThisStep: number }
+      | { kind: "terminal"; result: QueryResult }
+      | { kind: "error"; error: string }
+    > {
+  const inputName = typeof step.in === "string" ? step.in : "_";
+  const outputName = step.out ?? "_";
+  const input = streams.get(inputName) ?? [];
+  let output: EnvelopedRecord[] = input;
+  let llmCallsThisStep = 0;
+
+  switch (step.op) {
+    // Producers ignore the input stream and fetch fresh records.
+    case "search":
+    case "smart_search":
+    case "lineage":
+    case "lesson_recall":
+    case "graph_query":
+    case "facet_query":
+    case "insight_list":
+    case "timeline":
+    case "sessions":
+    case "frontier":
+    case "vision_search":
+    case "profile":
+      output = await runProducer(step, ctx);
+      break;
+    case "filter": {
+      // Multiple predicates are AND-ed.
+      const preds: Predicate[] = Array.isArray(step.where) ? step.where : [step.where];
+      output = input.filter((r) => preds.every((p) => evalPredicate(p, r)));
+      break;
+    }
+    case "sort":
+      output = stableSort(input, step.by, step.dir ?? "desc");
+      break;
+    case "limit":
+    case "take":
+      output = input.slice(0, Math.max(0, step.n | 0));
+      break;
+    case "drop":
+      output = input.slice(Math.max(0, step.n | 0));
+      break;
+    case "project":
+      output = applyProject(input, step.fields, step.rename);
+      break;
+    case "distinct":
+      output = applyDistinct(input, step.by ?? "_id");
+      break;
+    case "flatten":
+      output = applyFlatten(input, step.field);
+      break;
+    case "concat": {
+      const inNames = Array.isArray(step.in) ? step.in : [];
+      if (inNames.length === 0) {
+        return { kind: "error", error: `step '${step.op}': 'in' must be a non-empty array of stream names` };
+      }
+      // Unknown stream names contribute nothing rather than failing.
+      output = inNames.flatMap((name) => streams.get(name) ?? []);
+      break;
+    }
+    case "group_by":
+      output = applyGroupBy(input, step.by);
+      break;
+    case "top_n_per_group":
+      output = applyTopNPerGroup(input, step.n, step.by, step.dir ?? "desc");
+      break;
+    case "for_each": {
+      if (depth + 1 > (ctx.maxDepth ?? DEFAULTS.maxDepth)) {
+        return { kind: "error", error: `for_each depth exceeded (max ${ctx.maxDepth ?? DEFAULTS.maxDepth})` };
+      }
+      const intoMode = step.into ?? "merge";
+      const collected: EnvelopedRecord[] = [];
+      // Each input record seeds its own run of the sub-pipeline, one after
+      // another, sharing this query's budget and deadline.
+      for (const r of input) {
+        enforceDeadline(ctx, "for_each.iter");
+        const sub = await executePipelineInternal(step.do, ctx, depth + 1, [r]);
+        if (sub.kind === "error") {
+          return { kind: "error", error: sub.error };
+        }
+        if (sub.kind === "records") {
+          if (intoMode === "list") {
+            // "list": keep each iteration's output together as one group.
+            collected.push({
+              _kind: "group",
+              _id: `for_each:${r._id}`,
+              _source: { op: "for_each", stepId: step.id },
+              _parentId: r._id,
+              _groupSize: sub.result.length,
+              members: sub.result,
+            });
+          } else {
+            collected.push(...sub.result);
+          }
+        }
+      }
+      output = collected;
+      break;
+    }
+    case "join": {
+      const rightStream = streams.get(step.right) ?? [];
+      output = applyJoin(input, rightStream, step.on, step.type ?? "left");
+      break;
+    }
+    case "expand_by_session":
+      output = await applyExpandBySession(input, step.field ?? "_sessionId", ctx);
+      break;
+    case "rank_by_relevance": {
+      // Report the calls that really happened: ranking an empty input
+      // returns early without contacting the provider.
+      const llmCallsBefore = ctx.llmCalls;
+      output = await applyRankByRelevance(input, step.target, step.topK, ctx);
+      llmCallsThisStep = ctx.llmCalls - llmCallsBefore;
+      break;
+    }
+    case "synthesize": {
+      const style = step.style ?? "answer";
+      // Citation count is clamped to the range 1..20, defaulting to 6.
+      const maxCitations = Math.max(1, Math.min(step.maxCitations ?? 6, 20));
+      const userPrompt = buildSynthPrompt(input, step.question, style, maxCitations);
+      const text = await ctx.provider.summarize(SYNTH_SYSTEM_PROMPT, userPrompt);
+      ctx.llmCalls += 1;
+      const synth = parseSynthesis(text, input, maxCitations);
+      // trace and totalMs are placeholders here.
+      return {
+        kind: "terminal",
+        result: {
+          kind: "synthesis",
+          result: synth,
+          trace: [],
+          cost: {
+            totalCostUnits: ctx.budget.spent,
+            totalMs: 0,
+            llmCalls: ctx.llmCalls,
+            budgetCap: ctx.budget.cap,
+          },
+        },
+      };
+    }
+    default: {
+      const opName = (step as { op: string }).op;
+      return { kind: "error", error: `unsupported op '${opName}'` };
+    }
+  }
+
+  return { kind: "ok", output, outputName, llmCallsThisStep };
+}
+
+/**
+ * Runs a (sub-)pipeline step by step and collects a per-step trace.
+ *
+ * Before each step the deadline and the remaining budget are checked; the
+ * step's cost class is charged once it completes. Step outputs are cut to
+ * `ctx.maxStepOut` and stored under the stream name the step asked for. The
+ * final result is whatever the default stream `"_"` holds after the last
+ * step.
+ *
+ * Every failure is reported as `{ kind: "error" }` together with the trace
+ * gathered so far, including a deadline overrun detected between steps,
+ * which previously escaped as an exception.
+ *
+ * @param depth current for_each nesting depth (0 at top level)
+ * @param initialInput records that seed the default stream (used by for_each)
+ */
+async function executePipelineInternal(
+  pipeline: PipelineStep[],
+  ctx: ExecCtx,
+  depth: number,
+  initialInput: EnvelopedRecord[] | undefined = undefined,
+): Promise<
+  | { kind: "records"; result: EnvelopedRecord[]; trace: StepTrace[] }
+  | { kind: "synthesis"; result: { summary: string; citations: { kind: EnvelopedKind; id: string }[] }; trace: StepTrace[] }
+  | { kind: "error"; error: string; trace: StepTrace[] }
+> {
+  const trace: StepTrace[] = [];
+  const streams = new Map<string, EnvelopedRecord[]>();
+  streams.set("_", initialInput ?? []);
+
+  const describeError = (err: unknown): string => (err instanceof Error ? err.message : String(err));
+
+  for (const step of pipeline) {
+    // Convert a deadline overrun into an error result so the caller still
+    // receives the trace and cost of the steps that did run.
+    try {
+      enforceDeadline(ctx, step.op);
+    } catch (err) {
+      return { kind: "error", error: describeError(err), trace };
+    }
+    const cost = COST_CLASS[step.op];
+    if (ctx.budget.spent + cost > ctx.budget.cap) {
+      return {
+        kind: "error",
+        error: `budget_exceeded: would spend ${ctx.budget.spent + cost}, cap=${ctx.budget.cap}`,
+        trace,
+      };
+    }
+    const t0 = Date.now();
+    const inputName = typeof step.in === "string" ? step.in : "_";
+    const inCount = (streams.get(inputName) ?? []).length;
+    let result;
+    try {
+      result = await executeStep(step, streams, ctx, depth);
+    } catch (err) {
+      return { kind: "error", error: describeError(err), trace };
+    }
+    if (result.kind === "error") {
+      return { kind: "error", error: result.error, trace };
+    }
+    if (result.kind === "terminal") {
+      const synthRes = result.result;
+      ctx.budget.spent += cost;
+      trace.push({
+        op: step.op,
+        stepId: step.id,
+        inCount,
+        outCount: 0,
+        ms: Date.now() - t0,
+        costClass: cost,
+        llmCalls: 1,
+      });
+      if (synthRes.kind === "synthesis") {
+        return { kind: "synthesis", result: synthRes.result, trace };
+      }
+      // Shouldn't happen — terminal only on synthesize today.
+      return { kind: "error", error: "internal: terminal result not synthesis", trace };
+    }
+    const output = result.output.slice(0, ctx.maxStepOut);
+    streams.set(result.outputName, output);
+    ctx.budget.spent += cost;
+    trace.push({
+      op: step.op,
+      stepId: step.id,
+      inCount,
+      outCount: output.length,
+      ms: Date.now() - t0,
+      costClass: cost,
+      llmCalls: result.llmCallsThisStep > 0 ? result.llmCallsThisStep : undefined,
+    });
+  }
+
+  return { kind: "records", result: streams.get("_") ?? [], trace };
+}
+
+/**
+ * Top-level entry into the executor: runs the pipeline at depth 0 and wraps
+ * the outcome in a `QueryResult` carrying the trace and the cost totals
+ * (units spent, elapsed ms, provider calls, budget cap). Successful results
+ * also carry a copy of `ctx.warnings` when there are any.
+ */
+async function executePipeline(
+  pipeline: PipelineStep[],
+  ctx: ExecCtx,
+): Promise<QueryResult> {
+  const inner = await executePipelineInternal(pipeline, ctx, 0);
+
+  const cost = {
+    totalCostUnits: ctx.budget.spent,
+    totalMs: Date.now() - ctx.startedAt,
+    llmCalls: ctx.llmCalls,
+    budgetCap: ctx.budget.cap,
+  };
+
+  if (inner.kind === "error") {
+    return { kind: "error", error: inner.error, trace: inner.trace, cost };
+  }
+
+  const warnings = ctx.warnings.length > 0 ? [...ctx.warnings] : undefined;
+  return inner.kind === "synthesis"
+    ? { kind: "synthesis", result: inner.result, trace: inner.trace, cost, warnings }
+    : { kind: "records", result: inner.result, trace: inner.trace, cost, warnings };
+}
+
+// ---------------------------------------------------------------------------
+// Registration
+// ---------------------------------------------------------------------------
+
+/**
+ * Clamps a caller-supplied numeric option into `[min, max]`.
+ * Anything that is not a number, or is NaN, is replaced by `fallback` first,
+ * so malformed input can never turn a limit into NaN.
+ */
+function clampNumericOption(value: unknown, fallback: number, min: number, max: number): number {
+  const n = typeof value === "number" && !Number.isNaN(value) ? value : fallback;
+  return Math.min(Math.max(n, min), max);
+}
+
+/**
+ * Registers the `mem::query` function on the SDK.
+ *
+ * The handler validates the pipeline, clamps the execution options (budget,
+ * timeout, per-step output cap, for_each depth), returns the plan and a cost
+ * estimate for `dry_run`, and otherwise executes the pipeline and records a
+ * best-effort audit entry.
+ *
+ * Option values arrive from untrusted callers. A non-numeric `budget` or
+ * `timeoutMs` used to become NaN, and every comparison against NaN is false,
+ * which silently disabled the budget and deadline checks. Such values now
+ * fall back to the defaults.
+ *
+ * @param sdk SDK used for registration and for triggering producer tools
+ * @param kv state store used by the executor and the audit entry
+ * @param provider LLM provider for rank_by_relevance and synthesize
+ */
+export function registerQueryFunction(
+  sdk: ISdk,
+  kv: StateKV,
+  provider: MemoryProvider,
+): void {
+  const warnAuditFailure = (err: unknown): void => {
+    logger.warn("mem::query audit failed", {
+      error: err instanceof Error ? err.message : String(err),
+    });
+  };
+
+  sdk.registerFunction(
+    "mem::query",
+    async (data: QueryRequest): Promise<QueryResult> => {
+      const validation = validatePipeline(data?.pipeline);
+      if (!validation.ok) {
+        return {
+          kind: "error",
+          error: validation.error,
+          trace: [],
+          cost: { totalCostUnits: 0, totalMs: 0, llmCalls: 0, budgetCap: 0 },
+        };
+      }
+
+      const pipeline = validation.pipeline;
+      const opts = data.options ?? {};
+
+      const budgetCap = clampNumericOption(opts.budget, DEFAULTS.budget, 1, DEFAULTS.budgetMax);
+      const timeoutMs = clampNumericOption(opts.timeoutMs, DEFAULTS.timeoutMs, 1_000, DEFAULTS.timeoutMaxMs);
+      const maxStepOut = clampNumericOption(opts.maxStepOut, DEFAULTS.maxStepOut, 1, 2_000);
+      const maxDepth = clampNumericOption(opts.maxDepth, DEFAULTS.maxDepth, 1, 5);
+
+      if (opts.dry_run === true) {
+        return {
+          kind: "dry_run",
+          plan: pipeline,
+          estimatedCost: estimatePipelineCost(pipeline),
+        };
+      }
+
+      const startedAt = Date.now();
+      const ctx: ExecCtx = {
+        sdk,
+        kv,
+        provider,
+        startedAt,
+        deadlineAt: startedAt + timeoutMs,
+        budget: { spent: 0, cap: budgetCap },
+        llmCalls: 0,
+        maxStepOut,
+        maxDepth,
+        warnings: [],
+      };
+
+      const result = await executePipeline(pipeline, ctx);
+
+      // Best-effort audit; non-fatal.
+      try {
+        const summary: Record<string, unknown> = {
+          ops: pipeline.map((s) => s.op),
+          kind: result.kind,
+          steps: pipeline.length,
+        };
+        if ("cost" in result) {
+          summary["totalCostUnits"] = (result as { cost?: QueryCost }).cost?.totalCostUnits;
+          summary["llmCalls"] = (result as { cost?: QueryCost }).cost?.llmCalls;
+        }
+        // Fire-and-forget, but with a rejection handler: the surrounding
+        // try/catch only sees synchronous throws, not a rejected promise.
+        void Promise.resolve(safeAudit(kv, "query", "mem::query", [], summary)).catch(warnAuditFailure);
+      } catch (err) {
+        warnAuditFailure(err);
+      }
+
+      return result;
+    },
+  );
+}
diff --git a/src/index.ts b/src/index.ts
index 09b6aa77..342b63d7 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -50,6 +50,7 @@ import { registerRelationsFunction } from "./functions/relations.js";
 import { registerTimelineFunction } from "./functions/timeline.js";
 import { registerSmartSearchFunction } from "./functions/smart-search.js";
 import { registerLineageFunction } from "./functions/lineage.js";
+import { registerQueryFunction } from "./functions/query.js";
 import { registerProfileFunction } from "./functions/profile.js";
 import { registerAutoForgetFunction } from "./functions/auto-forget.js";
 import { registerExportImportFunction } from "./functions/export-import.js";
@@ -213,6 +214,7 @@ async function main() {
   registerCompressFunction(sdk, kv, provider, metricsStore);
   registerSearchFunction(sdk, kv);
   registerLineageFunction(sdk, kv);
+  registerQueryFunction(sdk, kv, provider);
   registerContextFunction(sdk, kv, config.tokenBudget);
   registerSummarizeFunction(sdk, kv, provider, metricsStore);
   registerMigrateFunction(sdk, kv);
diff --git a/src/mcp/server.ts b/src/mcp/server.ts
index 469244c5..fa22aa1e 100644
--- a/src/mcp/server.ts
+++ b/src/mcp/server.ts
@@ -345,6 +345,31 @@ export function registerMcpEndpoints(
             };
           }
 
+          case "memory_query": {
+            // Reject early: the pipeline must be an array of step objects.
+            if (!Array.isArray(args.pipeline)) {
+              return {
+                status_code: 400,
+                body: { error: "pipeline is required for memory_query and must be an array" },
+              };
+            }
+            const payload: Record<string, unknown> = { pipeline: args.pipeline };
+            // Forward options only when it is a plain object: `typeof [] === "object"`,
+            // so arrays are excluded explicitly; any other non-object value is dropped.
+            const queryOptions = args.options;
+            if (typeof queryOptions === "object" && queryOptions !== null && !Array.isArray(queryOptions)) {
+              payload.options = queryOptions;
+            }
+            const result = await sdk.trigger({ function_id: "mem::query", payload });
+            // Wrap the result as pretty-printed JSON in a single MCP text content item.
+            return {
+              status_code: 200,
+              body: {
+                content: [{ type: "text", text: JSON.stringify(result, null, 2) }],
+              },
+            };
+          }
+
           case "memory_vision_search": {
             const queryText = typeof args.queryText === "string" ? args.queryText : undefined;
             const queryImageRef = typeof args.queryImageRef === "string" ? args.queryImageRef : undefined;
diff --git a/src/mcp/tools-registry.ts b/src/mcp/tools-registry.ts
index 5959fd0c..9c3d16da 100644
--- a/src/mcp/tools-registry.ts
+++ b/src/mcp/tools-registry.ts
@@ -1,9 +1,31 @@
+// Property descriptors mirror the JSON Schema fields we actually emit.
+// Beyond {type, description}, tools (notably memory_query) need nested
+// item schemas (items), discriminated unions (oneOf), nested object
+// shapes (properties + required), and constant/enum constraints. Kept
+// loose enough to express those without forcing every existing tool to
+// adopt the richer shape.
+export type McpPropertySchema = {
+  type?: string | string[]; // one JSON Schema type name, or several alternatives
+  description?: string;
+  items?: McpPropertySchema; // schema applied to array elements
+  properties?: Record<string, McpPropertySchema>; // nested object shape
+  required?: string[]; // names from `properties` that must be present
+  oneOf?: McpPropertySchema[]; // exactly one sub-schema must match
+  anyOf?: McpPropertySchema[]; // at least one sub-schema must match
+  allOf?: McpPropertySchema[]; // every sub-schema must match
+  const?: unknown; // value must equal this literal
+  enum?: unknown[]; // value must be one of these literals
+  default?: unknown;
+  additionalProperties?: boolean | McpPropertySchema;
+  examples?: unknown[];
+};
+
 export type McpToolDef = {
   name: string;
   description: string;
   inputSchema: {
     type: "object";
-    properties: Record<string, { type: string; description: string }>;
+    properties: Record<string, McpPropertySchema>;
     required?: string[];
   };
 };
@@ -949,6 +971,537 @@ export const V010_SLOTS_TOOLS: McpToolDef[] = [
   },
 ];
 
+// ---------------------------------------------------------------------------
+// memory_query (v5-A) — server-side composable retrieval pipeline.
+// ---------------------------------------------------------------------------
+
+// Reused across every step variant. `in`/`out` route between named
+// streams; default stream name is "_". `id` is an optional debug label
+// echoed back in the per-step trace.
+const QUERY_STEP_BASE_PROPS: Record<string, McpPropertySchema> = {
+  id: { type: "string", description: "Optional debug label echoed in trace." },
+  in: {
+    type: "string",
+    description:
+      "Named input stream (default '_'). For multi-stream consumers like `concat`, send `in` as an array of stream names. Producers usually omit this.",
+  },
+  out: {
+    type: "string",
+    description:
+      "Named output stream (default '_'). Set to fork a producer's results into a sidecar stream that downstream steps can `join` against.",
+  },
+};
+
+// Shared sub-schema for filter predicates. Recursive: predicates compose
+// via `all`/`any`/`not`. Each leaf is `{field, op, value}` with `field`
+// supporting dot-paths against the envelope.
+const QUERY_PREDICATE_SCHEMA: McpPropertySchema = {
+  description:
+    "Filter predicate. Leaf form: {field, op, value}. Compose with {all|any: [Predicate, ...]} or {not: Predicate}. `field` accepts dot-paths against the record envelope (e.g. '_kind', '_session.project', 'type'). `op` values: eq, neq, in, not_in, gt, gte, lt, lte, contains, starts_with, exists, since, until. ISO timestamps required for since/until.",
+  // anyOf so schema-aware models can offer either leaf or composite.
+  anyOf: [
+    {
+      type: "object",
+      properties: {
+        field: { type: "string" },
+        op: {
+          enum: [
+            "eq",
+            "neq",
+            "in",
+            "not_in",
+            "gt",
+            "gte",
+            "lt",
+            "lte",
+            "contains",
+            "starts_with",
+            "exists",
+            "since",
+            "until",
+          ],
+        },
+        value: {},
+      },
+      required: ["field", "op"],
+    },
+    { type: "object", properties: { all: { type: "array" } }, required: ["all"] },
+    { type: "object", properties: { any: { type: "array" } }, required: ["any"] },
+    { type: "object", properties: { not: {} }, required: ["not"] },
+  ],
+};
+
+// Each step variant declares its `op` as a const and lists its
+// op-specific fields alongside the shared `id`/`in`/`out` base. Required
+// fields are explicit so schema-aware tool-use models autocomplete the
+// right shape.
+const QUERY_STEP_SCHEMAS: McpPropertySchema[] = [
+  // ---- Producers ----------------------------------------------------------
+  {
+    type: "object",
+    description: "search — BM25/hybrid observation search. Wraps mem::search.",
+    properties: {
+      ...QUERY_STEP_BASE_PROPS,
+      op: { const: "search" },
+      query: { type: "string" },
+      limit: { type: "number", description: "Max raw hits (default 10)." },
+      format: { enum: ["full", "compact", "narrative"] },
+      token_budget: { type: "number" },
+      maxOut: { type: "number", description: "Post-mapping record cap (default 500)." },
+    },
+    required: ["op", "query"],
+  },
+  {
+    type: "object",
+    description:
+      "smart_search — hybrid BM25+vector+graph with lessons-first ranker. Wraps mem::smart-search.",
+    properties: {
+      ...QUERY_STEP_BASE_PROPS,
+      op: { const: "smart_search" },
+      query: { type: "string" },
+      limit: { type: "number" },
+      project: { type: "string" },
+      includeLessons: { type: "boolean" },
+      maxOut: { type: "number" },
+    },
+    required: ["op", "query"],
+  },
+  {
+    type: "object",
+    description:
+      "lineage — chronologically-ordered hits across observation/memory/lesson/summary channels. Wraps mem::lineage. Use to answer 'when did this term enter the corpus?'.",
+    properties: {
+      ...QUERY_STEP_BASE_PROPS,
+      op: { const: "lineage" },
+      query: { type: "string" },
+      limit: { type: "number" },
+      since: { type: "string", description: "ISO timestamp lower bound." },
+      until: { type: "string", description: "ISO timestamp upper bound." },
+      channels: {
+        type: "array",
+        items: { enum: ["observation", "memory", "lesson", "summary"] },
+      },
+      includeAdjacentTurns: { type: "boolean" },
+      includeGraph: { type: "boolean" },
+      order: { enum: ["asc", "desc"] },
+      maxOut: { type: "number" },
+    },
+    required: ["op", "query"],
+  },
+  {
+    type: "object",
+    description: "lesson_recall — full-text lesson search with confidence decay. Wraps mem::lesson-recall.",
+    properties: {
+      ...QUERY_STEP_BASE_PROPS,
+      op: { const: "lesson_recall" },
+      query: { type: "string" },
+      project: { type: "string" },
+      minConfidence: { type: "number" },
+      limit: { type: "number" },
+      maxOut: { type: "number" },
+    },
+    required: ["op", "query"],
+  },
+  {
+    type: "object",
+    description:
+      "graph_query — BFS the concept graph. Returns graph_node and graph_edge records. Wraps mem::graph-query.",
+    properties: {
+      ...QUERY_STEP_BASE_PROPS,
+      op: { const: "graph_query" },
+      startNodeId: { type: "string" },
+      nodeType: { type: "string" },
+      query: { type: "string" },
+      maxDepth: { type: "number" },
+      maxOut: { type: "number" },
+    },
+    required: ["op"],
+  },
+  {
+    type: "object",
+    description:
+      "facet_query — multi-dimensional tag query (AND/OR). At least one of matchAll/matchAny required. Wraps mem::facet-query.",
+    properties: {
+      ...QUERY_STEP_BASE_PROPS,
+      op: { const: "facet_query" },
+      matchAll: { type: "array", items: { type: "string" } },
+      matchAny: { type: "array", items: { type: "string" } },
+      targetType: { type: "string" },
+      limit: { type: "number" },
+      maxOut: { type: "number" },
+    },
+    required: ["op"],
+  },
+  {
+    type: "object",
+    description: "insight_list — synthesized insights, sorted by confidence. Wraps mem::insight-list.",
+    properties: {
+      ...QUERY_STEP_BASE_PROPS,
+      op: { const: "insight_list" },
+      project: { type: "string" },
+      minConfidence: { type: "number" },
+      limit: { type: "number" },
+      maxOut: { type: "number" },
+    },
+    required: ["op"],
+  },
+  {
+    type: "object",
+    description:
+      "timeline — observations around a temporal/keyword anchor. Wraps mem::timeline.",
+    properties: {
+      ...QUERY_STEP_BASE_PROPS,
+      op: { const: "timeline" },
+      anchor: { type: "string", description: "ISO timestamp or keyword." },
+      project: { type: "string" },
+      before: { type: "number" },
+      after: { type: "number" },
+      maxOut: { type: "number" },
+    },
+    required: ["op", "anchor"],
+  },
+  {
+    type: "object",
+    description: "sessions — list known sessions. Reads KV.sessions directly (no LLM, no scan cost).",
+    properties: {
+      ...QUERY_STEP_BASE_PROPS,
+      op: { const: "sessions" },
+      project: { type: "string", description: "Optional project filter." },
+      maxOut: { type: "number" },
+    },
+    required: ["op"],
+  },
+  {
+    type: "object",
+    description: "frontier — unblocked actions ranked by priority+recency. Wraps mem::frontier.",
+    properties: {
+      ...QUERY_STEP_BASE_PROPS,
+      op: { const: "frontier" },
+      project: { type: "string" },
+      agentId: { type: "string" },
+      limit: { type: "number" },
+      maxOut: { type: "number" },
+    },
+    required: ["op"],
+  },
+  {
+    type: "object",
+    description: "vision_search — CLIP-embedding image+text search. Wraps mem::vision-search.",
+    properties: {
+      ...QUERY_STEP_BASE_PROPS,
+      op: { const: "vision_search" },
+      queryText: { type: "string" },
+      queryImageRef: { type: "string" },
+      queryImageBase64: { type: "string" },
+      topK: { type: "number" },
+      sessionId: { type: "string" },
+      maxOut: { type: "number" },
+    },
+    required: ["op"],
+  },
+  {
+    type: "object",
+    description:
+      "profile — single-record project cohort profile (topConcepts/topFiles/etc.). Wraps mem::profile. Returns ONE envelope with _kind='profile'.",
+    properties: {
+      ...QUERY_STEP_BASE_PROPS,
+      op: { const: "profile" },
+      project: { type: "string" },
+      refresh: { type: "boolean" },
+    },
+    required: ["op", "project"],
+  },
+  // ---- Transformers (pure JS, no I/O) -------------------------------------
+  {
+    type: "object",
+    description: "filter — keep records matching the predicate.",
+    properties: {
+      ...QUERY_STEP_BASE_PROPS,
+      op: { const: "filter" },
+      where: QUERY_PREDICATE_SCHEMA,
+    },
+    required: ["op", "where"],
+  },
+  {
+    type: "object",
+    description: "sort — stable multi-key sort. ISO timestamps compare as time.",
+    properties: {
+      ...QUERY_STEP_BASE_PROPS,
+      op: { const: "sort" },
+      by: {
+        description: "Field path (string) or array of paths for tiebreakers. Dot-paths supported.",
+        anyOf: [{ type: "string" }, { type: "array", items: { type: "string" } }],
+      },
+      dir: { enum: ["asc", "desc"] },
+    },
+    required: ["op", "by"],
+  },
+  {
+    type: "object",
+    description: "limit — keep the first N records.",
+    properties: { ...QUERY_STEP_BASE_PROPS, op: { const: "limit" }, n: { type: "number" } },
+    required: ["op", "n"],
+  },
+  {
+    type: "object",
+    description: "take — alias for limit.",
+    properties: { ...QUERY_STEP_BASE_PROPS, op: { const: "take" }, n: { type: "number" } },
+    required: ["op", "n"],
+  },
+  {
+    type: "object",
+    description: "drop — skip the first N records.",
+    properties: { ...QUERY_STEP_BASE_PROPS, op: { const: "drop" }, n: { type: "number" } },
+    required: ["op", "n"],
+  },
+  {
+    type: "object",
+    description:
+      "project — trim/rename fields. Envelope core (_kind, _id, _source, ...) is always preserved.",
+    properties: {
+      ...QUERY_STEP_BASE_PROPS,
+      op: { const: "project" },
+      fields: {
+        type: "array",
+        items: { type: "string" },
+        description: "Whitelist of field paths to keep beyond envelope core.",
+      },
+      rename: {
+        type: "object",
+        description: "Map of fromPath → toPath. Original field is kept.",
+        additionalProperties: { type: "string" },
+      },
+    },
+    required: ["op"],
+  },
+  {
+    type: "object",
+    description: "distinct — dedup by a field (default '_id').",
+    properties: {
+      ...QUERY_STEP_BASE_PROPS,
+      op: { const: "distinct" },
+      by: { type: "string" },
+    },
+    required: ["op"],
+  },
+  {
+    type: "object",
+    description: "flatten — explode an array-valued field into one row per item.",
+    properties: { ...QUERY_STEP_BASE_PROPS, op: { const: "flatten" }, field: { type: "string" } },
+    required: ["op", "field"],
+  },
+  {
+    type: "object",
+    description:
+      "concat — union two or more named streams. The `in` field MUST be an array of stream names for this op.",
+    properties: {
+      id: QUERY_STEP_BASE_PROPS.id,
+      out: QUERY_STEP_BASE_PROPS.out,
+      op: { const: "concat" },
+      in: { type: "array", items: { type: "string" } },
+    },
+    required: ["op", "in"],
+  },
+  {
+    type: "object",
+    description:
+      "group_by — partition stream by field. Produces _kind='group' records with `members[]`. Pair with top_n_per_group to re-flatten.",
+    properties: { ...QUERY_STEP_BASE_PROPS, op: { const: "group_by" }, by: { type: "string" } },
+    required: ["op", "by"],
+  },
+  {
+    type: "object",
+    description: "top_n_per_group — within each group_by group, sort members and keep top N. Re-flattens.",
+    properties: {
+      ...QUERY_STEP_BASE_PROPS,
+      op: { const: "top_n_per_group" },
+      n: { type: "number" },
+      by: { type: "string", description: "Field to sort within each group (default '_score')." },
+      dir: { enum: ["asc", "desc"] },
+    },
+    required: ["op", "n"],
+  },
+  // ---- Cross-step ---------------------------------------------------------
+  {
+    type: "object",
+    description:
+      "for_each — run a sub-pipeline per record. `into: merge` flattens results; `into: list` wraps each iteration as a _kind='group' record. synthesize/rank_by_relevance/nested for_each are REJECTED inside.",
+    properties: {
+      ...QUERY_STEP_BASE_PROPS,
+      op: { const: "for_each" },
+      do: { type: "array", description: "Sub-pipeline steps." },
+      into: { enum: ["merge", "list"] },
+    },
+    required: ["op", "do"],
+  },
+  {
+    type: "object",
+    description:
+      "join — hash-join two streams on a field. Output emits records of the LEFT shape with an attached `_join.right` (matched right record or null).",
+    properties: {
+      ...QUERY_STEP_BASE_PROPS,
+      op: { const: "join" },
+      right: { type: "string", description: "Name of the right-side stream." },
+      on: {
+        type: "object",
+        properties: {
+          left: { type: "string", description: "Field path on left." },
+          right: { type: "string", description: "Field path on right." },
+        },
+        required: ["left", "right"],
+      },
+      type: { enum: ["inner", "left"] },
+    },
+    required: ["op", "right", "on"],
+  },
+  {
+    type: "object",
+    description:
+      "expand_by_session — for each unique value of `field` (default '_sessionId'), fetch Session + SessionSummary from KV and attach as `_session` + `_summary` on every record. Cached per unique id within the step.",
+    properties: {
+      ...QUERY_STEP_BASE_PROPS,
+      op: { const: "expand_by_session" },
+      field: { type: "string" },
+    },
+    required: ["op"],
+  },
+  // ---- Aggregators (LLM) --------------------------------------------------
+  {
+    type: "object",
+    description:
+      "synthesize — terminal LLM aggregator. Returns {summary, citations[]}. MUST be the last step. One LLM call. Result kind switches to 'synthesis'.",
+    properties: {
+      ...QUERY_STEP_BASE_PROPS,
+      op: { const: "synthesize" },
+      question: { type: "string" },
+      style: { enum: ["answer", "bullets", "timeline"] },
+      maxCitations: { type: "number" },
+    },
+    required: ["op", "question"],
+  },
+  {
+    type: "object",
+    description:
+      "rank_by_relevance — re-score and re-sort records by LLM relevance to `target`. Non-terminal (still emits records). One LLM call.",
+    properties: {
+      ...QUERY_STEP_BASE_PROPS,
+      op: { const: "rank_by_relevance" },
+      target: { type: "string" },
+      topK: { type: "number" },
+    },
+    required: ["op", "target"],
+  },
+];
+
+const QUERY_OPTIONS_SCHEMA: McpPropertySchema = {
+  type: "object",
+  description: "Execution knobs.",
+  properties: {
+    budget: {
+      type: "number",
+      description:
+        "Sum of step cost units (cheap=1, medium=3, expensive=10). Default 30, max 100. Pipeline aborts if exceeded.",
+    },
+    timeoutMs: { type: "number", description: "Default 10000, max 30000. Deadline checked per step." },
+    maxStepOut: { type: "number", description: "Records-per-step cap. Default 500, max 2000." },
+    maxDepth: { type: "number", description: "for_each nesting cap. Default 3, max 5." },
+    dry_run: {
+      type: "boolean",
+      description:
+        "If true, validate the pipeline + return {kind:'dry_run', plan, estimatedCost} without executing any step. Recommended to invoke once with dry_run before paying for a costly pipeline.",
+    },
+  },
+};
+
+// Three literal examples covering simple → multi-stream → terminal-LLM
+// to anchor the LLM's mental model. Echoed in the description so
+// non-schema-aware models see them too.
+const QUERY_EXAMPLES = [
+  {
+    title: "1) Recent decision memories about X (no LLM)",
+    pipeline: [
+      { op: "search", query: "decision about X", limit: 30 },
+      { op: "filter", where: { field: "_kind", op: "eq", value: "memory" } },
+      { op: "sort", by: "_createdAt", dir: "desc" },
+      { op: "limit", n: 5 },
+    ],
+  },
+  {
+    title: "2) Per-project top-2 lineage hits, by score (no LLM)",
+    pipeline: [
+      { op: "lineage", query: "Y", limit: 200 },
+      { op: "filter", where: { field: "_project", op: "exists" } },
+      { op: "group_by", by: "_project" },
+      { op: "top_n_per_group", n: 2, by: "_score", dir: "desc" },
+    ],
+  },
+  {
+    title:
+      "3) Multi-stream join + terminal synthesis (1 LLM call) — recall + lessons over the past 7 days",
+    pipeline: [
+      {
+        op: "lineage",
+        out: "ctx",
+        query: "X",
+        since: "2026-05-12T00:00:00Z",
+        limit: 100,
+      },
+      { op: "lesson_recall", out: "lessons", query: "X", limit: 30 },
+      {
+        op: "join",
+        in: "ctx",
+        right: "lessons",
+        on: { left: "_sessionId", right: "_sessionId" },
+        type: "left",
+      },
+      { op: "rank_by_relevance", target: "explain X", topK: 12 },
+      {
+        op: "synthesize",
+        question: "Explain X in light of recent activity and lessons.",
+        style: "bullets",
+        maxCitations: 10,
+      },
+    ],
+    options: { budget: 50, timeoutMs: 20000 },
+  },
+];
+
+// Tool description shown to MCP clients: workflow, stream/envelope rules, the op
+// catalogue, and QUERY_EXAMPLES rendered as JSON. Limits mirror QUERY_OPTIONS_SCHEMA.
+const QUERY_DESCRIPTION = `Run a composable retrieval pipeline in a single MCP call. The pipeline is an array of typed step objects; each step has \`op\` plus op-specific fields. Use this as your FIRST reach for "what do I remember about X" questions — composition is server-side so multi-step recall is one round-trip, not N.
+
+WORKFLOW: invoke once with options.dry_run=true to validate shape + see estimatedCost, then re-invoke without dry_run. Read-only by construction — writers are rejected.
+
+STREAMS: default stream is "_". Most steps thread it implicitly. Use \`out: "name"\` on a producer to fork into a named stream, then \`in: "name"\` (or \`right: "name"\` for join, \`in: ["a","b"]\` for concat) to pull from it.
+
+ENVELOPE: every record normalizes to \`{_kind, _id, _sessionId?, _project?, _createdAt?, _score?, _kindSpecific?, _source, ...rawFields}\`. Legal _kind values: observation, memory, lesson, insight, action, session, summary, timeline_item, graph_node, graph_edge, slot, facet_hit, signal, checkpoint, frontier_entry, vision_hit, profile, group. Predicates and sort use dot-paths (\`_kind\`, \`_session.project\`, \`type\`).
+
+OPS — producers: search, smart_search, lineage, lesson_recall, graph_query, facet_query, insight_list, timeline, sessions, frontier, vision_search, profile. Transformers (pure JS): filter, sort, limit/take/drop, project, distinct, flatten, concat, group_by, top_n_per_group. Cross-step: for_each (synthesize/rank inside REJECTED), join, expand_by_session. Aggregators (LLM): synthesize (must be terminal; switches result.kind → "synthesis"), rank_by_relevance (non-terminal, one LLM call).
+
+LITERAL EXAMPLES:
+${QUERY_EXAMPLES.map((ex) => `${ex.title}\n${JSON.stringify({ pipeline: ex.pipeline, ...(("options" in ex && ex.options) ? { options: ex.options } : {}) }, null, 2)}`).join("\n\n")}
+
+Options: budget (default 30, max 100), timeoutMs (default 10000, max 30000), maxStepOut (default 500, max 2000), maxDepth (default 3, max 5), dry_run.`;
+
+export const V020_QUERY_TOOLS: McpToolDef[] = [
+  {
+    name: "memory_query",
+    description: QUERY_DESCRIPTION,
+    inputSchema: {
+      type: "object",
+      properties: {
+        pipeline: {
+          type: "array",
+          description:
+            "Ordered pipeline steps. Each item is a discriminated-union object keyed by `op`. See the per-op schemas (oneOf) for the exact shape of each step.",
+          items: { oneOf: QUERY_STEP_SCHEMAS },
+        },
+        options: QUERY_OPTIONS_SCHEMA,
+      },
+      required: ["pipeline"],
+    },
+  },
+];
 
 const ESSENTIAL_TOOLS = new Set([
   "memory_save",
@@ -959,6 +1512,7 @@ const ESSENTIAL_TOOLS = new Set([
   "memory_diagnose",
   "memory_lesson_save",
   "memory_reflect",
+  "memory_query",
 ]);
 
 export function getAllTools(): McpToolDef[] {
@@ -971,6 +1525,7 @@ export function getAllTools(): McpToolDef[] {
     ...V070_TOOLS,
     ...V073_TOOLS,
     ...V010_SLOTS_TOOLS,
+    ...V020_QUERY_TOOLS,
   ];
 }
 
diff --git a/src/types.ts b/src/types.ts
index e66988ca..c38829a0 100644
--- a/src/types.ts
+++ b/src/types.ts
@@ -939,3 +939,273 @@ export interface StateScope {
 }
 
 export type StateScopeKey = keyof StateScope;
+
+// ---------------------------------------------------------------------------
+// v5-A: mem::query — server-side composable retrieval pipeline
+// ---------------------------------------------------------------------------
+
+export type EnvelopedKind =
+  | "observation"
+  | "memory"
+  | "lesson"
+  | "insight"
+  | "action"
+  | "session"
+  | "summary"
+  | "timeline_item"
+  | "graph_node"
+  | "graph_edge"
+  | "slot"
+  | "facet_hit"
+  | "signal"
+  | "checkpoint"
+  | "frontier_entry"
+  | "vision_hit"
+  | "profile"
+  | "group";
+
+export interface EnvelopedRecord {
+  _kind: EnvelopedKind;
+  _id: string;
+  _sessionId?: string;
+  _project?: string;
+  _createdAt?: string;
+  _score?: number;
+  _kindSpecific?: string;
+  _source: { op: string; stepId?: string };
+  [extra: string]: unknown;
+}
+
+export type PredicateOp =
+  | "eq"
+  | "neq"
+  | "in"
+  | "not_in"
+  | "gt"
+  | "gte"
+  | "lt"
+  | "lte"
+  | "contains"
+  | "starts_with"
+  | "exists"
+  | "since"
+  | "until";
+
+export type Predicate =
+  | { field: string; op: PredicateOp; value?: unknown }
+  | { any: Predicate[] }
+  | { all: Predicate[] }
+  | { not: Predicate };
+
+interface StepBase {
+  id?: string;
+  in?: string | string[];
+  out?: string;
+}
+
+export type PipelineStep =
+  // Producers
+  | (StepBase & {
+      op: "search";
+      query: string;
+      limit?: number;
+      format?: "full" | "compact" | "narrative";
+      token_budget?: number;
+      maxOut?: number;
+    })
+  | (StepBase & {
+      op: "smart_search";
+      query: string;
+      limit?: number;
+      project?: string;
+      includeLessons?: boolean;
+      maxOut?: number;
+    })
+  | (StepBase & {
+      op: "lineage";
+      query: string;
+      limit?: number;
+      since?: string;
+      until?: string;
+      channels?: LineageChannel[];
+      includeAdjacentTurns?: boolean;
+      includeGraph?: boolean;
+      order?: "asc" | "desc";
+      maxOut?: number;
+    })
+  | (StepBase & {
+      op: "lesson_recall";
+      query: string;
+      project?: string;
+      minConfidence?: number;
+      limit?: number;
+      maxOut?: number;
+    })
+  | (StepBase & {
+      op: "graph_query";
+      startNodeId?: string;
+      nodeType?: string;
+      query?: string;
+      maxDepth?: number;
+      maxOut?: number;
+    })
+  | (StepBase & {
+      op: "facet_query";
+      matchAll?: string[];
+      matchAny?: string[];
+      targetType?: string;
+      limit?: number;
+      maxOut?: number;
+    })
+  | (StepBase & {
+      op: "insight_list";
+      project?: string;
+      minConfidence?: number;
+      limit?: number;
+      maxOut?: number;
+    })
+  | (StepBase & {
+      op: "timeline";
+      anchor: string;
+      project?: string;
+      before?: number;
+      after?: number;
+      maxOut?: number;
+    })
+  | (StepBase & {
+      op: "sessions";
+      project?: string;
+      maxOut?: number;
+    })
+  | (StepBase & {
+      op: "frontier";
+      project?: string;
+      agentId?: string;
+      limit?: number;
+      maxOut?: number;
+    })
+  | (StepBase & {
+      op: "vision_search";
+      queryText?: string;
+      queryImageRef?: string;
+      queryImageBase64?: string;
+      topK?: number;
+      sessionId?: string;
+      maxOut?: number;
+    })
+  | (StepBase & {
+      op: "profile";
+      project: string;
+      refresh?: boolean;
+    })
+  // Transformers
+  | (StepBase & { op: "filter"; where: Predicate | Predicate[] })
+  | (StepBase & {
+      op: "sort";
+      by: string | string[];
+      dir?: "asc" | "desc";
+    })
+  | (StepBase & { op: "limit"; n: number })
+  | (StepBase & { op: "take"; n: number })
+  | (StepBase & { op: "drop"; n: number })
+  | (StepBase & {
+      op: "project";
+      fields?: string[];
+      rename?: Record<string, string>;
+    })
+  | (StepBase & { op: "distinct"; by?: string })
+  | (StepBase & { op: "flatten"; field: string })
+  | (StepBase & { op: "concat"; in: string[] })
+  | (StepBase & { op: "group_by"; by: string })
+  | (StepBase & {
+      op: "top_n_per_group";
+      n: number;
+      by?: string;
+      dir?: "asc" | "desc";
+    })
+  // Cross-step
+  | (StepBase & {
+      op: "for_each";
+      do: PipelineStep[];
+      into?: "merge" | "list";
+    })
+  | (StepBase & {
+      op: "join";
+      right: string;
+      on: { left: string; right: string };
+      type?: "inner" | "left";
+    })
+  | (StepBase & { op: "expand_by_session"; field?: string })
+  // Aggregators
+  | (StepBase & {
+      op: "synthesize";
+      question: string;
+      style?: "answer" | "bullets" | "timeline";
+      maxCitations?: number;
+    })
+  | (StepBase & {
+      op: "rank_by_relevance";
+      target: string;
+      topK?: number;
+    });
+
+export type PipelineOpName = PipelineStep["op"];
+
+export interface QueryOptions {
+  budget?: number; // cap on summed step cost units (tool schema: default 30, max 100)
+  timeoutMs?: number; // deadline in ms (tool schema: default 10000, max 30000)
+  maxStepOut?: number; // records-per-step cap (tool schema: default 500, max 2000)
+  maxDepth?: number; // for_each nesting cap (tool schema: default 3, max 5)
+  dry_run?: boolean; // validate and estimate cost without executing any step
+}
+
+export interface QueryRequest {
+  pipeline: PipelineStep[];
+  options?: QueryOptions;
+}
+
+export interface StepTrace {
+  op: string;
+  stepId?: string;
+  inCount: number;
+  outCount: number;
+  ms: number;
+  costClass: 1 | 3 | 10; // presumably the cost units the tool schema calls cheap=1, medium=3, expensive=10
+  llmCalls?: number;
+  warnings?: string[];
+}
+
+export interface QueryCost {
+  totalCostUnits: number;
+  totalMs: number;
+  llmCalls: number;
+  budgetCap: number;
+}
+
+export type QueryResult =
+  | {
+      kind: "records";
+      result: EnvelopedRecord[];
+      trace: StepTrace[];
+      cost: QueryCost;
+      warnings?: string[];
+    }
+  | {
+      kind: "synthesis";
+      result: { summary: string; citations: { kind: EnvelopedKind; id: string }[] };
+      trace: StepTrace[];
+      cost: QueryCost;
+      warnings?: string[];
+    }
+  | {
+      kind: "dry_run";
+      plan: PipelineStep[];
+      estimatedCost: { min: number; max: number };
+      validationErrors?: string[];
+    }
+  | {
+      kind: "error";
+      error: string;
+      trace: StepTrace[];
+      cost: QueryCost;
+    };
diff --git a/test/query-integration.test.ts b/test/query-integration.test.ts
new file mode 100644
index 00000000..a9d4f113
--- /dev/null
+++ b/test/query-integration.test.ts
@@ -0,0 +1,501 @@
+import { describe, it, expect, beforeEach, vi } from "vitest";
+
+vi.mock("../src/logger.js", () => ({
+  logger: { info: vi.fn(), warn: vi.fn(), error: vi.fn() },
+}));
+
+import { registerQueryFunction } from "../src/functions/query.js";
+import type {
+  CompressedObservation,
+  Lesson,
+  LineageResult,
+  MemoryProvider,
+  QueryRequest,
+  QueryResult,
+  Session,
+  SessionSummary,
+  TimelineItem,
+} from "../src/types.js";
+
+// ---- mocks -----------------------------------------------------------------
+
+function mockKV() {
+  const store = new Map<string, Map<string, unknown>>();
+  return {
+    store,
+    get: async <T>(scope: string, key: string): Promise<T | null> => {
+      return (store.get(scope)?.get(key) as T) ?? null;
+    },
+    set: async <T>(scope: string, key: string, data: T): Promise<T> => {
+      if (!store.has(scope)) store.set(scope, new Map());
+      store.get(scope)!.set(key, data);
+      return data;
+    },
+    delete: async (scope: string, key: string): Promise<void> => {
+      store.get(scope)?.delete(key);
+    },
+    list: async <T>(scope: string): Promise<T[]> => {
+      const entries = store.get(scope);
+      return entries ? (Array.from(entries.values()) as T[]) : [];
+    },
+  };
+}
+
+function mockSdk() {
+  const functions = new Map<string, Function>();
+  return {
+    functions,
+    registerFunction: (idOrOpts: string | { id: string }, handler: Function) => {
+      const id = typeof idOrOpts === "string" ? idOrOpts : idOrOpts.id;
+      functions.set(id, handler);
+    },
+    registerTrigger: () => {},
+    trigger: async (idOrInput: string | { function_id: string; payload: unknown }, data?: unknown) => {
+      const id = typeof idOrInput === "string" ? idOrInput : idOrInput.function_id;
+      const payload = typeof idOrInput === "string" ? data : idOrInput.payload;
+      const fn = functions.get(id);
+      if (!fn) throw new Error(`No function registered: ${id}`);
+      return fn(payload);
+    },
+  };
+}
+
+interface MockProvider extends MemoryProvider {
+  calls: { kind: "summarize" | "compress"; system: string; user: string }[];
+}
+
+function mockProvider(): MockProvider {
+  const calls: { kind: "summarize" | "compress"; system: string; user: string }[] = [];
+  return {
+    name: "mock",
+    calls,
+    summarize: async (system: string, user: string) => {
+      calls.push({ kind: "summarize", system, user });
+      // Heuristic: if the system prompt is the ranker, return a JSON
+      // array assigning a fake score per id mentioned in user prompt.
+      if (system.includes("relevance scorer")) {
+        const ids = [...user.matchAll(/id=(\S+)/g)].map((m) => m[1]);
+        const arr = ids.map((id, i) => ({ id, score: 1 - i * 0.1 }));
+        return JSON.stringify(arr);
+      }
+      // Otherwise it's the synthesize prompt: echo the ids it finds as
+      // citations so the parser can pick them up.
+      const ids = [...user.matchAll(/_id=(\S+)/g)].map((m) => m[1]);
+      const summary = `STUB SUMMARY mentioning ${ids.slice(0, 3).join(", ")}`;
+      const citations = JSON.stringify(ids.slice(0, 3).map((id) => ({ kind: "memory", id })));
+      return `${summary}\nCITATIONS: ${citations}`;
+    },
+    compress: async () => "STUB COMPRESS",
+  };
+}
+
+// ---- helpers ---------------------------------------------------------------
+
+function timelineItem(overrides: Partial<TimelineItem>): TimelineItem {
+  return {
+    timestamp: "2026-05-15T00:00:00Z",
+    channel: "memory",
+    id: "tl-default",
+    sessionId: "s-default",
+    project: "proj-A",
+    title: "Default title",
+    snippet: "Default snippet",
+    score: 1.0,
+    ...overrides,
+  } as TimelineItem;
+}
+
+function makeLineage(items: TimelineItem[]): LineageResult {
+  return {
+    query: "stub",
+    firstMention: items[0]
+      ? {
+          timestamp: items[0].timestamp,
+          channel: items[0].channel,
+          sessionId: items[0].sessionId,
+          project: items[0].project,
+        }
+      : null,
+    timeline: items,
+    totalsByChannel: {
+      observation: items.filter((i) => i.channel === "observation").length,
+      memory: items.filter((i) => i.channel === "memory").length,
+      lesson: items.filter((i) => i.channel === "lesson").length,
+      summary: items.filter((i) => i.channel === "summary").length,
+    },
+  };
+}
+
+async function callQuery(
+  sdk: ReturnType<typeof mockSdk>,
+  req: QueryRequest,
+): Promise<QueryResult> {
+  return (await sdk.trigger({ function_id: "mem::query", payload: req })) as QueryResult;
+}
+
+// ---- tests -----------------------------------------------------------------
+
+describe("mem::query — integration", () => {
+  let sdk: ReturnType<typeof mockSdk>;
+  let kv: ReturnType<typeof mockKV>;
+  let provider: MockProvider;
+
+  beforeEach(() => {
+    sdk = mockSdk();
+    kv = mockKV();
+    provider = mockProvider();
+    registerQueryFunction(sdk as never, kv as never, provider);
+  });
+
+  it("rejects writer ops at validation time", async () => {
+    const result = await callQuery(sdk, {
+      pipeline: [{ op: "save" as never, content: "x" } as never],
+    });
+    expect(result.kind).toBe("error");
+    if (result.kind === "error") {
+      expect(result.error).toMatch(/not allowed/);
+    }
+  });
+
+  it("rejects empty pipeline", async () => {
+    const result = await callQuery(sdk, { pipeline: [] as never });
+    expect(result.kind).toBe("error");
+  });
+
+  it("rejects synthesize that isn't terminal", async () => {
+    const result = await callQuery(sdk, {
+      pipeline: [
+        { op: "synthesize", question: "huh" },
+        { op: "limit", n: 1 },
+      ] as never,
+    });
+    expect(result.kind).toBe("error");
+    if (result.kind === "error") expect(result.error).toMatch(/terminal/);
+  });
+
+  it("rejects synthesize inside for_each (LLM blowup)", async () => {
+    const result = await callQuery(sdk, {
+      pipeline: [
+        {
+          op: "for_each",
+          do: [{ op: "synthesize", question: "no" }],
+        },
+      ] as never,
+    });
+    expect(result.kind).toBe("error");
+    if (result.kind === "error") expect(result.error).toMatch(/synthesize.*not allowed inside for_each/);
+  });
+
+  it("rejects rank_by_relevance inside for_each", async () => {
+    const result = await callQuery(sdk, {
+      pipeline: [
+        {
+          op: "for_each",
+          do: [{ op: "rank_by_relevance", target: "x" }],
+        },
+      ] as never,
+    });
+    expect(result.kind).toBe("error");
+    if (result.kind === "error") expect(result.error).toMatch(/rank_by_relevance.*not allowed/);
+  });
+
+  it("dry_run returns plan and estimated cost without executing", async () => {
+    sdk.registerFunction("mem::lineage", async () => {
+      throw new Error("should not be called");
+    });
+    const result = await callQuery(sdk, {
+      pipeline: [
+        { op: "lineage", query: "x" },
+        { op: "filter", where: { field: "_kind", op: "eq", value: "memory" } },
+        { op: "limit", n: 5 },
+      ],
+      options: { dry_run: true },
+    });
+    expect(result.kind).toBe("dry_run");
+    if (result.kind === "dry_run") {
+      expect(result.plan.length).toBe(3);
+      expect(result.estimatedCost.min).toBe(3 + 1 + 1); // lineage:3 + filter:1 + limit:1
+      expect(result.estimatedCost.max).toBe(result.estimatedCost.min);
+    }
+  });
+
+  it("runs producer + transformers and returns records", async () => {
+    sdk.registerFunction("mem::lineage", async () =>
+      makeLineage([
+        timelineItem({ id: "m1", channel: "memory", title: "Decision A", score: 0.9 }),
+        timelineItem({ id: "o1", channel: "observation", title: "Obs A", score: 0.5 }),
+        timelineItem({ id: "m2", channel: "memory", title: "Decision B", score: 0.7 }),
+      ]),
+    );
+    const result = await callQuery(sdk, {
+      pipeline: [
+        { op: "lineage", query: "x" },
+        { op: "filter", where: { field: "_kind", op: "eq", value: "memory" } },
+        { op: "sort", by: "_score", dir: "desc" },
+        { op: "limit", n: 2 },
+      ],
+    });
+    expect(result.kind).toBe("records");
+    if (result.kind === "records") {
+      expect(result.result.length).toBe(2);
+      expect(result.result[0]._id).toBe("m1");
+      expect(result.result[1]._id).toBe("m2");
+      expect(result.cost.llmCalls).toBe(0);
+      expect(result.trace.map((t) => t.op)).toEqual(["lineage", "filter", "sort", "limit"]);
+    }
+  });
+
+  it("synthesize terminates pipeline and invokes the provider once", async () => {
+    sdk.registerFunction("mem::lineage", async () =>
+      makeLineage([
+        timelineItem({ id: "m1", channel: "memory", title: "Pick X", snippet: "We picked X." }),
+        timelineItem({ id: "m2", channel: "memory", title: "Rejected Y", snippet: "Considered Y but…" }),
+      ]),
+    );
+    const result = await callQuery(sdk, {
+      pipeline: [
+        { op: "lineage", query: "pick X" },
+        { op: "synthesize", question: "Why X?", style: "answer", maxCitations: 3 },
+      ],
+    });
+    expect(result.kind).toBe("synthesis");
+    if (result.kind === "synthesis") {
+      expect(result.cost.llmCalls).toBe(1);
+      expect(provider.calls.length).toBe(1);
+      expect(provider.calls[0].kind).toBe("summarize");
+      expect(result.result.summary).toContain("STUB SUMMARY");
+      expect(result.result.citations.length).toBeGreaterThan(0);
+      expect(result.result.citations.some((c) => c.id === "m1")).toBe(true);
+    }
+  });
+
+  it("budget_exceeded short-circuits before terminal LLM step", async () => {
+    sdk.registerFunction("mem::lineage", async () =>
+      makeLineage([timelineItem({ id: "m1" })]),
+    );
+    const result = await callQuery(sdk, {
+      pipeline: [
+        { op: "lineage", query: "x" },
+        { op: "synthesize", question: "?" },
+      ],
+      options: { budget: 5 },
+    });
+    expect(result.kind).toBe("error");
+    if (result.kind === "error") {
+      expect(result.error).toMatch(/budget_exceeded/);
+      // lineage ran (cost 3), but synthesize (cost 10) would push spent to 13 > cap 5
+      expect(result.cost.totalCostUnits).toBe(3);
+      expect(result.cost.llmCalls).toBe(0);
+    }
+  });
+
+  it("named streams + join + distinct + limit", async () => {
+    sdk.registerFunction("mem::lineage", async () =>
+      makeLineage([
+        timelineItem({ id: "tl1", sessionId: "s1", channel: "observation" }),
+        timelineItem({ id: "tl2", sessionId: "s2", channel: "observation" }),
+        timelineItem({ id: "tl3", sessionId: "s1", channel: "observation" }),
+      ]),
+    );
+    sdk.registerFunction("mem::lesson-recall", async () => ({
+      success: true,
+      lessons: [
+        { id: "lsn-s1", content: "lesson about s1", project: "p", createdAt: "2026-01-01T00:00:00Z", confidence: 0.9, tags: [] } as Lesson,
+      ],
+    }));
+    const result = await callQuery(sdk, {
+      pipeline: [
+        { op: "lineage", out: "ctx", query: "x" },
+        { op: "lesson_recall", out: "lessons", query: "x" },
+        { op: "join", in: "ctx", right: "lessons", on: { left: "_sessionId", right: "_id" }, type: "left" },
+      ] as never,
+    });
+    // Note: lessons are emitted with _id="lsn-s1" not session id, so this
+    // particular `on` doesn't match — that's intentional, tests the
+    // left-join null path.
+    expect(result.kind).toBe("records");
+    if (result.kind === "records") {
+      expect(result.result.length).toBe(3); // 3 left, none matched (left-join keeps all)
+      expect(result.result.every((r) => "_join" in r)).toBe(true);
+    }
+  });
+
+  it("group_by + top_n_per_group: per-project limit", async () => {
+    sdk.registerFunction("mem::lineage", async () =>
+      makeLineage([
+        timelineItem({ id: "p1a", project: "p1", score: 1, timestamp: "2026-01-01T00:00:00Z" }),
+        timelineItem({ id: "p1b", project: "p1", score: 5, timestamp: "2026-01-02T00:00:00Z" }),
+        timelineItem({ id: "p1c", project: "p1", score: 3, timestamp: "2026-01-03T00:00:00Z" }),
+        timelineItem({ id: "p2a", project: "p2", score: 9, timestamp: "2026-01-04T00:00:00Z" }),
+        timelineItem({ id: "p2b", project: "p2", score: 2, timestamp: "2026-01-05T00:00:00Z" }),
+      ]),
+    );
+    const result = await callQuery(sdk, {
+      pipeline: [
+        { op: "lineage", query: "x" },
+        { op: "group_by", by: "_project" },
+        { op: "top_n_per_group", n: 2, by: "_score", dir: "desc" },
+      ],
+    });
+    expect(result.kind).toBe("records");
+    if (result.kind === "records") {
+      expect(result.result.length).toBe(4); // 2 per project, 2 projects
+      const p1 = result.result.filter((r) => r._project === "p1");
+      const p2 = result.result.filter((r) => r._project === "p2");
+      expect(p1.length).toBe(2);
+      expect(p2.length).toBe(2);
+      expect(p1[0]._id).toBe("p1b"); // top by score
+    }
+  });
+
+  it("for_each runs sub-pipeline per record and merges by default", async () => {
+    sdk.registerFunction("mem::lineage", async () =>
+      makeLineage([
+        timelineItem({ id: "x1", channel: "observation" }),
+        timelineItem({ id: "x2", channel: "memory" }),
+        timelineItem({ id: "x3", channel: "observation" }),
+      ]),
+    );
+    const result = await callQuery(sdk, {
+      pipeline: [
+        { op: "lineage", query: "x" },
+        {
+          op: "for_each",
+          do: [{ op: "filter", where: { field: "_kind", op: "eq", value: "observation" } }],
+          into: "merge",
+        },
+      ],
+    });
+    expect(result.kind).toBe("records");
+    if (result.kind === "records") {
+      expect(result.result.length).toBe(2); // x1, x3 survive
+      expect(result.result.every((r) => r._kind === "observation")).toBe(true);
+    }
+  });
+
+  it("rank_by_relevance applies a single LLM call and re-sorts", async () => {
+    sdk.registerFunction("mem::lineage", async () =>
+      makeLineage([
+        timelineItem({ id: "a", title: "A" }),
+        timelineItem({ id: "b", title: "B" }),
+        timelineItem({ id: "c", title: "C" }),
+      ]),
+    );
+    const result = await callQuery(sdk, {
+      pipeline: [
+        { op: "lineage", query: "x" },
+        { op: "rank_by_relevance", target: "best one", topK: 2 },
+      ],
+    });
+    expect(result.kind).toBe("records");
+    if (result.kind === "records") {
+      expect(result.cost.llmCalls).toBe(1);
+      expect(provider.calls.length).toBe(1);
+      expect(provider.calls[0].system).toMatch(/relevance scorer/);
+      expect(result.result.length).toBe(2); // topK applied
+      // Mock assigns scores 1.0, 0.9, 0.8 in record order → first two survive
+      expect(result.result[0]._id).toBe("a");
+      expect(result.result[1]._id).toBe("b");
+    }
+  });
+
+  it("expand_by_session loads session + summary from KV", async () => {
+    sdk.registerFunction("mem::lineage", async () =>
+      makeLineage([timelineItem({ id: "obs-1", sessionId: "ses-1" })]),
+    );
+    const session: Session = {
+      id: "ses-1",
+      project: "p",
+      startedAt: "2026-01-01T00:00:00Z",
+      firstPrompt: "hello there",
+    } as Session;
+    const summary: SessionSummary = {
+      sessionId: "ses-1",
+      title: "What we did",
+      narrative: "Did things",
+      createdAt: "2026-01-02T00:00:00Z",
+      project: "p",
+    } as SessionSummary;
+    await kv.set("mem:sessions", "ses-1", session);
+    await kv.set("mem:summaries", "ses-1", summary);
+
+    const result = await callQuery(sdk, {
+      pipeline: [
+        { op: "lineage", query: "x" },
+        { op: "expand_by_session" },
+      ],
+    });
+    expect(result.kind).toBe("records");
+    if (result.kind === "records") {
+      expect(result.result.length).toBe(1);
+      const r = result.result[0];
+      expect((r["_session"] as { firstPrompt?: string })?.firstPrompt).toBe("hello there");
+      expect((r["_summary"] as { title?: string })?.title).toBe("What we did");
+    }
+  });
+
+  it("`out` defaults to `_` (linear flow works even after named-stream producer)", async () => {
+    // The producer sets an explicit `out: "data"`, so its records land only
+    // in the named stream `data`; `out` falls back to `_` only when omitted.
+    // The downstream filter has no `in`, so it reads the default stream `_`,
+    // which nothing has written to, and therefore yields zero records.
+    sdk.registerFunction("mem::lineage", async () =>
+      makeLineage([timelineItem({ id: "n1" })]),
+    );
+    const result = await callQuery(sdk, {
+      pipeline: [
+        { op: "lineage", out: "data", query: "x" },
+        { op: "filter", where: { field: "_id", op: "exists" } },
+      ],
+    });
+    expect(result.kind).toBe("records");
+    if (result.kind === "records") {
+      // _ is empty since lineage went to "data" only
+      expect(result.result.length).toBe(0);
+    }
+  });
+
+  it("`sessions` producer reads kv.sessions directly", async () => {
+    const s1: Session = { id: "s1", project: "p1", startedAt: "2026-01-01T00:00:00Z" } as Session;
+    const s2: Session = { id: "s2", project: "p2", startedAt: "2026-01-02T00:00:00Z" } as Session;
+    await kv.set("mem:sessions", "s1", s1);
+    await kv.set("mem:sessions", "s2", s2);
+    const result = await callQuery(sdk, {
+      pipeline: [{ op: "sessions", project: "p1" }] as never,
+    });
+    expect(result.kind).toBe("records");
+    if (result.kind === "records") {
+      expect(result.result.length).toBe(1);
+      expect(result.result[0]._id).toBe("s1");
+    }
+  });
+
+  it("trace records inCount, outCount, ms, costClass per step", async () => {
+    sdk.registerFunction("mem::lineage", async () =>
+      makeLineage([
+        timelineItem({ id: "a" }),
+        timelineItem({ id: "b" }),
+        timelineItem({ id: "c" }),
+      ]),
+    );
+    const result = await callQuery(sdk, {
+      pipeline: [
+        { op: "lineage", query: "x" },
+        { op: "limit", n: 2 },
+      ],
+    });
+    expect(result.kind).toBe("records");
+    if (result.kind === "records") {
+      expect(result.trace.length).toBe(2);
+      expect(result.trace[0]).toMatchObject({ op: "lineage", inCount: 0, outCount: 3, costClass: 3 });
+      expect(result.trace[1]).toMatchObject({ op: "limit", inCount: 3, outCount: 2, costClass: 1 });
+      expect(result.trace[0].ms).toBeGreaterThanOrEqual(0);
+    }
+  });
+});
+
+// No-op references to keep unused-symbol lints quiet; `timelineItem` and `mockKV` are already used by the tests above.
+void timelineItem;
+void mockKV;
+// `CompressedObservation` is imported but not otherwise used in this file; this no-op reference makes the import count as used.
+void ({} as CompressedObservation);
diff --git a/test/query-transformers.test.ts b/test/query-transformers.test.ts
new file mode 100644
index 00000000..70b0da7a
--- /dev/null
+++ b/test/query-transformers.test.ts
@@ -0,0 +1,267 @@
+import { describe, it, expect, vi } from "vitest";
+
+vi.mock("../src/logger.js", () => ({
+  logger: { info: vi.fn(), warn: vi.fn(), error: vi.fn() },
+}));
+
+import {
+  applyDistinct,
+  applyFlatten,
+  applyGroupBy,
+  applyJoin,
+  applyProject,
+  applyTopNPerGroup,
+  evalPredicate,
+  resolveDotPath,
+  stableSort,
+} from "../src/functions/query.js";
+import type { EnvelopedRecord } from "../src/types.js";
+
+function r(overrides: Partial<EnvelopedRecord>): EnvelopedRecord {
+  return {
+    _kind: "observation",
+    _id: "default",
+    _source: { op: "test" },
+    ...overrides,
+  };
+}
+
+describe("resolveDotPath", () => {
+  it("resolves single key", () => {
+    expect(resolveDotPath({ a: 1 }, "a")).toBe(1);
+  });
+  it("resolves nested path", () => {
+    expect(resolveDotPath({ a: { b: { c: 7 } } }, "a.b.c")).toBe(7);
+  });
+  it("returns undefined for missing intermediate", () => {
+    expect(resolveDotPath({ a: { b: 1 } }, "a.c.d")).toBeUndefined();
+  });
+  it("returns undefined for null intermediate", () => {
+    expect(resolveDotPath({ a: null as unknown as Record<string, unknown> }, "a.b")).toBeUndefined();
+  });
+});
+
+describe("evalPredicate", () => {
+  const rec = r({
+    _kind: "memory",
+    _id: "m1",
+    _score: 0.7,
+    _createdAt: "2026-05-15T00:00:00Z",
+    type: "decision",
+    title: "Picked X over Y",
+  });
+
+  it("eq", () => {
+    expect(evalPredicate({ field: "_kind", op: "eq", value: "memory" }, rec)).toBe(true);
+    expect(evalPredicate({ field: "_kind", op: "eq", value: "lesson" }, rec)).toBe(false);
+  });
+  it("neq", () => {
+    expect(evalPredicate({ field: "_kind", op: "neq", value: "lesson" }, rec)).toBe(true);
+  });
+  it("in / not_in", () => {
+    expect(evalPredicate({ field: "type", op: "in", value: ["decision", "pattern"] }, rec)).toBe(true);
+    expect(evalPredicate({ field: "type", op: "not_in", value: ["decision"] }, rec)).toBe(false);
+  });
+  it("gt / gte / lt / lte", () => {
+    expect(evalPredicate({ field: "_score", op: "gt", value: 0.5 }, rec)).toBe(true);
+    expect(evalPredicate({ field: "_score", op: "gte", value: 0.7 }, rec)).toBe(true);
+    expect(evalPredicate({ field: "_score", op: "lt", value: 0.7 }, rec)).toBe(false);
+    expect(evalPredicate({ field: "_score", op: "lte", value: 0.7 }, rec)).toBe(true);
+  });
+  it("contains / starts_with (case insensitive)", () => {
+    expect(evalPredicate({ field: "title", op: "contains", value: "PICKED" }, rec)).toBe(true);
+    expect(evalPredicate({ field: "title", op: "starts_with", value: "picked" }, rec)).toBe(true);
+    expect(evalPredicate({ field: "title", op: "starts_with", value: "over" }, rec)).toBe(false);
+  });
+  it("exists", () => {
+    expect(evalPredicate({ field: "_score", op: "exists" }, rec)).toBe(true);
+    expect(evalPredicate({ field: "_project", op: "exists" }, rec)).toBe(false);
+  });
+  it("since / until", () => {
+    expect(evalPredicate({ field: "_createdAt", op: "since", value: "2026-05-01T00:00:00Z" }, rec)).toBe(true);
+    expect(evalPredicate({ field: "_createdAt", op: "since", value: "2026-06-01T00:00:00Z" }, rec)).toBe(false);
+    expect(evalPredicate({ field: "_createdAt", op: "until", value: "2026-06-01T00:00:00Z" }, rec)).toBe(true);
+  });
+  it("composes via all/any/not", () => {
+    expect(
+      evalPredicate(
+        {
+          all: [
+            { field: "_kind", op: "eq", value: "memory" },
+            { field: "_score", op: "gt", value: 0.5 },
+          ],
+        },
+        rec,
+      ),
+    ).toBe(true);
+    expect(
+      evalPredicate(
+        {
+          any: [
+            { field: "_kind", op: "eq", value: "lesson" },
+            { field: "type", op: "eq", value: "decision" },
+          ],
+        },
+        rec,
+      ),
+    ).toBe(true);
+    expect(evalPredicate({ not: { field: "_kind", op: "eq", value: "lesson" } }, rec)).toBe(true);
+  });
+});
+
+describe("stableSort", () => {
+  const data: EnvelopedRecord[] = [
+    r({ _id: "a", _score: 1, _createdAt: "2026-01-01T00:00:00Z" }),
+    r({ _id: "b", _score: 3, _createdAt: "2026-01-02T00:00:00Z" }),
+    r({ _id: "c", _score: 2, _createdAt: "2026-01-03T00:00:00Z" }),
+    r({ _id: "d", _score: 3, _createdAt: "2026-01-04T00:00:00Z" }), // ties b
+  ];
+  it("sorts descending by single key", () => {
+    const sorted = stableSort(data, "_score", "desc");
+    expect(sorted.map((x) => x._id)).toEqual(["b", "d", "c", "a"]); // b before d (stable)
+  });
+  it("sorts ascending", () => {
+    const sorted = stableSort(data, "_score", "asc");
+    expect(sorted.map((x) => x._id)).toEqual(["a", "c", "b", "d"]);
+  });
+  it("multi-key tiebreak", () => {
+    const sorted = stableSort(data, ["_score", "_createdAt"], "desc");
+    expect(sorted.map((x) => x._id)).toEqual(["d", "b", "c", "a"]); // d.createdAt > b.createdAt
+  });
+  it("compares ISO timestamps as time", () => {
+    const sorted = stableSort(data, "_createdAt", "desc");
+    expect(sorted.map((x) => x._id)).toEqual(["d", "c", "b", "a"]);
+  });
+});
+
+describe("applyProject", () => {
+  const rec = r({
+    _kind: "memory",
+    _id: "m1",
+    _sessionId: "ses1",
+    _project: "proj",
+    _createdAt: "t",
+    title: "hello",
+    content: "body",
+    extra: "drop me",
+  });
+  it("whitelists fields and always keeps envelope core", () => {
+    const out = applyProject([rec], ["title"], undefined);
+    expect(out[0]._kind).toBe("memory");
+    expect(out[0]._id).toBe("m1");
+    expect(out[0]._sessionId).toBe("ses1");
+    expect(out[0]["title"]).toBe("hello");
+    expect(out[0]["content"]).toBeUndefined();
+    expect(out[0]["extra"]).toBeUndefined();
+  });
+  it("renames fields", () => {
+    const out = applyProject([rec], undefined, { title: "headline" });
+    expect(out[0]["headline"]).toBe("hello");
+    expect(out[0]["title"]).toBe("hello"); // original kept
+  });
+  it("returns shallow copy (no mutation)", () => {
+    const out = applyProject([rec], ["title"], undefined);
+    expect(out[0]).not.toBe(rec);
+  });
+});
+
+describe("applyDistinct", () => {
+  it("dedups by _id (default)", () => {
+    const out = applyDistinct(
+      [
+        r({ _id: "a" }),
+        r({ _id: "b" }),
+        r({ _id: "a", title: "second" }),
+      ],
+      "_id",
+    );
+    expect(out.length).toBe(2);
+    expect(out.map((x) => x._id)).toEqual(["a", "b"]);
+  });
+  it("dedups by arbitrary field", () => {
+    const out = applyDistinct(
+      [
+        r({ _id: "a", _project: "p1" }),
+        r({ _id: "b", _project: "p1" }),
+        r({ _id: "c", _project: "p2" }),
+      ],
+      "_project",
+    );
+    expect(out.length).toBe(2);
+  });
+});
+
+describe("applyFlatten", () => {
+  it("explodes array field into one row per item", () => {
+    const out = applyFlatten([r({ _id: "x", tags: ["a", "b", "c"] })], "tags");
+    expect(out.length).toBe(3);
+    expect(out.map((x) => x["tags"])).toEqual(["a", "b", "c"]);
+  });
+  it("passes through non-array values unchanged", () => {
+    const out = applyFlatten([r({ _id: "x", tags: "just-one" })], "tags");
+    expect(out.length).toBe(1);
+    expect(out[0]["tags"]).toBe("just-one");
+  });
+});
+
+describe("applyGroupBy + applyTopNPerGroup", () => {
+  const recs: EnvelopedRecord[] = [
+    r({ _id: "a1", _project: "p1", _score: 5, _createdAt: "2026-01-01T00:00:00Z" }),
+    r({ _id: "a2", _project: "p1", _score: 3, _createdAt: "2026-01-02T00:00:00Z" }),
+    r({ _id: "a3", _project: "p1", _score: 7, _createdAt: "2026-01-03T00:00:00Z" }),
+    r({ _id: "b1", _project: "p2", _score: 4, _createdAt: "2026-01-04T00:00:00Z" }),
+    r({ _id: "b2", _project: "p2", _score: 6, _createdAt: "2026-01-05T00:00:00Z" }),
+  ];
+  it("groups produce group-typed records with members", () => {
+    const grouped = applyGroupBy(recs, "_project");
+    expect(grouped.length).toBe(2);
+    expect(grouped.every((g) => g._kind === "group")).toBe(true);
+    const p1 = grouped.find((g) => g["_groupKey"] === "p1")!;
+    expect((p1["members"] as EnvelopedRecord[]).length).toBe(3);
+  });
+  it("top_n_per_group sorts by _score desc within group by default", () => {
+    const grouped = applyGroupBy(recs, "_project");
+    const top2 = applyTopNPerGroup(grouped, 2, "_score", "desc");
+    // p1 top-2 by _score desc: a3 (7), a1 (5). p2: b2 (6), b1 (4).
+    const ids = top2.map((x) => x._id);
+    expect(ids).toContain("a3");
+    expect(ids).toContain("a1");
+    expect(ids).toContain("b2");
+    expect(ids).toContain("b1");
+    expect(ids).not.toContain("a2"); // beat out of top-2
+    expect(top2.length).toBe(4);
+  });
+  it("top_n_per_group passes through non-group records unchanged", () => {
+    const mixed: EnvelopedRecord[] = [r({ _id: "loose" })];
+    const out = applyTopNPerGroup(mixed, 1, "_score", "desc");
+    expect(out.length).toBe(1);
+    expect(out[0]._id).toBe("loose");
+  });
+});
+
+describe("applyJoin", () => {
+  const left: EnvelopedRecord[] = [
+    r({ _id: "l1", _sessionId: "s1" }),
+    r({ _id: "l2", _sessionId: "s2" }),
+    r({ _id: "l3", _sessionId: "s3" }),
+  ];
+  const right: EnvelopedRecord[] = [
+    r({ _kind: "lesson", _id: "r-of-s1", _sessionId: "s1", content: "L1" }),
+    r({ _kind: "lesson", _id: "r-of-s2a", _sessionId: "s2", content: "L2a" }),
+    r({ _kind: "lesson", _id: "r-of-s2b", _sessionId: "s2", content: "L2b" }),
+  ];
+  it("left join attaches matches and emits null for misses", () => {
+    const out = applyJoin(left, right, { left: "_sessionId", right: "_sessionId" }, "left");
+    // l1 (1 match), l2 (2 matches), l3 (no match → null)
+    expect(out.length).toBe(4);
+    const noMatch = out.find((o) => o._id === "l3")!;
+    expect((noMatch["_join"] as { right: unknown }).right).toBeNull();
+    const l2Matches = out.filter((o) => o._id === "l2");
+    expect(l2Matches.length).toBe(2);
+  });
+  it("inner join drops unmatched left records", () => {
+    const out = applyJoin(left, right, { left: "_sessionId", right: "_sessionId" }, "inner");
+    expect(out.length).toBe(3); // l1×1 + l2×2 + l3 dropped
+    expect(out.find((o) => o._id === "l3")).toBeUndefined();
+  });
+});

From 19590559bf23c3b4df0fb141597623adb7739517 Mon Sep 17 00:00:00 2001
From: Ruben de Smet <ruben@lunascens.io>
Date: Wed, 20 May 2026 16:57:27 +0200
Subject: [PATCH 5/8] =?UTF-8?q?docs(AGENTS.md):=20bump=20MCP=20tools=20cou?=
 =?UTF-8?q?nt=2054=20=E2=86=92=2055=20after=20v5-a=20(memory=5Fquery)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

test/consistency.test.ts only checks AGENTS.md for REST endpoint count
(125, which I set in the v4-a commit when /agentmemory/lineage was
added); it does NOT check AGENTS.md for MCP tool count. So when v5-a
added memory_query (total tools 54 → 55), the AGENTS.md "Current
Stats" block silently drifted out of sync.

Bump to match the actual count across all V0xx_TOOLS arrays in
src/mcp/tools-registry.ts (CORE_TOOLS 15 + V040 8 + V050 10 + V051 9
+ V061 1 + V070 3 + V073 2 + V010_SLOTS 6 + V020_QUERY 1 = 55).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 AGENTS.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/AGENTS.md b/AGENTS.md
index 6032e3a7..1e2b8af6 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -111,7 +111,7 @@ Hook scripts in `src/hooks/` are standalone Node.js scripts (no iii-sdk import).
 
 ## Current Stats (v0.9.16)
 
-- 54 MCP tools (8 visible by default, `AGENTMEMORY_TOOLS=all` for all)
+- 55 MCP tools (8 visible by default, `AGENTMEMORY_TOOLS=all` for all)
 - 125 REST endpoints
 - 6 MCP resources, 3 MCP prompts
 - 12 hooks, 4 skills

From 1f06dd3e5e64cd815e50f4697f1580c009b860ec Mon Sep 17 00:00:00 2001
From: Ruben de Smet <ruben@lunascens.io>
Date: Wed, 20 May 2026 18:14:06 +0200
Subject: [PATCH 6/8] =?UTF-8?q?docs(v5-a):=20address=20CodeRabbit=20nits?=
 =?UTF-8?q?=20=E2=80=94=20fence=20tags=20+=20README=20count=20consistency?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Tag previously-unlabelled fenced code blocks in docs/plans/v4-lineage-*
  with `text` to satisfy markdownlint MD040 (CodeRabbit comments on
  v4-lineage-design.md:126 and v4-lineage-test-case-careful-generator.md:19).
- Normalize stale "51 tools" / "51-tool" / "(51 tools)" references in
  README.md to "55 tools" (CodeRabbit #574 README.md:802). The headline
  count was already 55 in three spots after v5-a; the helper-paragraphs,
  the "### 51 Tools" heading, and the AGENTMEMORY_TOOLS comment lagged.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 README.md                                            | 8 ++++----
 docs/plans/v4-lineage-design.md                      | 2 +-
 docs/plans/v4-lineage-test-case-careful-generator.md | 2 +-
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/README.md b/README.md
index ae4f1f9a..92dc045f 100644
--- a/README.md
+++ b/README.md
@@ -424,7 +424,7 @@ codex plugin install agentmemory
 
 The Codex plugin ships from the same `plugin/` directory as the Claude Code plugin. It registers:
 
-- `@agentmemory/mcp` as an MCP server (proxies all 51 tools when `AGENTMEMORY_URL` points at a running agentmemory server; falls back to 7 tools locally when no server is reachable)
+- `@agentmemory/mcp` as an MCP server (proxies all 55 tools when `AGENTMEMORY_URL` points at a running agentmemory server; falls back to 7 tools locally when no server is reachable)
 - 6 lifecycle hooks: `SessionStart`, `UserPromptSubmit`, `PreToolUse`, `PostToolUse`, `PreCompact`, `Stop`
 - 4 skills: `/recall`, `/remember`, `/session-history`, `/forget`
 
@@ -801,9 +801,9 @@ npm install @xenova/transformers
 
 55 tools, 6 resources, 3 prompts, and 4 skills — the most comprehensive MCP memory toolkit for any agent.
 
-> **MCP shim vs full server:** the published `@agentmemory/mcp` package is a thin shim. It exposes the full 51-tool surface **only when it can reach a running agentmemory server** via `AGENTMEMORY_URL` (proxy mode). With no server reachable, the shim falls back to a 7-tool local set (`memory_save`, `memory_recall`, `memory_smart_search`, `memory_sessions`, `memory_export`, `memory_audit`, `memory_governance_delete`). The `AGENTMEMORY_TOOLS=core|all` env var is a *server-side* flag — setting it in the shim's `env` block has no effect. If you see only 7 tools in Cursor / OpenCode / Gemini CLI, start `npx @agentmemory/agentmemory` (or the Docker stack) and set `AGENTMEMORY_URL=http://localhost:3111`.
+> **MCP shim vs full server:** the published `@agentmemory/mcp` package is a thin shim. It exposes the full 55-tool surface **only when it can reach a running agentmemory server** via `AGENTMEMORY_URL` (proxy mode). With no server reachable, the shim falls back to a 7-tool local set (`memory_save`, `memory_recall`, `memory_smart_search`, `memory_sessions`, `memory_export`, `memory_audit`, `memory_governance_delete`). The `AGENTMEMORY_TOOLS=core|all` env var is a *server-side* flag — setting it in the shim's `env` block has no effect. If you see only 7 tools in Cursor / OpenCode / Gemini CLI, start `npx @agentmemory/agentmemory` (or the Docker stack) and set `AGENTMEMORY_URL=http://localhost:3111`.
 
-### 51 Tools
+### 55 Tools
 
 <details>
 <summary>Core tools (always available)</summary>
@@ -1189,7 +1189,7 @@ Create `~/.agentmemory/.env`:
 # USER_ID=
 # TEAM_MODE=private
 
-# Tool visibility: "core" (8 tools) or "all" (51 tools)
+# Tool visibility: "core" (8 tools) or "all" (55 tools)
 # AGENTMEMORY_TOOLS=core
 ```
 
diff --git a/docs/plans/v4-lineage-design.md b/docs/plans/v4-lineage-design.md
index f5b3ce6d..2c64b2bc 100644
--- a/docs/plans/v4-lineage-design.md
+++ b/docs/plans/v4-lineage-design.md
@@ -123,7 +123,7 @@ Notes:
 
 ## Algorithm
 
-```
+```text
 1. Match by channel (parallel):
    a) observation & memory:
       - reuse the existing BM25 index from src/functions/search.ts.
diff --git a/docs/plans/v4-lineage-test-case-careful-generator.md b/docs/plans/v4-lineage-test-case-careful-generator.md
index 2cd7b4b0..dc7d36ae 100644
--- a/docs/plans/v4-lineage-test-case-careful-generator.md
+++ b/docs/plans/v4-lineage-test-case-careful-generator.md
@@ -16,7 +16,7 @@ secretly testing several capabilities at once.
 **Definition.** From `docs/architecture.md:308-309` and
 `docs/configuration.md:176-177`:
 
-```
+```text
 analyse_manifest:  vast-qwen36-35b   # Tier 2 — careful generator
 diff_complex:      vast-qwen36-35b
 ```

From 530d92983e992eee9a8e82ff077d57aff7dcff4d Mon Sep 17 00:00:00 2001
From: Ruben de Smet <ruben@lunascens.io>
Date: Wed, 20 May 2026 18:46:54 +0200
Subject: [PATCH 7/8] =?UTF-8?q?fix(v5-a):=20CodeRabbit=20re-review=20nits?=
 =?UTF-8?q?=20=E2=80=94=20README=20counts=20+=20options=20array=20guard?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two nits from CodeRabbit's second pass on #574:

1. README:1192 said `core` = 8 tools. ESSENTIAL_TOOLS now contains 9
   entries after this PR adds memory_query. Bump 8 → 9. Also bump
   "Extended tools (51 total)" → 55 to match the headline (51 → 55
   was missed when I normalized the other README spots earlier).

   Note: the "Core tools (always available)" table at README:809 lists
   11 entries vs ESSENTIAL_TOOLS' 9 — that's pre-existing doc drift
   unrelated to this PR. Leaving for a separate cleanup.

2. src/mcp/server.ts memory_query options guard accepted arrays
   because `typeof [] === "object"`. Reject arrays explicitly so a
   malformed `options: []` is caught at the MCP boundary with 400
   instead of relying on downstream validation.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 README.md         |  4 ++--
 src/mcp/server.ts | 15 ++++++++++++++-
 2 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index 92dc045f..f1b97fe9 100644
--- a/README.md
+++ b/README.md
@@ -825,7 +825,7 @@ npm install @xenova/transformers
 </details>
 
 <details>
-<summary>Extended tools (51 total — set AGENTMEMORY_TOOLS=all)</summary>
+<summary>Extended tools (55 total — set AGENTMEMORY_TOOLS=all)</summary>
 
 | Tool | Description |
 |------|-------------|
@@ -1189,7 +1189,7 @@ Create `~/.agentmemory/.env`:
 # USER_ID=
 # TEAM_MODE=private
 
-# Tool visibility: "core" (8 tools) or "all" (55 tools)
+# Tool visibility: "core" (9 tools) or "all" (55 tools)
 # AGENTMEMORY_TOOLS=core
 ```
 
diff --git a/src/mcp/server.ts b/src/mcp/server.ts
index fa22aa1e..4a09607e 100644
--- a/src/mcp/server.ts
+++ b/src/mcp/server.ts
@@ -353,7 +353,20 @@ export function registerMcpEndpoints(
               };
             }
             const payload: Record<string, unknown> = { pipeline: args.pipeline };
-            if (args.options !== undefined && typeof args.options === "object" && args.options !== null) {
+            if (args.options !== undefined) {
+              // typeof [] === "object", so guard against arrays too —
+              // schema requires a plain object. CodeRabbit caught this
+              // on #574.
+              if (
+                typeof args.options !== "object" ||
+                args.options === null ||
+                Array.isArray(args.options)
+              ) {
+                return {
+                  status_code: 400,
+                  body: { error: "options must be an object" },
+                };
+              }
               payload.options = args.options;
             }
             const result = await sdk.trigger({

From 3c1413b2659ddc9b6330c4b921ad1f944881ffb4 Mon Sep 17 00:00:00 2001
From: Ruben de Smet <ruben@lunascens.io>
Date: Wed, 20 May 2026 19:24:15 +0200
Subject: [PATCH 8/8] =?UTF-8?q?fix(query):=20CodeRabbit=20#574=20batch=20?=
 =?UTF-8?q?=E2=80=94=20deadline=20+=20maxDepth=20+=20dry=5Frun=20+=20last?=
 =?UTF-8?q?=20stream?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Four issues from CodeRabbit's #574 review of v5-a, addressed together
since they touch the same file and share testing surface.

1. `timeoutMs` was only enforced between steps (the deadline check ran
   before each step but not during awaited I/O). A single slow
   `sdk.trigger()` producer or `provider.summarize()` LLM call could
   hang well past the user's timeout. Added a `withDeadline()` helper
   that races the awaited promise against `ctx.deadlineAt`, and
   wrapped the three unbounded await sites:
     - `runProducer`'s `sdk.trigger`
     - `synthesize`'s `provider.summarize`
     - `rank_by_relevance`'s `provider.summarize`
   The race rejects with `QueryRuntimeError("deadline_exceeded during
   <label>")` which surfaces as `{kind:"error"}` through the existing
   error pathway.

2. `validatePipeline()` hard-coded `DEFAULTS.maxDepth` for the
   for_each-nesting check, so a user setting `options.maxDepth=5` on a
   4-deep pipeline still got rejected. The validator now accepts
   `maxDepth` through its ctx parameter; the registration handler
   computes `earlyMaxDepth` from options BEFORE calling validate, then
   threads it into both the top-level call and the recursive descent.

3. `dry_run` cost estimate ignored `for_each.do` contents — a heavy
   inner pipeline reported the same cost as an empty one, making the
   planning output deceptive. `estimatePipelineCost()` is now
   recursive: top-level cost plus `for_each` inner cost as a range
   (min: 1 iteration, max: maxStepOut iterations). The min/max gap
   reflects that we don't know the input cardinality at plan time.

4. Records pipelines always returned `streams.get("_") ?? []`, so a
   pipeline whose final step writes `out: "foo"` got an empty result
   back. Now the executor tracks the last step's `outputName` and
   returns that stream. Default-flow pipelines (no explicit `out`) are
   unchanged because the last step's `outputName` defaults to "_".

Not addressed (heavy lift, separate PR if pursued):
   - Recursive predicate / for_each.do schemas in tools-registry.ts
     (CodeRabbit flagged that `all`/`any`/`not` and `for_each.do`
     currently bottom out at untyped arrays, losing schema-aware
     autocomplete past one level of nesting). Doable with JSON Schema
     `$ref` but a meaningful design surface.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/functions/query.ts | 103 +++++++++++++++++++++++++++++++++++------
 1 file changed, 89 insertions(+), 14 deletions(-)

diff --git a/src/functions/query.ts b/src/functions/query.ts
index 2aedf546..d67d2ca9 100644
--- a/src/functions/query.ts
+++ b/src/functions/query.ts
@@ -859,7 +859,11 @@ async function applyRankByRelevance(
   if (records.length === 0) return [];
   const sample = records.slice(0, 50);
   const userPrompt = buildRankPrompt(sample, target);
-  const text = await ctx.provider.summarize(RANK_SYSTEM_PROMPT, userPrompt);
+  const text = await withDeadline(
+    Promise.resolve(ctx.provider.summarize(RANK_SYSTEM_PROMPT, userPrompt)),
+    ctx.deadlineAt,
+    "rank_by_relevance/provider",
+  );
   ctx.llmCalls += 1;
   const scores = parseRankScores(text);
   const scored = records.map((r) => {
@@ -876,10 +880,11 @@ async function applyRankByRelevance(
 
 function validatePipeline(
   pipeline: unknown,
-  ctx: { path?: string; depth?: number } = {},
+  ctx: { path?: string; depth?: number; maxDepth?: number } = {},
 ): { ok: true; pipeline: PipelineStep[] } | { ok: false; error: string } {
   const path = ctx.path ?? "pipeline";
   const depth = ctx.depth ?? 0;
+  const maxDepth = ctx.maxDepth ?? DEFAULTS.maxDepth;
   if (!Array.isArray(pipeline)) return { ok: false, error: `${path} must be an array` };
   if (pipeline.length === 0) return { ok: false, error: `${path} must contain at least one step` };
   if (pipeline.length > 32) return { ok: false, error: `${path} exceeds maximum of 32 steps` };
@@ -899,8 +904,8 @@ function validatePipeline(
       return { ok: false, error: `${path}[${i}]: 'synthesize' must be the terminal step` };
     }
     if (op === "for_each") {
-      if (depth + 1 > DEFAULTS.maxDepth) {
-        return { ok: false, error: `${path}[${i}]: for_each depth exceeds ${DEFAULTS.maxDepth}` };
+      if (depth + 1 > maxDepth) {
+        return { ok: false, error: `${path}[${i}]: for_each depth exceeds ${maxDepth}` };
       }
       const sub = (s as { do?: unknown }).do;
       if (!Array.isArray(sub)) {
@@ -917,7 +922,7 @@ function validatePipeline(
           };
         }
       }
-      const subResult = validatePipeline(sub, { path: `${path}[${i}].do`, depth: depth + 1 });
+      const subResult = validatePipeline(sub, { path: `${path}[${i}].do`, depth: depth + 1, maxDepth });
       if (!subResult.ok) return subResult;
     }
   }
@@ -928,10 +933,27 @@ function validatePipeline(
 // Cost estimation (for dry_run)
 // ---------------------------------------------------------------------------
 
-function estimatePipelineCost(pipeline: PipelineStep[]): { min: number; max: number } {
-  let total = 0;
-  for (const step of pipeline) total += COST_CLASS[step.op];
-  return { min: total, max: total };
+function estimatePipelineCost(
+  pipeline: PipelineStep[],
+  maxStepOut: number = DEFAULTS.maxStepOut,
+): { min: number; max: number } {
+  let min = 0;
+  let max = 0;
+  for (const step of pipeline) {
+    const cost = COST_CLASS[step.op];
+    min += cost;
+    max += cost;
+    if (step.op === "for_each") {
+      // for_each runs the inner pipeline once per input record. We
+      // don't know cardinality at plan time, so:
+      //   - min: assume 1 iteration (lower bound for any non-empty input)
+      //   - max: assume maxStepOut iterations (post-step cap)
+      const sub = estimatePipelineCost(step.do, maxStepOut);
+      min += sub.min;
+      max += sub.max * maxStepOut;
+    }
+  }
+  return { min, max };
 }
 
 // ---------------------------------------------------------------------------
@@ -964,6 +986,35 @@ class QueryRuntimeError extends Error {
   }
 }
 
+// Race an awaited I/O promise against ctx.deadlineAt. Without this,
+// `enforceDeadline()` only fires BETWEEN steps — a single slow
+// producer (`sdk.trigger`) or LLM call (`provider.summarize`) can hang
+// well past the user's timeoutMs while waiting on async work.
+// CodeRabbit caught this on #574.
+async function withDeadline<T>(
+  p: Promise<T>,
+  deadlineAt: number,
+  label: string,
+): Promise<T> {
+  const remaining = deadlineAt - Date.now();
+  if (remaining <= 0) {
+    throw new QueryRuntimeError(`deadline_exceeded before ${label}`);
+  }
+  let timer: ReturnType<typeof setTimeout> | undefined;
+  try {
+    return await Promise.race([
+      p,
+      new Promise<never>((_resolve, reject) => {
+        timer = setTimeout(() => {
+          reject(new QueryRuntimeError(`deadline_exceeded during ${label}`));
+        }, remaining);
+      }),
+    ]);
+  } finally {
+    if (timer !== undefined) clearTimeout(timer);
+  }
+}
+
 async function runProducer(
   step: PipelineStep,
   ctx: ExecCtx,
@@ -1075,7 +1126,11 @@ async function runProducer(
       throw new QueryRuntimeError(`runProducer payload missing for op '${(step as { op: string }).op}'`);
   }
 
-  const raw = await ctx.sdk.trigger({ function_id: fnId, payload });
+  const raw = await withDeadline(
+    Promise.resolve(ctx.sdk.trigger({ function_id: fnId, payload })),
+    ctx.deadlineAt,
+    `${step.op}/sdk.trigger`,
+  );
 
   let records: EnvelopedRecord[];
   switch (step.op) {
@@ -1237,7 +1292,11 @@ async function executeStep(
       const style = step.style ?? "answer";
       const maxCitations = Math.max(1, Math.min(step.maxCitations ?? 6, 20));
       const userPrompt = buildSynthPrompt(input, step.question, style, maxCitations);
-      const text = await ctx.provider.summarize(SYNTH_SYSTEM_PROMPT, userPrompt);
+      const text = await withDeadline(
+        Promise.resolve(ctx.provider.summarize(SYNTH_SYSTEM_PROMPT, userPrompt)),
+        ctx.deadlineAt,
+        "synthesize/provider",
+      );
       ctx.llmCalls += 1;
       const synth = parseSynthesis(text, input, maxCitations);
       return {
@@ -1278,6 +1337,8 @@ async function executePipelineInternal(
   const streams = new Map<string, EnvelopedRecord[]>();
   streams.set("_", initialInput ?? []);
 
+  let lastOutputName: string | undefined;
+
   for (const step of pipeline) {
     enforceDeadline(ctx, step.op);
     const cost = COST_CLASS[step.op];
@@ -1321,6 +1382,7 @@ async function executePipelineInternal(
     }
     const output = result.output.slice(0, ctx.maxStepOut);
     streams.set(result.outputName, output);
+    lastOutputName = result.outputName;
     ctx.budget.spent += cost;
     trace.push({
       op: step.op,
@@ -1333,7 +1395,12 @@ async function executePipelineInternal(
     });
   }
 
-  return { kind: "records", result: streams.get("_") ?? [], trace };
+  // Return the last emitted stream rather than always `_`. A pipeline
+  // whose final step explicitly writes to `out: "foo"` would otherwise
+  // drop its result. Default stream "_" wins for the (common) implicit-
+  // flow case. CodeRabbit caught this on #574.
+  const finalName = lastOutputName ?? "_";
+  return { kind: "records", result: streams.get(finalName) ?? [], trace };
 }
 
 async function executePipeline(
@@ -1394,7 +1461,15 @@ export function registerQueryFunction(
   sdk.registerFunction(
     "mem::query",
     async (data: QueryRequest): Promise<QueryResult> => {
-      const validation = validatePipeline(data?.pipeline);
+      // Compute maxDepth early so validation can honor user-set higher
+      // nesting limits (otherwise a pipeline nested 4-5 levels gets
+      // rejected even when options.maxDepth=5 is explicitly set).
+      const optsEarly = data?.options ?? {};
+      const earlyMaxDepth = Math.min(
+        Math.max(optsEarly.maxDepth ?? DEFAULTS.maxDepth, 1),
+        5,
+      );
+      const validation = validatePipeline(data?.pipeline, { maxDepth: earlyMaxDepth });
       if (!validation.ok) {
         return {
           kind: "error",
@@ -1428,7 +1503,7 @@ export function registerQueryFunction(
         return {
           kind: "dry_run",
           plan: pipeline,
-          estimatedCost: estimatePipelineCost(pipeline),
+          estimatedCost: estimatePipelineCost(pipeline, maxStepOut),
         };
       }