michaelzwang13 · michaelzwang13 · May 24, 2026 · May 23, 2026
diff --git a/backend/agent-config/templates/code-review-engineer.yaml b/backend/agent-config/templates/code-review-engineer.yaml
@@ -7,6 +7,7 @@ required_tools:
 skills:
   - github-list-prs
   - github-pr-review
+  - update-memory
 
 system_prompt: |
   You are a senior code review engineer. Your responsibilities:
@@ -24,6 +25,8 @@ allowed_actions:
   - github.pr.comment
   - github.review.submit
   - github.repo.read
+  - agent.memory.read
+  - agent.memory.write
 
 resource_limits:
   mem_limit: "512m"

diff --git a/backend/agent-runtime/skills/update-memory/SKILL.md b/backend/agent-runtime/skills/update-memory/SKILL.md
@@ -0,0 +1,45 @@
+---
+name: update_memory
+description: Persist a preference or learned fact across sessions so future tasks can use it.
+metadata:
+  { "openclaw": { "requires": { "bins": ["curl"] } } }
+---
+
+# Update Memory
+
+Use this skill to save something you have learned about how the user wants you to work — a style preference, a project convention, a person's role, anything you would want to remember next time. The value is written to the platform's memory store, scoped to you, and will be injected back into your context on the next task. SOUL.md cannot persist across container restarts; this skill is how you carry knowledge forward.
+
+## Save a memory
+
+```
+exec curl -s -X POST "${PLATFORM_GATEWAY_URL}/memory" \
+  -H "Content-Type: application/json" \
+  -H "Authorization: Bearer ${AGENT_TOKEN}" \
+  -d '{"key": "KEY", "value": "VALUE"}'
+```
+
+## Read your stored memory
+
+```
+exec curl -s "${PLATFORM_GATEWAY_URL}/memory" \
+  -H "Authorization: Bearer ${AGENT_TOKEN}"
+```
+
+## Parameters
+- `KEY`: a stable, dot-separated identifier built from lowercase, kebab-case segments (e.g. `style.tone`, `repos.acme-frontend.lang`, `people.alice.role`). Reusing a key overwrites the previous value.
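+- `VALUE`: plain text. Keep it short and self-contained — one sentence to a short paragraph. The value is embedded in a single-quoted JSON body, so JSON-escape any `"`, `\`, or newline and avoid a raw `'`.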
+
+## When to use
+- A user corrected your style — save the corrected style so you don't repeat the mistake.
+- You learned a project-level convention (preferred review tone, files to skip, urgency rules).
+- A user named a person, repo, or system you didn't know about.
+
+## When not to use
+- Per-task scratchpad details (those live only in the current task).
+- Anything secret or sensitive — memory is stored in the platform DB, not encrypted at rest.
+- Information you can re-derive trivially from the task input.
+
+## Important
+- Choose stable keys. Bad: `note-from-2026-05-23`. Good: `style.review.tone`.
+- Prefer overwriting an existing key to creating a near-duplicate one.
+- If memory grows large, the platform may consolidate it on your behalf — write atomic, well-scoped facts so consolidation can do something useful with them.
diff --git a/backend/app/models/action_log.py b/backend/app/models/action_log.py
@@ -0,0 +1,52 @@
+"""Agent action log — the append-only audit stream to which the gateway
+writes one row for every agent-authed call. Allowed calls are logged
+alongside denials so the log is a full work history, not just a violation list.
+
+Phase B left a `logger.warning` stub on the deny path; Phase D promotes it
+to persisted rows here, and extends coverage to the allow path too. The
+work-log surface and the future LLM reflection (issue #23) both read from
+this table.
+"""
+
+from app.database import get_supabase
+
+TABLE = "agent_action_log"
+
+
+class ActionLogModel:
+    """Insert and query helpers for the agent action audit table."""
+
+    @staticmethod
+    def record(
+        agent_id: str,
+        action: str,
+        outcome: str,
+        metadata: dict | None = None,
+    ) -> dict:
+        """Append a single audit row and return the first row of the response.
+
+        Args:
+            agent_id: Agent the row is recorded against.
+            action: Name of the action being logged.
+            outcome: "allowed" or "denied" (matches the DB check constraint).
+            metadata: Free-form context, e.g. the role at the time, the
+                endpoint, request shape. Stored as jsonb so new fields need
+                no schema change; None or an empty dict is written as {}.
+        """
+        row = dict(
+            agent_id=agent_id,
+            action=action,
+            outcome=outcome,
+            metadata=metadata if metadata else {},
+        )
+        table = get_supabase().table(TABLE)
+        return table.insert(row).execute().data[0]
+
+    @staticmethod
+    def list_by_agent(agent_id: str, limit: int = 100) -> list[dict]:
+        """Fetch at most `limit` audit rows for one agent, newest first."""
+        table = get_supabase().table(TABLE)
+        scoped = table.select("*").eq("agent_id", agent_id)
+        # Descending created_at puts the most recent row at index 0.
+        ordered = scoped.order("created_at", desc=True)
+        return ordered.limit(limit).execute().data
diff --git a/backend/app/models/agent_memory.py b/backend/app/models/agent_memory.py
@@ -0,0 +1,70 @@
+"""Per-agent memory — the key/value store the agent writes via the
+update-memory skill and reads back as injected role_context at dispatch.
+
+Rows are scoped to (agent_id, key). Last-write-wins on the same key
+(updated_at refreshes); the row count grows with the number of distinct
+preferences, not with task volume. Compaction strategies are tracked in
+issue #23.
+"""
+
+from datetime import datetime, timezone
+
+from app.database import get_supabase
+
+TABLE = "agent_memory"
+
+
+class AgentMemoryModel:
+    """CRUD helpers for the memory table; every query filters on agent_id."""
+
+    @staticmethod
+    def list_by_agent(agent_id: str) -> list[dict]:
+        """Fetch all memory rows for one agent, most recently updated first."""
+        table = get_supabase().table(TABLE)
+        scoped = table.select("*").eq("agent_id", agent_id)
+        return scoped.order("updated_at", desc=True).execute().data
+
+    @staticmethod
+    def get(agent_id: str, key: str) -> dict | None:
+        """Look up the row stored under (agent_id, key).
+
+        Returns:
+            The first matching row, or None when the query returns no rows.
+        """
+        table = get_supabase().table(TABLE)
+        matches = table.select("*").eq("agent_id", agent_id).eq("key", key).execute().data
+        if not matches:
+            return None
+        return matches[0]
+
+    @staticmethod
+    def upsert(agent_id: str, key: str, value: str) -> dict:
+        """Store `value` under (agent_id, key) with a fresh updated_at.
+
+        The write conflicts on "agent_id,key", so reusing a key replaces the
+        earlier value instead of adding a second row. updated_at is set here
+        to the current UTC time in ISO 8601 form.
+
+        Returns:
+            The first row of the upsert response.
+        """
+        stamped_at = datetime.now(timezone.utc).isoformat()
+        row = dict(
+            agent_id=agent_id,
+            key=key,
+            value=value,
+            updated_at=stamped_at,
+        )
+        table = get_supabase().table(TABLE)
+        return table.upsert(row, on_conflict="agent_id,key").execute().data[0]
+
+    @staticmethod
+    def delete(agent_id: str, key: str) -> bool:
+        """Delete the (agent_id, key) row.
+
+        Returns:
+            True when the delete response contains at least one row.
+        """
+        table = get_supabase().table(TABLE)
+        removed = table.delete().eq("agent_id", agent_id).eq("key", key).execute()
+        return len(removed.data) > 0
diff --git a/backend/app/models/reviewed_pr.py b/backend/app/models/reviewed_pr.py
@@ -0,0 +1,59 @@
+"""Dedup index for PRs an agent has already reviewed.
+
+Phase C's poll loop reads from this table to skip PRs it has already
+dispatched a review for, so the watcher's natural 120s tick doesn't
+re-review the same PR every cycle. Written server-side by the gateway
+on a successful POST /github/review — never by the watcher, never by
+a skill. Insert-only; rows persist as the audit trail of what was
+reviewed (read: "this is the agent's PR history").
+"""
+
+from app.database import get_supabase
+
+TABLE = "reviewed_prs"
+
+
+class ReviewedPRModel:
+    @staticmethod
+    def exists(agent_id: str, owner: str, repo: str, pr_number: int) -> bool:
+        """Return True when a row for this agent and PR is already stored."""
+        query = (
+            get_supabase()
+            .table(TABLE)
+            .select("id")
+            .eq("agent_id", agent_id)
+            .eq("owner", owner)
+            .eq("repo", repo)
+            .eq("pr_number", pr_number)
+        )
+        return bool(query.execute().data)
+
+    @staticmethod
+    def record(agent_id: str, owner: str, repo: str, pr_number: int) -> dict:
+        """Mark a PR as reviewed for this agent; safe to call more than once.
+
+        Upserts with `ignore_duplicates`, so an existing row does not trip
+        the (agent_id, owner, repo, pr_number) unique constraint the way a
+        plain insert would. Returns the inserted row, or the submitted fields
+        when the row already existed and the response is therefore empty.
+        """
+        data = {"agent_id": agent_id, "owner": owner, "repo": repo, "pr_number": pr_number}
+        conflict_target = "agent_id,owner,repo,pr_number"
+        table = get_supabase().table(TABLE)
+        query = table.upsert(data, on_conflict=conflict_target, ignore_duplicates=True)
+        result = query.execute()
+        # A skipped duplicate returns no rows, so hand back what was submitted.
+        return result.data[0] if result.data else data
+
+    @staticmethod
+    def list_by_agent(agent_id: str, limit: int = 100) -> list[dict]:
+        """Return up to `limit` rows for an agent, most recent review first."""
+        query = (
+            get_supabase()
+            .table(TABLE)
+            .select("*")
+            .eq("agent_id", agent_id)
+            .order("reviewed_at", desc=True)
+            .limit(limit)
+        )
+        return query.execute().data
diff --git a/backend/app/routers/gateway.py b/backend/app/routers/gateway.py
@@ -5,6 +5,8 @@
 from pydantic import BaseModel
 from app.auth import get_current_user
 from app.agent_auth import get_current_agent
+from app.models.agent_memory import AgentMemoryModel
+from app.models.reviewed_pr import ReviewedPRModel
 from app.services.gateway import GatewayService
 from app.services.policy import require_action
 from app.services.credential_store import CredentialStore
@@ -52,6 +54,11 @@ class DigestRequest(BaseModel):
     channel: str = "#agentos"
 
 
+class MemoryWriteRequest(BaseModel):  # JSON body accepted by POST /memory
+    key: str  # memory key to write; the handler rejects an empty string with 400
+    value: str  # text stored under `key` for the calling agent
+
+
 # ── Write endpoints ────────────────────────────────────────────────────────────
 
 @router.post("/email/send")
@@ -117,13 +124,24 @@ async def create_pr_review(
 ):
     require_action(agent, "github.review.submit")
     try:
-        return await GatewayService.create_pr_review(
+        result = await GatewayService.create_pr_review(
             agent["user_id"], payload.owner, payload.repo,
             payload.pull_number, payload.body, payload.event,
         )
     except ValueError as e:
         raise HTTPException(400, str(e))
 
+    # The review landed on GitHub — record dedup so Phase C's watcher
+    # doesn't re-review this PR on the next tick. Idempotent at the
+    # DB layer via the (agent_id, owner, repo, pr_number) unique constraint.
+    if not ReviewedPRModel.exists(
+        agent["id"], payload.owner, payload.repo, payload.pull_number
+    ):
+        ReviewedPRModel.record(
+            agent["id"], payload.owner, payload.repo, payload.pull_number
+        )
+    return result
+
 
 @router.post("/github/review/comment")
 async def create_pr_review_comment(
@@ -140,6 +158,30 @@ async def create_pr_review_comment(
         raise HTTPException(400, str(e))
 
 
+# ── Agent memory (agent-token auth + action policy) ────────────────────────────
+# The agent reads its own key/value store (injected into role_context at dispatch
+# in Phase C) and writes via the update-memory skill. Memory rows are scoped to
+# the calling agent_id — an agent cannot see or write another agent's memory.
+
+@router.get("/memory")
+async def list_memory(agent: dict = Depends(get_current_agent)):
+    require_action(agent, "agent.memory.read")  # action-policy check precedes the DB read
+    exposed = ("key", "value", "updated_at")  # the only columns returned to the agent
+    return {"memory": [{col: row[col] for col in exposed} for row in AgentMemoryModel.list_by_agent(agent["id"])]}
+
+
+@router.post("/memory")
+async def write_memory(payload: MemoryWriteRequest, agent: dict = Depends(get_current_agent)):
+    """Upsert one key/value pair for the calling agent and echo the stored row.
+    Responds with HTTP 400 when `key` is an empty string."""
+    # The action-policy check runs before anything is validated or written.
+    require_action(agent, "agent.memory.write")
+    if not payload.key:
+        raise HTTPException(400, "key is required")
+    stored = AgentMemoryModel.upsert(agent["id"], payload.key, payload.value)
+    return {field: stored[field] for field in ("key", "value", "updated_at")}
+
+
 @router.post("/discord/message")
 async def send_discord_message(
     payload: DiscordRequest,

diff --git a/backend/app/services/dispatcher.py b/backend/app/services/dispatcher.py
@@ -1,14 +1,29 @@
 """Platform-side service that sends tasks to agent containers over HTTP."""
 
+import logging
 import uuid
 
 import httpx
 from docker.errors import NotFound
 
 from app.config import get_settings
+from app.models.agent_memory import AgentMemoryModel
 from app.services.orchestrator import Orchestrator
 
 AGENT_PORT = 8080
+logger = logging.getLogger(__name__)
+
+
+def _load_memory(agent_id: str) -> dict:
+    """Build the {key: value} memory mapping that dispatch puts in role_context.
+    Best-effort: a failed lookup is logged and yields {} so a DB hiccup cannot
+    block dispatch. Compaction (LRU / LLM reflection, issue #23) lands here later."""
+    try:
+        stored = AgentMemoryModel.list_by_agent(agent_id)
+    except Exception as err:  # noqa: BLE001 — best-effort
+        logger.warning("dispatcher: memory load failed for agent=%s: %s", agent_id, err)
+        return {}
+    return {entry["key"]: entry["value"] for entry in stored}
 
 
 class Dispatcher:
@@ -20,10 +35,12 @@ async def dispatch_task(
         self, agent_id: str, instruction: str, metadata: dict | None = None
     ) -> dict:
         container_ip = self._orch.get_container_ip(agent_id)
+        # Inject the agent's persisted memory into role_context so it sees
+        # back what update-memory wrote on previous tasks.
         task_payload = {
             "task_id": str(uuid.uuid4()),
             "instruction": instruction,
-            "role_context": {},
+            "role_context": {"memory": _load_memory(agent_id)},
             "metadata": metadata or {},
         }