diff --git a/backend/agent-config/templates/code-review-engineer.yaml b/backend/agent-config/templates/code-review-engineer.yaml index 3e1ee6a..60cf411 100644 --- a/backend/agent-config/templates/code-review-engineer.yaml +++ b/backend/agent-config/templates/code-review-engineer.yaml @@ -7,6 +7,7 @@ required_tools: skills: - github-list-prs - github-pr-review + - update-memory system_prompt: | You are a senior code review engineer. Your responsibilities: @@ -24,6 +25,8 @@ allowed_actions: - github.pr.comment - github.review.submit - github.repo.read + - agent.memory.read + - agent.memory.write resource_limits: mem_limit: "512m" diff --git a/backend/agent-runtime/skills/update-memory/SKILL.md b/backend/agent-runtime/skills/update-memory/SKILL.md new file mode 100644 index 0000000..c2190e7 --- /dev/null +++ b/backend/agent-runtime/skills/update-memory/SKILL.md @@ -0,0 +1,45 @@ +--- +name: update_memory +description: Persist a preference or learned fact across sessions so future tasks can use it. +metadata: + { "openclaw": { "requires": { "bins": ["curl"] } } } +--- + +# Update Memory + +Use this skill to save something you have learned about how the user wants you to work — a style preference, a project convention, a person's role, anything you would want to remember next time. The value is written to the platform's memory store, scoped to you, and will be injected back into your context on the next task. SOUL.md cannot persist across container restarts; this skill is how you carry knowledge forward. + +## Save a memory + +``` +exec curl -s -X POST "${PLATFORM_GATEWAY_URL}/memory" \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer ${AGENT_TOKEN}" \ + -d '{"key": "KEY", "value": "VALUE"}' +``` + +## Read your stored memory + +``` +exec curl -s "${PLATFORM_GATEWAY_URL}/memory" \ + -H "Authorization: Bearer ${AGENT_TOKEN}" +``` + +## Parameters +- `KEY`: a stable, kebab-case-ish identifier (e.g. `style.tone`, `repos.acme-frontend.lang`, `people.alice.role`). Reusing a key overwrites the previous value. +- `VALUE`: plain text. Keep it short and self-contained — one sentence to a short paragraph. + +## When to use +- A user corrected your style — save the corrected style so you don't repeat the mistake. +- You learned a project-level convention (preferred review tone, files to skip, urgency rules). +- A user named a person, repo, or system you didn't know about. + +## When not to use +- Per-task scratchpad details (those live only in the current task). +- Anything secret or sensitive — memory is stored in the platform DB, not encrypted at rest. +- Information you can re-derive trivially from the task input. + +## Important +- Choose stable keys. Bad: `note-from-2026-05-23`. Good: `style.review.tone`. +- Prefer overwriting an existing key to creating a near-duplicate one. +- If memory grows large, the platform may consolidate it on your behalf — write atomic, well-scoped facts so consolidation can do something useful with them. diff --git a/backend/app/models/action_log.py b/backend/app/models/action_log.py new file mode 100644 index 0000000..ce013ed --- /dev/null +++ b/backend/app/models/action_log.py @@ -0,0 +1,52 @@ +"""Agent action log — the append-only audit stream the gateway writes a row +to for every agent-authed call. Allows are logged alongside denials so the +log is a full work history, not just a violation list. + +Phase B left a `logger.warning` stub on the deny path; Phase D promotes it +to persisted rows here, and extends coverage to the allow path too. The +work-log surface and the future LLM reflection (issue #23) both read from +this table. +""" + +from app.database import get_supabase + +TABLE = "agent_action_log" + + +class ActionLogModel: + @staticmethod + def record( + agent_id: str, + action: str, + outcome: str, + metadata: dict | None = None, + ) -> dict: + """Insert one audit row. + + `outcome` is "allowed" or "denied" (matches the DB check constraint). + `metadata` carries free-form context — e.g. the role at the time, the + endpoint, request shape. Kept jsonb so the schema doesn't churn as we + add fields. + """ + data = { + "agent_id": agent_id, + "action": action, + "outcome": outcome, + "metadata": metadata or {}, + } + result = get_supabase().table(TABLE).insert(data).execute() + return result.data[0] + + @staticmethod + def list_by_agent(agent_id: str, limit: int = 100) -> list[dict]: + """Return recent rows for an agent, newest first.""" + result = ( + get_supabase() + .table(TABLE) + .select("*") + .eq("agent_id", agent_id) + .order("created_at", desc=True) + .limit(limit) + .execute() + ) + return result.data diff --git a/backend/app/models/agent_memory.py b/backend/app/models/agent_memory.py new file mode 100644 index 0000000..668b6f0 --- /dev/null +++ b/backend/app/models/agent_memory.py @@ -0,0 +1,70 @@ +"""Per-agent memory — the key/value store the agent writes via the +update-memory skill and reads back as injected role_context at dispatch. + +Rows are scoped to (agent_id, key). Last-write-wins on the same key +(updated_at refreshes); the row count grows with the number of distinct +preferences, not with task volume. Compaction strategies are tracked in +issue #23. +""" + +from datetime import datetime, timezone + +from app.database import get_supabase + +TABLE = "agent_memory" + + +class AgentMemoryModel: + @staticmethod + def list_by_agent(agent_id: str) -> list[dict]: + """Return every memory row for an agent, newest write first.""" + result = ( + get_supabase() + .table(TABLE) + .select("*") + .eq("agent_id", agent_id) + .order("updated_at", desc=True) + .execute() + ) + return result.data + + @staticmethod + def get(agent_id: str, key: str) -> dict | None: + result = ( + get_supabase() + .table(TABLE) + .select("*") + .eq("agent_id", agent_id) + .eq("key", key) + .execute() + ) + return result.data[0] if result.data else None + + @staticmethod + def upsert(agent_id: str, key: str, value: str) -> dict: + """Set the value for (agent_id, key), refreshing updated_at.""" + data = { + "agent_id": agent_id, + "key": key, + "value": value, + "updated_at": datetime.now(timezone.utc).isoformat(), + } + result = ( + get_supabase() + .table(TABLE) + .upsert(data, on_conflict="agent_id,key") + .execute() + ) + return result.data[0] + + @staticmethod + def delete(agent_id: str, key: str) -> bool: + result = ( + get_supabase() + .table(TABLE) + .delete() + .eq("agent_id", agent_id) + .eq("key", key) + .execute() + ) + return len(result.data) > 0 diff --git a/backend/app/models/reviewed_pr.py b/backend/app/models/reviewed_pr.py new file mode 100644 index 0000000..22b2252 --- /dev/null +++ b/backend/app/models/reviewed_pr.py @@ -0,0 +1,59 @@ +"""Dedup index for PRs an agent has already reviewed. + +Phase C's poll loop reads from this table to skip PRs it has already +dispatched a review for, so the watcher's natural 120s tick doesn't +re-review the same PR every cycle. Written server-side by the gateway +on a successful POST /github/review — never by the watcher, never by +a skill. Insert-only; rows persist as the audit trail of what was +reviewed (read: "this is the agent's PR history"). +""" + +from app.database import get_supabase + +TABLE = "reviewed_prs" + + +class ReviewedPRModel: + @staticmethod + def exists(agent_id: str, owner: str, repo: str, pr_number: int) -> bool: + result = ( + get_supabase() + .table(TABLE) + .select("id") + .eq("agent_id", agent_id) + .eq("owner", owner) + .eq("repo", repo) + .eq("pr_number", pr_number) + .execute() + ) + return bool(result.data) + + @staticmethod + def record(agent_id: str, owner: str, repo: str, pr_number: int) -> dict: + """Insert a row marking a PR as reviewed for this agent. + + The (agent_id, owner, repo, pr_number) unique constraint makes + re-inserts idempotent at the DB level; this method assumes the + caller has not already inserted the same row. + """ + data = { + "agent_id": agent_id, + "owner": owner, + "repo": repo, + "pr_number": pr_number, + } + result = get_supabase().table(TABLE).insert(data).execute() + return result.data[0] + + @staticmethod + def list_by_agent(agent_id: str, limit: int = 100) -> list[dict]: + result = ( + get_supabase() + .table(TABLE) + .select("*") + .eq("agent_id", agent_id) + .order("reviewed_at", desc=True) + .limit(limit) + .execute() + ) + return result.data diff --git a/backend/app/routers/gateway.py b/backend/app/routers/gateway.py index d6de63c..9af216d 100644 --- a/backend/app/routers/gateway.py +++ b/backend/app/routers/gateway.py @@ -5,6 +5,8 @@ from pydantic import BaseModel from app.auth import get_current_user from app.agent_auth import get_current_agent +from app.models.agent_memory import AgentMemoryModel +from app.models.reviewed_pr import ReviewedPRModel from app.services.gateway import GatewayService from app.services.policy import require_action from app.services.credential_store import CredentialStore @@ -52,6 +54,11 @@ class DigestRequest(BaseModel): channel: str = "#agentos" +class MemoryWriteRequest(BaseModel): + key: str + value: str + + # ── Write endpoints ──────────────────────────────────────────────────────────── @router.post("/email/send") @@ -117,13 +124,24 @@ async def create_pr_review( ): require_action(agent, "github.review.submit") try: - return await GatewayService.create_pr_review( + result = await GatewayService.create_pr_review( agent["user_id"], payload.owner, payload.repo, payload.pull_number, payload.body, payload.event, ) except ValueError as e: raise HTTPException(400, str(e)) + # The review landed on GitHub — record dedup so Phase C's watcher + # doesn't re-review this PR on the next tick. Idempotent at the + # DB layer via the (agent_id, owner, repo, pr_number) unique constraint. + if not ReviewedPRModel.exists( + agent["id"], payload.owner, payload.repo, payload.pull_number + ): + ReviewedPRModel.record( + agent["id"], payload.owner, payload.repo, payload.pull_number + ) + return result + @router.post("/github/review/comment") async def create_pr_review_comment( @@ -140,6 +158,30 @@ async def create_pr_review_comment( raise HTTPException(400, str(e)) +# ── Agent memory (agent-token auth + action policy) ──────────────────────────── +# The agent reads its own key/value store (injected into role_context at dispatch +# in Phase C) and writes via the update-memory skill. Memory rows are scoped to +# the calling agent_id — an agent cannot see or write another agent's memory. + +@router.get("/memory") +async def list_memory(agent: dict = Depends(get_current_agent)): + require_action(agent, "agent.memory.read") + rows = AgentMemoryModel.list_by_agent(agent["id"]) + return {"memory": [{"key": r["key"], "value": r["value"], "updated_at": r["updated_at"]} for r in rows]} + + +@router.post("/memory") +async def write_memory( + payload: MemoryWriteRequest, + agent: dict = Depends(get_current_agent), +): + require_action(agent, "agent.memory.write") + if not payload.key: + raise HTTPException(400, "key is required") + row = AgentMemoryModel.upsert(agent["id"], payload.key, payload.value) + return {"key": row["key"], "value": row["value"], "updated_at": row["updated_at"]} + + @router.post("/discord/message") async def send_discord_message( payload: DiscordRequest, diff --git a/backend/app/services/dispatcher.py b/backend/app/services/dispatcher.py index afe7370..ff85927 100644 --- a/backend/app/services/dispatcher.py +++ b/backend/app/services/dispatcher.py @@ -1,14 +1,29 @@ """Platform-side service that sends tasks to agent containers over HTTP.""" +import logging import uuid import httpx from docker.errors import NotFound from app.config import get_settings +from app.models.agent_memory import AgentMemoryModel from app.services.orchestrator import Orchestrator AGENT_PORT = 8080 +logger = logging.getLogger(__name__) + + +def _load_memory(agent_id: str) -> dict: + """Return the agent's persisted memory as a {key: value} dict for injection + into role_context. Compaction strategies (LRU / LLM reflection) land here + later — see issue #23. Best-effort: a DB hiccup must not block dispatch.""" + try: + rows = AgentMemoryModel.list_by_agent(agent_id) + except Exception as exc: # noqa: BLE001 — best-effort + logger.warning("dispatcher: memory load failed for agent=%s: %s", agent_id, exc) + return {} + return {row["key"]: row["value"] for row in rows} class Dispatcher: @@ -20,10 +35,12 @@ async def dispatch_task( self, agent_id: str, instruction: str, metadata: dict | None = None ) -> dict: container_ip = self._orch.get_container_ip(agent_id) + # Inject the agent's persisted memory into role_context so it sees + # back what update-memory wrote on previous tasks. task_payload = { "task_id": str(uuid.uuid4()), "instruction": instruction, - "role_context": {}, + "role_context": {"memory": _load_memory(agent_id)}, "metadata": metadata or {}, } diff --git a/backend/app/services/policy.py b/backend/app/services/policy.py index 48a92f6..5d38411 100644 --- a/backend/app/services/policy.py +++ b/backend/app/services/policy.py @@ -13,17 +13,38 @@ from fastapi import HTTPException +from app.models.action_log import ActionLogModel from app.services.template_loader import load_template logger = logging.getLogger(__name__) +def _audit(agent_id: str | None, action: str, outcome: str, role: str) -> None: + """Persist one agent_action_log row; never raise into the request path. + + The audit write must not break the underlying request — a DB hiccup + should log locally and let the gateway response proceed normally. + """ + if not agent_id: + return + try: + ActionLogModel.record( + agent_id=agent_id, + action=action, + outcome=outcome, + metadata={"role": role}, + ) + except Exception as exc: # noqa: BLE001 — best-effort audit + logger.warning("policy: audit write failed (%s): %s", outcome, exc) + + def require_action(agent: dict, action: str) -> None: """Raise HTTP 403 unless the agent's role template permits ``action``. Denied-by-default: anything not explicitly listed in the role template's - ``allowed_actions`` is refused. A denial is recorded as an audit log line - — Phase D promotes this stub to a persisted ``agent_action_log`` row. + ``allowed_actions`` is refused. Both outcomes write a row to + ``agent_action_log`` — Phase D promoted Phase B's deny-only log line to + a persisted audit stream that covers allow + deny. """ role = agent.get("role", "") try: @@ -37,6 +58,7 @@ def require_action(agent: dict, action: str) -> None: "policy: DENY agent=%s role=%s action=%s (not in allowed_actions)", agent.get("id"), role, action, ) + _audit(agent.get("id"), action, "denied", role) raise HTTPException( 403, f"Action '{action}' is not permitted for role '{role}'" ) @@ -45,3 +67,4 @@ def require_action(agent: dict, action: str) -> None: "policy: ALLOW agent=%s role=%s action=%s", agent.get("id"), role, action, ) + _audit(agent.get("id"), action, "allowed", role) diff --git a/backend/migrations/004_code_review_engineer.sql b/backend/migrations/004_code_review_engineer.sql index a194e18..590daff 100644 --- a/backend/migrations/004_code_review_engineer.sql +++ b/backend/migrations/004_code_review_engineer.sql @@ -10,3 +10,58 @@ alter table agents add column if not exists agent_token text; create unique index if not exists idx_agents_agent_token on agents(agent_token); + +-- ── Phase D — agent memory & work log ──────────────────────────────────────── +-- Three tables added together. agent_memory is the per-agent key/value store +-- the update-memory skill writes to, injected into role_context at dispatch. +-- agent_action_log is the append-only audit stream every agent-authed gateway +-- call writes a row to (allow + deny). reviewed_prs is Phase C's dedup index, +-- written server-side on a successful POST /github/review. + +create table if not exists agent_memory ( + id uuid primary key default gen_random_uuid(), + agent_id uuid references agents(id) on delete cascade not null, + key text not null, + value text not null, + updated_at timestamptz default now(), + unique(agent_id, key) +); + +create index if not exists idx_agent_memory_agent_id on agent_memory(agent_id); + +create table if not exists agent_action_log ( + id uuid primary key default gen_random_uuid(), + agent_id uuid references agents(id) on delete cascade not null, + action text not null, + outcome text not null check (outcome in ('allowed', 'denied')), + metadata jsonb default '{}'::jsonb, + created_at timestamptz default now() +); + +create index if not exists idx_agent_action_log_agent_id on agent_action_log(agent_id); +create index if not exists idx_agent_action_log_created_at on agent_action_log(created_at desc); + +create table if not exists reviewed_prs ( + id uuid primary key default gen_random_uuid(), + agent_id uuid references agents(id) on delete cascade not null, + owner text not null, + repo text not null, + pr_number int not null, + reviewed_at timestamptz default now(), + unique(agent_id, owner, repo, pr_number) +); + +create index if not exists idx_reviewed_prs_agent_id on reviewed_prs(agent_id); + +-- RLS — same closed-by-default pattern as the existing tables. +alter table agent_memory enable row level security; +alter table agent_action_log enable row level security; +alter table reviewed_prs enable row level security; + +drop policy if exists "Service role full access on agent_memory" on agent_memory; +drop policy if exists "Service role full access on agent_action_log" on agent_action_log; +drop policy if exists "Service role full access on reviewed_prs" on reviewed_prs; + +create policy "Service role full access on agent_memory" on agent_memory for all using (true); +create policy "Service role full access on agent_action_log" on agent_action_log for all using (true); +create policy "Service role full access on reviewed_prs" on reviewed_prs for all using (true); diff --git a/backend/migrations/schema.sql b/backend/migrations/schema.sql index 03ccbce..f80cab0 100644 --- a/backend/migrations/schema.sql +++ b/backend/migrations/schema.sql @@ -46,17 +46,62 @@ alter table credentials drop constraint if exists credentials_service_check; alter table credentials add constraint credentials_service_check check (service in ('gmail', 'slack', 'discord', 'github', 'hubspot')); +-- ── Agent memory, action log, reviewed PRs ────────────────────────────────── +create table if not exists agent_memory ( + id uuid primary key default gen_random_uuid(), + agent_id uuid references agents(id) on delete cascade not null, + key text not null, + value text not null, + updated_at timestamptz default now(), + unique(agent_id, key) +); + +create index if not exists idx_agent_memory_agent_id on agent_memory(agent_id); + +create table if not exists agent_action_log ( + id uuid primary key default gen_random_uuid(), + agent_id uuid references agents(id) on delete cascade not null, + action text not null, + outcome text not null check (outcome in ('allowed', 'denied')), + metadata jsonb default '{}'::jsonb, + created_at timestamptz default now() +); + +create index if not exists idx_agent_action_log_agent_id on agent_action_log(agent_id); +create index if not exists idx_agent_action_log_created_at on agent_action_log(created_at desc); + +create table if not exists reviewed_prs ( + id uuid primary key default gen_random_uuid(), + agent_id uuid references agents(id) on delete cascade not null, + owner text not null, + repo text not null, + pr_number int not null, + reviewed_at timestamptz default now(), + unique(agent_id, owner, repo, pr_number) +); + +create index if not exists idx_reviewed_prs_agent_id on reviewed_prs(agent_id); + -- ── Row Level Security ─────────────────────────────────────────────────────── -- The backend uses the service-role key, which bypasses RLS. These policies -- keep anon/authenticated access closed by default. alter table users enable row level security; alter table agents enable row level security; alter table credentials enable row level security; +alter table agent_memory enable row level security; +alter table agent_action_log enable row level security; +alter table reviewed_prs enable row level security; drop policy if exists "Service role full access on users" on users; drop policy if exists "Service role full access on agents" on agents; drop policy if exists "Service role full access on credentials" on credentials; +drop policy if exists "Service role full access on agent_memory" on agent_memory; +drop policy if exists "Service role full access on agent_action_log" on agent_action_log; +drop policy if exists "Service role full access on reviewed_prs" on reviewed_prs; create policy "Service role full access on users" on users for all using (true); create policy "Service role full access on agents" on agents for all using (true); create policy "Service role full access on credentials" on credentials for all using (true); +create policy "Service role full access on agent_memory" on agent_memory for all using (true); +create policy "Service role full access on agent_action_log" on agent_action_log for all using (true); +create policy "Service role full access on reviewed_prs" on reviewed_prs for all using (true); diff --git a/backend/tests/test_dispatcher.py b/backend/tests/test_dispatcher.py index 02d2436..0668e91 100644 --- a/backend/tests/test_dispatcher.py +++ b/backend/tests/test_dispatcher.py @@ -40,6 +40,52 @@ async def test_dispatch_task(self, mock_orchestrator, mock_httpx_client): call_url = mock_httpx_client.post.call_args[0][0] assert "172.18.0.5:8080/task" in call_url + @pytest.mark.asyncio + async def test_dispatch_task_injects_memory(self, mock_orchestrator, mock_httpx_client): + """The agent's persisted memory rides into the container as role_context + so the agent sees back what update-memory wrote on previous tasks.""" + from app.services.dispatcher import Dispatcher + + mock_resp = MagicMock() + mock_resp.json.return_value = {"accepted": True, "task_id": "t-1"} + mock_resp.raise_for_status = MagicMock() + mock_httpx_client.post = AsyncMock(return_value=mock_resp) + + with patch("app.services.dispatcher.AgentMemoryModel") as mock_mem: + mock_mem.list_by_agent.return_value = [ + {"key": "style.tone", "value": "concise"}, + {"key": "repos.acme.lang", "value": "TypeScript"}, + ] + dispatcher = Dispatcher(orchestrator=mock_orchestrator) + await dispatcher.dispatch_task("agent-001", "Review PR #42") + + body = mock_httpx_client.post.call_args.kwargs["json"] + assert body["role_context"]["memory"] == { + "style.tone": "concise", + "repos.acme.lang": "TypeScript", + } + + @pytest.mark.asyncio + async def test_dispatch_task_memory_failure_does_not_block( + self, mock_orchestrator, mock_httpx_client + ): + """A memory-load failure must not block dispatch — best-effort.""" + from app.services.dispatcher import Dispatcher + + mock_resp = MagicMock() + mock_resp.json.return_value = {"accepted": True, "task_id": "t-1"} + mock_resp.raise_for_status = MagicMock() + mock_httpx_client.post = AsyncMock(return_value=mock_resp) + + with patch("app.services.dispatcher.AgentMemoryModel") as mock_mem: + mock_mem.list_by_agent.side_effect = RuntimeError("db down") + dispatcher = Dispatcher(orchestrator=mock_orchestrator) + result = await dispatcher.dispatch_task("agent-001", "Do X") + + assert result["accepted"] is True + body = mock_httpx_client.post.call_args.kwargs["json"] + assert body["role_context"] == {"memory": {}} + class TestGetAgentTaskStatus: @pytest.mark.asyncio diff --git a/backend/tests/test_gateway.py b/backend/tests/test_gateway.py index 79ed293..49b7f88 100644 --- a/backend/tests/test_gateway.py +++ b/backend/tests/test_gateway.py @@ -113,12 +113,40 @@ def test_list_pull_requests(self, agent_client): resp = client.get("/gateway/github/pulls/owner/repo") assert resp.status_code == 200 - def test_create_pr_review(self, agent_client): + def test_create_pr_review_records_dedup(self, agent_client): + """On a successful review, the gateway inserts a reviewed_prs row so + Phase C's watcher will not re-review this PR on the next tick.""" client, agent, fake_sb = agent_client with patch("app.services.gateway.CredentialStore") as mock_cs, patch( "app.services.gateway.httpx.AsyncClient" - ) as mock_httpx: + ) as mock_httpx, patch("app.routers.gateway.ReviewedPRModel") as mock_rp: + mock_cs.get.return_value = {"service": "github", "token": "ghp_test", "scopes": []} + mock_resp = MagicMock(status_code=200) + mock_resp.json.return_value = {"id": 1} + mock_httpx.return_value.__aenter__ = AsyncMock( + return_value=MagicMock(request=AsyncMock(return_value=mock_resp)) + ) + mock_httpx.return_value.__aexit__ = AsyncMock(return_value=False) + mock_rp.exists.return_value = False + + resp = client.post( + "/gateway/github/review", + json={ + "owner": "acme", "repo": "api", "pull_number": 42, + "body": "LGTM", "event": "APPROVE", + }, + ) + assert resp.status_code == 200 + mock_rp.record.assert_called_once_with(agent["id"], "acme", "api", 42) + + def test_create_pr_review_skips_dedup_when_already_recorded(self, agent_client): + """If reviewed_prs already has the row, the gateway does not re-insert.""" + client, agent, fake_sb = agent_client + + with patch("app.services.gateway.CredentialStore") as mock_cs, patch( + "app.services.gateway.httpx.AsyncClient" + ) as mock_httpx, patch("app.routers.gateway.ReviewedPRModel") as mock_rp: mock_cs.get.return_value = {"service": "github", "token": "ghp_test", "scopes": []} mock_resp = MagicMock(status_code=200) mock_resp.json.return_value = {"id": 1} @@ -126,18 +154,17 @@ def test_create_pr_review(self, agent_client): return_value=MagicMock(request=AsyncMock(return_value=mock_resp)) ) mock_httpx.return_value.__aexit__ = AsyncMock(return_value=False) + mock_rp.exists.return_value = True resp = client.post( "/gateway/github/review", json={ - "owner": "acme", - "repo": "api", - "pull_number": 42, - "body": "LGTM", - "event": "APPROVE", + "owner": "acme", "repo": "api", "pull_number": 42, + "body": "LGTM", "event": "APPROVE", }, ) assert resp.status_code == 200 + mock_rp.record.assert_not_called() def test_github_no_credential(self, agent_client): client, agent, fake_sb = agent_client @@ -174,3 +201,64 @@ def test_github_action_denied_for_wrong_role(self, client, fake_supabase): headers={"Authorization": "Bearer at_secretary"}, ) assert resp.status_code == 403 + + +class TestMemory: + """The /gateway/memory endpoints let an agent persist key/value preferences + across container restarts. They are agent-authed and policy-gated, so the + agent's role template must list agent.memory.{read,write}.""" + + def test_write_memory(self, agent_client): + client, agent, fake_sb = agent_client + with patch("app.routers.gateway.AgentMemoryModel") as mock_mem: + mock_mem.upsert.return_value = { + "key": "style.tone", "value": "concise", + "updated_at": "2026-05-23T12:00:00+00:00", + } + resp = client.post( + "/gateway/memory", + json={"key": "style.tone", "value": "concise"}, + ) + assert resp.status_code == 200 + mock_mem.upsert.assert_called_once_with(agent["id"], "style.tone", "concise") + + def test_read_memory(self, agent_client): + client, agent, fake_sb = agent_client + with patch("app.routers.gateway.AgentMemoryModel") as mock_mem: + mock_mem.list_by_agent.return_value = [ + {"key": "style.tone", "value": "concise", "updated_at": "t"}, + {"key": "repos.acme.lang", "value": "TypeScript", "updated_at": "t"}, + ] + resp = client.get("/gateway/memory") + assert resp.status_code == 200 + body = resp.json() + assert len(body["memory"]) == 2 + assert {m["key"] for m in body["memory"]} == {"style.tone", "repos.acme.lang"} + mock_mem.list_by_agent.assert_called_once_with(agent["id"]) + + def test_memory_requires_token(self, client): + """Missing Authorization → 401.""" + resp = client.get("/gateway/memory") + assert resp.status_code == 401 + + def test_memory_denied_without_template_permission(self, client, fake_supabase): + """A role whose allowed_actions lack agent.memory.* gets 403, not 200.""" + agent = { + "id": "agent-x", "user_id": "user-001", "role": "customer-support", + "status": "running", "agent_token": "at_cs", + } + fake_supabase.get_table("agents").set_select_result([agent]) + resp = client.get( + "/gateway/memory", + headers={"Authorization": "Bearer at_cs"}, + ) + assert resp.status_code == 403 + + def test_memory_write_requires_key(self, agent_client): + client, agent, fake_sb = agent_client + with patch("app.routers.gateway.AgentMemoryModel"): + resp = client.post( + "/gateway/memory", + json={"key": "", "value": "v"}, + ) + assert resp.status_code == 400 diff --git a/backend/tests/test_policy.py b/backend/tests/test_policy.py index 2601aff..dce21b2 100644 --- a/backend/tests/test_policy.py +++ b/backend/tests/test_policy.py @@ -4,6 +4,8 @@ resolves *who* is calling, require_action enforces *what* they may do. """ +from unittest.mock import patch + import pytest from fastapi import HTTPException @@ -66,3 +68,42 @@ def test_valid_token_resolves_agent(self, fake_supabase): result = get_current_agent("Bearer at_good") assert result["id"] == "a1" assert result["user_id"] == "u1" + + +class TestActionLogAudit: + """Phase D promotes Phase B's deny-only log line to a persisted row, and + extends coverage to the allow path too — every require_action call leaves + an audit trail.""" + + def test_allow_writes_audit_row(self): + agent = {"id": "a1", "role": "code-review-engineer"} + with patch("app.services.policy.ActionLogModel") as mock_log: + require_action(agent, "github.review.submit") + mock_log.record.assert_called_once() + kwargs = mock_log.record.call_args.kwargs + assert kwargs["agent_id"] == "a1" + assert kwargs["action"] == "github.review.submit" + assert kwargs["outcome"] == "allowed" + assert kwargs["metadata"]["role"] == "code-review-engineer" + + def test_deny_writes_audit_row(self): + agent = {"id": "a1", "role": "code-review-engineer"} + with patch("app.services.policy.ActionLogModel") as mock_log: + with pytest.raises(HTTPException): + require_action(agent, "github.pr.merge") + mock_log.record.assert_called_once() + kwargs = mock_log.record.call_args.kwargs + assert kwargs["outcome"] == "denied" + assert kwargs["action"] == "github.pr.merge" + + def test_audit_failure_does_not_break_request(self): + """A DB hiccup on the audit write must not block the policy check.""" + agent = {"id": "a1", "role": "code-review-engineer"} + with patch("app.services.policy.ActionLogModel") as mock_log: + mock_log.record.side_effect = RuntimeError("db down") + # Allow path still returns cleanly. + require_action(agent, "github.review.submit") + # Deny path still raises the policy 403, not the audit error. + with pytest.raises(HTTPException) as exc: + require_action(agent, "github.pr.merge") + assert exc.value.status_code == 403