diff --git a/CHANGELOG.md b/CHANGELOG.md index 188abb1a..b1c489fa 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,12 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), ## [Unreleased] +### Added (TotalZack/agentmemory fork — B1 patch, pending upstream PR #273) + +- **`AGENTMEMORY_REMOTE_REQUIRED` env var** — when set to `1` / `true` / `yes` / `on`, the standalone MCP shim refuses to fall back to in-memory local mode if the `/agentmemory/livez` probe fails at startup. Throws a structured `RemoteUnreachableError` instead. Default off, so existing users see no behavior change. Recommended for production multi-machine deployments. +- **`AGENTMEMORY_LIVEZ_TIMEOUT_MS` env var** — overrides the probe timeout (alias for upstream's `AGENTMEMORY_PROBE_TIMEOUT_MS`; takes precedence). Default 2000 ms (same as upstream since v0.9.7). Useful on high-latency networks (Tailscale across regions, slow Coolify boots) where the default trips the silent-fallback path even when the backend is healthy. +- **Visible stderr line on silent local fallback** — when `AGENTMEMORY_URL` points at a remote backend but the `/livez` probe fails and `REMOTE_REQUIRED` is unset, the shim now emits a warning on stderr (with `url`, `probeTimeoutMs`, and a hint at the two env vars). + ## [0.9.9] — 2026-05-11 Two field-reported regressions closed: pinned memory slots never reached SessionStart context (the `renderPinnedContext` and `listPinnedSlots` helpers shipped in v0.7 had no callers), and the MiniMax compression provider read its base URL straight off `process.env`, missing `~/.agentmemory/.env` values that the rest of agentmemory loads through the shared merged-env path. 
diff --git a/README.md b/README.md index 6693b7cf..eef22231 100644 --- a/README.md +++ b/README.md @@ -923,6 +923,23 @@ Create `~/.agentmemory/.env`: # Auth # AGENTMEMORY_SECRET=your-secret +# Remote backend resilience (relevant when AGENTMEMORY_URL points at a +# self-hosted server rather than the default localhost). Both are opt-in. +# AGENTMEMORY_REMOTE_REQUIRED=1 # Fail loud (RemoteUnreachableError) + # when the /agentmemory/livez probe + # fails, instead of silently falling + # back to in-memory local mode. + # Recommended for production multi- + # machine setups where a silent + # fallback can land memories in the + # wrong store if the warning is missed. +# AGENTMEMORY_LIVEZ_TIMEOUT_MS=2000 # Timeout for the /agentmemory/livez + # probe at startup. Raise it on + # high-latency networks (Tailscale + # across regions, slow container + # boots) where the 2000 ms default + # trips the fallback path. + # Ports (defaults: 3111 API, 3113 viewer) # III_REST_PORT=3111 diff --git a/src/mcp/rest-proxy.ts b/src/mcp/rest-proxy.ts index 5c86bcd5..4e0d3a41 100644 --- a/src/mcp/rest-proxy.ts +++ b/src/mcp/rest-proxy.ts @@ -4,7 +4,10 @@ const CALL_TIMEOUT_MS = 15_000; const LOCAL_MODE_TTL_MS = 30_000; function probeTimeoutMs(): number { - const raw = process.env["AGENTMEMORY_PROBE_TIMEOUT_MS"]; + // AGENTMEMORY_LIVEZ_TIMEOUT_MS is the B1 name (documented in client-setup); AGENTMEMORY_PROBE_TIMEOUT_MS is upstream's name + const raw = + process.env["AGENTMEMORY_LIVEZ_TIMEOUT_MS"] ?? + process.env["AGENTMEMORY_PROBE_TIMEOUT_MS"]; if (!raw) return DEFAULT_HEALTH_PROBE_TIMEOUT_MS; const n = Number(raw); return Number.isFinite(n) && n > 0 ? 
Math.floor(n) : DEFAULT_HEALTH_PROBE_TIMEOUT_MS; @@ -27,6 +30,19 @@ export interface LocalHandle { export type Handle = ProxyHandle | LocalHandle; +export class RemoteUnreachableError extends Error { + constructor( + public readonly url: string, + public readonly probeTimeoutMs: number, + ) { + super( + `AGENTMEMORY_REMOTE_REQUIRED is set but ${url}/agentmemory/livez probe failed (timeout, network error, or non-2xx; timeout=${probeTimeoutMs}ms). ` + + `Either raise AGENTMEMORY_LIVEZ_TIMEOUT_MS, fix backend connectivity, or unset AGENTMEMORY_REMOTE_REQUIRED to allow local-mode fallback.`, + ); + this.name = "RemoteUnreachableError"; + } +} + let cached: Handle | null = null; let cachedAt = 0; let probeInFlight: Promise | null = null; @@ -40,6 +56,13 @@ function authHeader(): Record { return secret ? { authorization: `Bearer ${secret}` } : {}; } +function remoteRequired(): boolean { + const raw = process.env["AGENTMEMORY_REMOTE_REQUIRED"]; + if (!raw) return false; + const v = raw.trim().toLowerCase(); + return v === "1" || v === "true" || v === "yes" || v === "on"; +} + async function probe(url: string): Promise { const timeout = probeTimeoutMs(); try { @@ -56,7 +79,7 @@ async function probe(url: string): Promise { return res.ok; } catch (err) { process.stderr.write( - `[@agentmemory/mcp] livez probe ${url}/agentmemory/livez failed in ${timeout}ms: ${err instanceof Error ? err.message : String(err)}; falling back to local InMemoryKV (set AGENTMEMORY_FORCE_PROXY=1 to skip the probe, or raise AGENTMEMORY_PROBE_TIMEOUT_MS)\n`, + `[@agentmemory/mcp] livez probe ${url}/agentmemory/livez failed in ${timeout}ms: ${err instanceof Error ? 
err.message : String(err)}; falling back to local InMemoryKV (set AGENTMEMORY_FORCE_PROXY=1 to skip the probe, or raise AGENTMEMORY_LIVEZ_TIMEOUT_MS)\n`, ); return false; } @@ -114,6 +137,12 @@ export async function resolveHandle(): Promise { cachedAt = Date.now(); return handle; } + if (remoteRequired()) { + throw new RemoteUnreachableError(url, probeTimeoutMs()); + } + process.stderr.write( + `[@agentmemory/mcp] agentmemory backend unreachable; falling back to in-memory local mode (url=${url}, probeTimeoutMs=${probeTimeoutMs()}). Set AGENTMEMORY_REMOTE_REQUIRED=1 to fail loud, or AGENTMEMORY_LIVEZ_TIMEOUT_MS=N to allow more probe time.\n`, + ); const local: LocalHandle = { mode: "local" }; cached = local; cachedAt = Date.now(); diff --git a/test/mcp-standalone-proxy.test.ts b/test/mcp-standalone-proxy.test.ts index 6522691d..fc4d0e68 100644 --- a/test/mcp-standalone-proxy.test.ts +++ b/test/mcp-standalone-proxy.test.ts @@ -1,7 +1,11 @@ import { describe, expect, it, beforeEach, afterEach, vi } from "vitest"; import { handleToolCall } from "../src/mcp/standalone.js"; -import { resetHandleForTests } from "../src/mcp/rest-proxy.js"; +import { + RemoteUnreachableError, + resetHandleForTests, + resolveHandle, +} from "../src/mcp/rest-proxy.js"; import { InMemoryKV } from "../src/mcp/in-memory-kv.js"; type FetchMock = ReturnType; @@ -292,3 +296,73 @@ describe("@agentmemory/mcp standalone — server proxy (issue #159)", () => { } }); }); + +describe("@agentmemory/mcp standalone — remote-required + livez timeout opt-ins", () => { + const originalFetch = globalThis.fetch; + + beforeEach(() => { + resetHandleForTests(); + process.env["AGENTMEMORY_URL"] = BASE; + delete process.env["AGENTMEMORY_SECRET"]; + delete process.env["AGENTMEMORY_REMOTE_REQUIRED"]; + delete process.env["AGENTMEMORY_LIVEZ_TIMEOUT_MS"]; + }); + + afterEach(() => { + resetHandleForTests(); + globalThis.fetch = originalFetch; + delete 
process.env["AGENTMEMORY_URL"]; + delete process.env["AGENTMEMORY_REMOTE_REQUIRED"]; + delete process.env["AGENTMEMORY_LIVEZ_TIMEOUT_MS"]; + vi.restoreAllMocks(); + }); + + it("respects AGENTMEMORY_LIVEZ_TIMEOUT_MS — slow /livez aborts and falls back to local", async () => { + process.env["AGENTMEMORY_LIVEZ_TIMEOUT_MS"] = "10"; + let abortFired = false; + const fn = vi.fn( + (_url: string | URL, init?: RequestInit) => + new Promise((_resolve, reject) => { + init?.signal?.addEventListener("abort", () => { + abortFired = true; + reject(new DOMException("aborted", "AbortError")); + }); + // Never resolves on its own — only the abort path completes the promise. + }), + ); + (globalThis as { fetch: typeof fetch }).fetch = fn as unknown as typeof fetch; + + const handle = await resolveHandle(); + expect(handle.mode).toBe("local"); + expect(abortFired).toBe(true); + }); + + it("emits a stderr warning on local fallback when REMOTE_REQUIRED is unset", async () => { + const write = vi.spyOn(process.stderr, "write").mockImplementation(() => true); + installFetch(() => { + throw new Error("ECONNREFUSED"); + }); + + const handle = await resolveHandle(); + expect(handle.mode).toBe("local"); + const lines = write.mock.calls.map(([chunk]) => String(chunk)); + // probe() writes its own failure line to stderr too, so select the fallback line by content. + const fallback = lines.filter((line) => /agentmemory backend unreachable/i.test(line)); + expect(fallback).toHaveLength(1); + expect(fallback[0]).toContain(`url=${BASE}`); + }); + + it("throws RemoteUnreachableError when AGENTMEMORY_REMOTE_REQUIRED=1 and probe fails", async () => { + process.env["AGENTMEMORY_REMOTE_REQUIRED"] = "1"; + const write = vi.spyOn(process.stderr, "write").mockImplementation(() => true); + installFetch(() => { + throw new Error("ECONNREFUSED"); + }); + + await expect(resolveHandle()).rejects.toBeInstanceOf(RemoteUnreachableError); + // The fail-loud path must not write the local-fallback warning — the thrown + // error is the signal, not a log scan. + const lines = write.mock.calls.map(([chunk]) => String(chunk)); + expect(lines.some((line) => /agentmemory backend unreachable/i.test(line))).toBe(false); + }); +});