From 597ab479e55715eea486f3f83a400c77cb669c01 Mon Sep 17 00:00:00 2001 From: Rohit Ghumare Date: Sun, 17 May 2026 11:51:38 +0100 Subject: [PATCH] fix(mcp): inject livez probe to kill mcp-standalone test flake (#449) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The MCP standalone shim's REST proxy probed `:3111/agentmemory/livez` via a hard-coded `fetch()` with a 2s AbortController timeout. In CI, vitest's mock setup raced the in-flight probe — tests randomly failed on call counts and response-shape assertions depending on whether the probe completed before each test's own deadline. This file has been the source of every "10-11 pre-existing failures unrelated to this PR" footnote for the last five releases. Add a `setLivezProbe(fn)` module-level seam in `src/mcp/rest-proxy.ts` (option A in the issue). Default stays the real fetch-based probe — wire behaviour is unchanged in production. `resetHandleForTests()` also resets the probe so tests can't accidentally leak a stub across files. In `test/mcp-standalone.test.ts`, install an instant `ok:false` probe in `beforeEach` so the shim always takes the deterministic InMemoryKV fallback path. Replace `globalThis.fetch` with a trap that throws if hit — any regression that bypasses the seam fails loudly instead of timing out silently and re-introducing the flake. Validation: 10/10 consecutive runs green (~185ms each, down from 2.2s); proxy test file unaffected (14/14); full suite 988/988 stable across 3 runs. --- src/mcp/rest-proxy.ts | 44 +++++++++++++++++++++++++++++----- test/mcp-standalone.test.ts | 47 ++++++++++++++++++++++++++++++++++++- 2 files changed, 84 insertions(+), 7 deletions(-) diff --git a/src/mcp/rest-proxy.ts b/src/mcp/rest-proxy.ts index 5c86bcd5..ef03148a 100644 --- a/src/mcp/rest-proxy.ts +++ b/src/mcp/rest-proxy.ts @@ -40,17 +40,48 @@ function authHeader(): Record { return secret ? { authorization: `Bearer ${secret}` } : {}; } +/** + * Probes the agentmemory server's livez endpoint. Returns a Response-shaped + * object whose `ok` flag drives the proxy/local-fallback decision. + * + * Tests can swap this via {@link setLivezProbe} to avoid the real 2s + * AbortController race that destabilises mcp-standalone test runs (#449). + * Production callers should leave it on the default. + */ +export type LivezProbe = ( + url: string, + timeoutMs: number, + headers: Record, +) => Promise<{ ok: boolean; status?: number; statusText?: string }>; + +const defaultLivezProbe: LivezProbe = async (url, timeoutMs, headers) => { + const res = await fetch(`${url}/agentmemory/livez`, { + method: "GET", + headers, + signal: AbortSignal.timeout(timeoutMs), + }); + return { ok: res.ok, status: res.status, statusText: res.statusText }; +}; + +let livezProbe: LivezProbe = defaultLivezProbe; + +/** + * Override the livez probe. Intended for tests — production code should rely + * on the default fetch-based probe. Calling without an argument restores the + * default. Pair with {@link resetHandleForTests} so the cached handle is + * dropped before the next call. + */ +export function setLivezProbe(fn?: LivezProbe): void { + livezProbe = fn ?? defaultLivezProbe; +} + async function probe(url: string): Promise { const timeout = probeTimeoutMs(); try { - const res = await fetch(`${url}/agentmemory/livez`, { - method: "GET", - headers: authHeader(), - signal: AbortSignal.timeout(timeout), - }); + const res = await livezProbe(url, timeout, authHeader()); if (!res.ok) { process.stderr.write( - `[@agentmemory/mcp] livez probe ${url}/agentmemory/livez -> ${res.status} ${res.statusText}; falling back to local InMemoryKV (set AGENTMEMORY_FORCE_PROXY=1 to skip the probe)\n`, + `[@agentmemory/mcp] livez probe ${url}/agentmemory/livez -> ${res.status ?? "?"} ${res.statusText ?? ""}; falling back to local InMemoryKV (set AGENTMEMORY_FORCE_PROXY=1 to skip the probe)\n`, ); } return res.ok; @@ -130,4 +161,5 @@ export function resetHandleForTests(): void { cached = null; cachedAt = 0; probeInFlight = null; + livezProbe = defaultLivezProbe; } diff --git a/test/mcp-standalone.test.ts b/test/mcp-standalone.test.ts index 59bd985e..e5670ec6 100644 --- a/test/mcp-standalone.test.ts +++ b/test/mcp-standalone.test.ts @@ -1,4 +1,4 @@ -import { describe, it, expect, beforeEach, vi } from "vitest"; +import { describe, it, expect, beforeEach, afterEach, vi } from "vitest"; vi.mock("../src/logger.js", () => ({ logger: { info: vi.fn(), warn: vi.fn(), error: vi.fn() }, @@ -26,8 +26,30 @@ import { } from "../src/mcp/tools-registry.js"; import { InMemoryKV } from "../src/mcp/in-memory-kv.js"; import { handleToolCall } from "../src/mcp/standalone.js"; +import { + resetHandleForTests, + setLivezProbe, +} from "../src/mcp/rest-proxy.js"; import { writeFileSync } from "node:fs"; +// Issue #449: hard-coded fetch() against :3111 in the livez probe was racing +// with vitest's mock setup, making this file the "10-11 pre-existing failures" +// referenced in the last 5 release notes. Stub the probe with an instant +// ok:false response so the shim takes the deterministic InMemoryKV fallback +// path on every test. Guard the real network with a fetch trap so any +// regression that bypasses the DI seam fails loudly instead of timing out. +const instantLocalFallbackProbe = vi.fn(async () => ({ + ok: false, + status: 0, + statusText: "stubbed: forced local fallback", +})); + +const fetchTrap = vi.fn(async (url: unknown) => { + throw new Error( + `unexpected real fetch() call in mcp-standalone.test.ts: ${String(url)} — the livez probe DI stub should have absorbed this`, + ); +}); + describe("Tools Registry", () => { it("getAllTools returns all tools with unique names", () => { const tools = getAllTools(); @@ -120,8 +142,31 @@ describe("InMemoryKV", () => { }); describe("handleToolCall", () => { + const originalFetch = globalThis.fetch; + beforeEach(() => { vi.mocked(writeFileSync).mockClear(); + instantLocalFallbackProbe.mockClear(); + fetchTrap.mockClear(); + // Order matters: resetHandleForTests() restores the default probe and + // clears the cached handle. Install the stub AFTER the reset so the + // shim's next resolveHandle() call hits the stubbed instant-fail path + // instead of the real 2s AbortController fetch. + resetHandleForTests(); + setLivezProbe(instantLocalFallbackProbe); + (globalThis as { fetch: typeof fetch }).fetch = fetchTrap as unknown as typeof fetch; + }); + + afterEach(() => { + (globalThis as { fetch: typeof fetch }).fetch = originalFetch; + resetHandleForTests(); + }); + + it("livez probe stub is invoked instead of the real fetch (issue #449)", async () => { + const kv = new InMemoryKV(); + await handleToolCall("memory_save", { content: "regression guard" }, kv); + expect(instantLocalFallbackProbe).toHaveBeenCalledTimes(1); + expect(fetchTrap).not.toHaveBeenCalled(); }); it("memory_save persists to disk immediately after saving", async () => {