From 71ae3ae8cc9f38f96e06ea8c1df28c480712d90d Mon Sep 17 00:00:00 2001 From: Omar Dominguez <256392855+odominguez7@users.noreply.github.com> Date: Wed, 20 May 2026 18:51:39 -0400 Subject: [PATCH 1/5] feat(browse): tee detached server stdio to /browse-server.log After proc.unref() in startServer(), the spawned server's stdout/stderr write to FDs that nothing reads. Every console.log/error from the detached server is lost, so a 'why did the daemon die' question has no log to consult. Open the log file in append mode, point the child's stdio at the fd, stash the path on the proc handle so the startup-failure path can read the tail when the pipe is no longer attached. Tail read is bounded to the last 64KB via seek-from-end so a multi-failed-startup log doesn't get fully read into memory. Co-Authored-By: Claude Opus 4.7 --- browse/src/cli.ts | 51 ++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 48 insertions(+), 3 deletions(-) diff --git a/browse/src/cli.ts b/browse/src/cli.ts index 4f523bea7a..95513a55fb 100644 --- a/browse/src/cli.ts +++ b/browse/src/cli.ts @@ -241,10 +241,32 @@ async function startServer(extraEnv?: Record): Promise= 0 ? logFd : 'pipe', logFd >= 0 ? logFd : 'pipe'], env: { ...process.env, BROWSE_STATE_FILE: config.stateFile, BROWSE_PARENT_PID: parentPid, ...extraEnv }, }); + // Stash the log path for the startup-failure path (proc.stderr is not a + // pipe when stdio was redirected to a file fd, so getReader() would + // throw — read the tail of the log file instead). + if (logFd >= 0) (proc as any).__logPath = logPath; proc.unref(); } @@ -261,8 +283,31 @@ async function startServer(extraEnv?: Record): Promise Date: Wed, 20 May 2026 18:51:54 -0400 Subject: [PATCH 2/5] feat(browse): classifyDisconnect splits Chromium disconnect into clean-drain vs crash MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The launched-mode browser.on('disconnected') handler treated every Chromium disappear as a crash. When the user closes their last tab, Chromium naturally quits because it has nothing left to display and fires 'disconnected'. The old code printed 'FATAL: Chromium process crashed' and process.exit(1) on that path, killing the sidebar-agent and any in-flight PTY work. Smoking gun in the daemon log: [browse] Tab closed (id=1, remaining=0) [browse] FATAL: Chromium process crashed or was killed. Server exiting. classifyDisconnect({pagesSize, mode}) is a pure helper that splits by pages.size: zero pages means clean drain (browser-empty-{launched,rehead}, exit 0, no FATAL); pages still live means real crash (chromium-crash-*, exit 1, FATAL preserved). Wired into both the launched-mode handler and the post-rehead handler — they collapse from two near-duplicate inline blocks into one shared classification. onDisconnect signature gains an optional exitCode parameter so the classification's exit code flows through to the server's wiring instead of being decorative. The existing browser-disconnect call site from the headed-mode page-close chain omits exitCode and gets the default 2 (preserves the user-close convention). Co-Authored-By: Claude Opus 4.7 --- browse/src/browser-manager.ts | 87 +++++++++++++++++++++++++++++++---- 1 file changed, 77 insertions(+), 10 deletions(-) diff --git a/browse/src/browser-manager.ts b/browse/src/browser-manager.ts index cdbd5fc500..b66a547635 100644 --- a/browse/src/browser-manager.ts +++ b/browse/src/browser-manager.ts @@ -40,6 +40,40 @@ export function isCustomChromium(): boolean { return p.includes('GBrowser') || p.includes('gbrowser'); } +/** + * Classify a Chromium `disconnected` event as a clean drain (the user just + * closed the last tab, Chromium had nothing left to display) vs a real crash. + * + * Before this distinction existed, every disconnect — including the totally + * benign "user closed last tab" path — printed `FATAL: Chromium process + * crashed` and `process.exit(1)`, which killed the sidebar-agent and any + * in-flight PTY work. The smoking gun in the postmortem log was the pair: + * + * [browse] Tab closed (id=N, remaining=0) + * [browse] FATAL: Chromium process crashed or was killed. + * + * Same event, two stories. After: `pages.size === 0` means the disconnect + * is the natural consequence of the close, exit 0, no FATAL. Pages still + * live means real crash, exit 1 — the original behavior, preserved. + * + * Pure function on purpose: unit-testable without spinning up Chromium. + */ +export type DisconnectKind = 'clean-drain' | 'crash'; +export interface DisconnectClassification { + kind: DisconnectKind; + reason: string; + exitCode: 0 | 1; +} +export function classifyDisconnect(opts: { + pagesSize: number; + mode: 'launched' | 'rehead'; +}): DisconnectClassification { + if (opts.pagesSize === 0) { + return { kind: 'clean-drain', reason: `browser-empty-${opts.mode}`, exitCode: 0 }; + } + return { kind: 'crash', reason: `chromium-crash-${opts.mode}`, exitCode: 1 }; +} + export type { RefEntry }; // Re-export TabSession for consumers @@ -119,9 +153,17 @@ export class BrowserManager { // Called when the headed browser disconnects without intentional teardown // (user closed the window). Wired up by server.ts to run full cleanup - // (sidebar-agent, state file, profile locks) before exiting with code 2. - // Returns void or a Promise; rejections are caught and fall back to exit(2). - public onDisconnect: (() => void | Promise) | null = null; + // (sidebar-agent, state file, profile locks) before exiting. + // Returns void or a Promise; rejections are caught and fall back to the + // passed exit code. + // Reason is a short tag describing why the disconnect fired; server.ts + // writes it into ~/.gstack/last-shutdown.json for post-mortem diagnostics. + // exitCode is the process exit code the caller should use after cleanup: + // 0 = clean drain (last tab closed, Chromium had nothing to display), + // 1 = real crash (Chromium died with pages still live), + // 2 = user-explicit close (existing convention from the page-close chain; + // also the default when no exit code is specified). + public onDisconnect: ((reason?: string, exitCode?: 0 | 1 | 2) => void | Promise) | null = null; getConnectionMode(): 'launched' | 'headed' { return this.connectionMode; } @@ -246,11 +288,24 @@ export class BrowserManager { ...(this.proxyConfig ? { proxy: this.proxyConfig } : {}), }); - // Chromium crash → exit with clear message + // Chromium disconnect → classifyDisconnect (see top of file) splits the + // event into clean-drain (last tab closed, exit 0) vs real crash (exit 1). this.browser.on('disconnected', () => { - console.error('[browse] FATAL: Chromium process crashed or was killed. Server exiting.'); - console.error('[browse] Console/network logs flushed to .gstack/browse-*.log'); - process.exit(1); + const c = classifyDisconnect({ pagesSize: this.pages.size, mode: 'launched' }); + if (c.kind === 'clean-drain') { + console.log('[browse] Chromium quit after last tab closed. Shutting down cleanly.'); + } else { + console.error('[browse] FATAL: Chromium process crashed or was killed. Server exiting.'); + console.error('[browse] Console/network logs flushed to .gstack/browse-*.log'); + } + try { + const result = this.onDisconnect?.(c.reason, c.exitCode); + if (result && typeof (result as Promise).catch === 'function') { + (result as Promise).catch(() => process.exit(c.exitCode)); + return; + } + } catch {} + process.exit(c.exitCode); }); const contextOptions: BrowserContextOptions = { @@ -557,7 +612,7 @@ export class BrowserManager { return; } try { - const result = this.onDisconnect(); + const result = this.onDisconnect('browser-disconnect'); if (result && typeof (result as Promise).catch === 'function') { (result as Promise).catch((err) => { console.error('[browse] onDisconnect rejected:', err); @@ -1336,8 +1391,20 @@ export class BrowserManager { if (this.browser) { this.browser.on('disconnected', () => { if (this.intentionalDisconnect) return; - console.error('[browse] FATAL: Chromium process crashed or was killed. Server exiting.'); - process.exit(1); + const c = classifyDisconnect({ pagesSize: this.pages.size, mode: 'rehead' }); + if (c.kind === 'clean-drain') { + console.log('[browse] Chromium quit after last tab closed. Shutting down cleanly.'); + } else { + console.error('[browse] FATAL: Chromium process crashed or was killed. Server exiting.'); + } + try { + const result = this.onDisconnect?.(c.reason, c.exitCode); + if (result && typeof (result as Promise).catch === 'function') { + (result as Promise).catch(() => process.exit(c.exitCode)); + return; + } + } catch {} + process.exit(c.exitCode); }); } From f61a51125d4ab1e91b21b976aac8990af3ee7b50 Mon Sep 17 00:00:00 2001 From: Omar Dominguez <256392855+odominguez7@users.noreply.github.com> Date: Wed, 20 May 2026 18:52:08 -0400 Subject: [PATCH 3/5] feat(browse): recordShutdownReason writes tagged postmortem JSON on every exit path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Every shutdown — idle-timeout, parent-exit, SIGINT/SIGTERM, browser-disconnect, the new browser-empty-* clean drain, chromium-crash-*, and explicit /stop — now writes /last-shutdown.json with {ts, pid, reason, mode, uptimeSeconds, detail}. Written atomically via tmp-then-rename (${outPath}.${pid}.tmp) so two concurrent browse instances sharing a state dir cannot interleave JSON bytes. The shutdownReasonRecorded flag gates the fallback 'shutdown-called' reason in buildFetchHandler.shutdown() — specific callers record their own more-specific reason first; the fallback only fires when nothing else did. Most-recent-wins overwrite is intentional (a SIGINT that arrives after a browser-disconnect IS the actual cause of teardown). onDisconnect dispatcher uses the exit code passed by classifyDisconnect (0 for clean drain, 1 for crash) or defaults to 2 for the existing user-close convention from the page-close chain. Co-Authored-By: Claude Opus 4.7 --- browse/src/server.ts | 70 ++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 65 insertions(+), 5 deletions(-) diff --git a/browse/src/server.ts b/browse/src/server.ts index 9f6866a9db..f83afc86bd 100644 --- a/browse/src/server.ts +++ b/browse/src/server.ts @@ -583,6 +583,39 @@ async function flushBuffers() { // Flush every 1 second const flushInterval = setInterval(flushBuffers, 1000); +// ─── Last-Shutdown Recording ─────────────────────────────────── +// Write a postmortem JSON before exit so the user can tell why the server +// died. Overwritten each shutdown — most recent reason wins. Specific +// callers (idle-timeout, parent-exit, signal handlers, browser-disconnect) +// record their reason BEFORE calling activeShutdown; the shutdown() +// function itself records 'shutdown-called' as a fallback so every exit +// path leaves a trail. +let shutdownReasonRecorded = false; +function recordShutdownReason(reason: string, detail: Record = {}) { + try { + const stateDir = path.dirname(config.stateFile); + const outPath = path.join(stateDir, 'last-shutdown.json'); + const payload = { + ts: new Date().toISOString(), + pid: process.pid, + reason, + mode: browserManager.getConnectionMode(), + uptimeSeconds: Math.round(process.uptime()), + detail, + }; + // Atomic write: two browse instances writing to the same state dir + // can otherwise interleave JSON bytes and leave a corrupt file. + // tmp-then-rename is the standard POSIX pattern. + const tmpPath = `${outPath}.${process.pid}.tmp`; + fs.writeFileSync(tmpPath, JSON.stringify(payload, null, 2)); + fs.renameSync(tmpPath, outPath); + shutdownReasonRecorded = true; + console.log(`[browse] Shutdown reason recorded: ${reason}`); + } catch (err: any) { + console.warn('[browse] Failed to record shutdown reason:', err?.message); + } +} + // ─── Idle Timer ──────────────────────────────────────────────── let lastActivity = Date.now(); @@ -598,6 +631,7 @@ const idleCheckInterval = setInterval(() => { if (tunnelActive) return; if (Date.now() - lastActivity > IDLE_TIMEOUT_MS) { console.log(`[browse] Idle for ${IDLE_TIMEOUT_MS / 1000}s, shutting down`); + recordShutdownReason('idle-timeout', { idleSeconds: Math.round((Date.now() - lastActivity) / 1000) }); activeShutdown?.(); } }, 60_000); @@ -641,6 +675,7 @@ if (BROWSE_PARENT_PID > 0 && !IS_HEADED_WATCHDOG) { const headed = browserManager.getConnectionMode() === 'headed'; if (headed || tunnelActive) { console.log(`[browse] Parent process ${BROWSE_PARENT_PID} exited in ${headed ? 'headed' : 'tunnel'} mode, shutting down`); + recordShutdownReason('parent-exit', { parentPid: BROWSE_PARENT_PID, mode: headed ? 'headed' : 'tunnel' }); activeShutdown?.(); } else if (!parentGone) { parentGone = true; @@ -678,10 +713,21 @@ function emitInspectorEvent(event: any): void { // ─── Server ──────────────────────────────────────────────────── const browserManager = new BrowserManager(); -// When the user closes the headed browser window, run full cleanup -// (kill sidebar-agent, save session, remove profile locks, delete state file) -// before exiting with code 2. Exit code 2 distinguishes user-close from crashes (1). -browserManager.onDisconnect = () => activeShutdown?.(2); +// Exit-code semantics (passed by browser-manager.ts via classifyDisconnect +// for the Chromium 'disconnected' paths, or unset for the headed-mode +// page-close chain that uses the existing user-close convention): +// 0 — clean drain (Chromium quit after last tab closed) +// 1 — real crash (Chromium died with pages still live, preserves the +// legacy FATAL exit convention) +// 2 — user-explicit close from the page-close chain (default when no +// exit code is passed) +// activeShutdown runs the same full cleanup (kill sidebar-agent, save +// session, remove profile locks, delete state file) regardless. +browserManager.onDisconnect = (reason?: string, exitCode?: 0 | 1 | 2) => { + const code = exitCode ?? 2; + recordShutdownReason(reason || 'browser-disconnect', { exitCode: code }); + return activeShutdown?.(code); +}; let isShuttingDown = false; // Test if a port is available by binding and immediately releasing. @@ -1164,7 +1210,10 @@ async function handleCommand(body: any, tokenInfo?: TokenInfo | null): Promise activeShutdown?.()); + process.on('SIGINT', () => { + recordShutdownReason('signal', { signal: 'SIGINT' }); + activeShutdown?.(); + }); // SIGTERM behavior depends on mode: // - Normal (headless) mode: Claude Code's Bash sandbox fires SIGTERM when the // parent shell exits between tool invocations. Ignoring it keeps the server @@ -1182,6 +1231,7 @@ if (import.meta.main) { const headed = browserManager.getConnectionMode() === 'headed'; if (headed || tunnelActive) { console.log(`[browse] Received SIGTERM in ${headed ? 'headed' : 'tunnel'} mode, shutting down`); + recordShutdownReason('signal', { signal: 'SIGTERM', mode: headed ? 'headed' : 'tunnel' }); activeShutdown?.(); } else { console.log('[browse] Received SIGTERM (ignoring — use /stop or Ctrl+C for intentional shutdown)'); @@ -1313,6 +1363,16 @@ export function buildFetchHandler(cfg: ServerConfig): ServerHandle { if (isShuttingDown) return; isShuttingDown = true; + // Fallback diagnostics: if no caller recorded a reason yet (e.g. /stop, + // explicit $B disconnect, or a future code path we forgot to wire), + // log something rather than exiting silently. Specific callers + // (idle-timeout, parent-exit, signal handlers, browser-disconnect) + // record their own more-specific reason BEFORE calling activeShutdown, + // and recordShutdownReason overwrites by design — most recent wins. + if (!shutdownReasonRecorded) { + recordShutdownReason('shutdown-called', { exitCode }); + } + console.log('[browse] Shutting down...'); if (ownsTerminalAgent) { try { From 843546448423d0f79bf3e5e530031efca20f4a6e Mon Sep 17 00:00:00 2001 From: Omar Dominguez <256392855+odominguez7@users.noreply.github.com> Date: Wed, 20 May 2026 18:52:41 -0400 Subject: [PATCH 4/5] test+chore: classifyDisconnect regression suite, bump VERSION + CHANGELOG (v1.43.1.0) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 5-test suite pins all four (mode × pagesSize) combinations of classifyDisconnect plus the reason-tag suffix invariant. If a future refactor changes the reason format or breaks the clean-drain split, CI catches it before the next user's sidebar dies. Cross-model adversarial pass via /codex challenge surfaced 14 findings; the 5 must-fix and should-fix items (exit-code wiring being decorative, postmortem exitCode internal consistency, atomic write, bounded tail read, exit-code pass-through refactor) landed before sign-off. Co-Authored-By: Claude Opus 4.7 --- CHANGELOG.md | 62 +++++++++++++++++++ VERSION | 2 +- ...rowser-manager-classify-disconnect.test.ts | 57 +++++++++++++++++ package.json | 2 +- 4 files changed, 121 insertions(+), 2 deletions(-) create mode 100644 browse/test/browser-manager-classify-disconnect.test.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index 18297b0ae8..aa4a299590 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,67 @@ # Changelog +## [1.43.1.0] - 2026-05-20 + +## **Closing your last tab in headed mode stops killing the sidebar Claude.** +## **And every shutdown now leaves a tagged postmortem in `~/.gstack/last-shutdown.json` plus a tee'd server log.** + +The browse daemon's `disconnected` handler in launched mode treated every Chromium disappear as a crash. Same `FATAL: Chromium process crashed` log line, same `process.exit(1)`, same activeShutdown teardown — even when the cause was the user closing their last tab and Chromium naturally quitting because it had nothing left to display. The smoking gun, visible in any developer's daemon log, was the pair: + +``` +[browse] Tab closed (id=1, remaining=0) +[browse] FATAL: Chromium process crashed or was killed. Server exiting. +``` + +Same event, two stories — and in-flight sidebar PTY work died with it every time. This release ships the postmortem instrumentation that surfaced the bug and the fix. `classifyDisconnect()` is a pure helper that splits the event by `pages.size`: zero pages means clean drain (exit 0, no FATAL), one or more pages live means real crash (exit 1, preserve the legacy convention). The helper is wired into both the launched-mode and post-rehead disconnect handlers (they collapse from two near-duplicate inline blocks into one shared classification). `recordShutdownReason()` writes a tagged JSON file to `~/.gstack/last-shutdown.json` on every exit path with `{ts, pid, reason, mode, uptimeSeconds, detail}`, written atomically via tmp-then-rename so two concurrent browse instances can't corrupt each other's postmortem. The detached server's stdout and stderr now tee to `/browse-server.log` so the next time you ask "why did it die," there's an actual log to read — `proc.unref()` no longer sends those bytes into closed file descriptors. + +### The numbers that matter + +Source: `bun test browse/test/browser-manager-classify-disconnect.test.ts browse/test/browser-manager-unit.test.ts browse/test/browser-manager-custom-chromium.test.ts browse/test/server-embedder-terminal-port.test.ts browse/test/build-command-response.test.ts` — 26 tests, all green. Adversarial cross-model review by Codex CLI (`/codex challenge`) at `model_reasoning_effort=high` surfaced 14 findings; the 5 must-fix and should-fix items landed in this PR before the handshake sign-off. + +| Surface | Before | After | +|---|---|---| +| User closes last tab in launched headed mode | `FATAL: Chromium process crashed`, exit 1, kills sidebar-agent + in-flight PTY work | `Chromium quit after last tab closed. Shutting down cleanly.`, exit 0, full activeShutdown still runs in order | +| User closes last tab in post-rehead mode | Same FATAL + exit 1 path | Same clean-drain exit 0 path, separate `browser-empty-rehead` reason tag for postmortem disambiguation | +| Detached server `console.log` / `console.error` after `proc.unref()` | stdio piped to FDs nothing reads — every message lost | Append-mode tee at `/browse-server.log`, survives CLI exit; startup-failure path reads last 64KB via seek-from-end, not the whole file | +| Diagnostic trail when the daemon dies | "you tell me" — no log, no file, no tag | `~/.gstack/last-shutdown.json` with `{ts, pid, reason, mode, uptimeSeconds, detail.exitCode}`, atomic via `.pid.tmp` + rename | +| Exit paths tagged with a specific reason | 0 | 9: `idle-timeout`, `parent-exit`, `signal` (SIGINT/SIGTERM with signal name in detail), `browser-disconnect`, `browser-empty-launched`, `browser-empty-rehead`, `chromium-crash-launched`, `chromium-crash-rehead`, `shutdown-called` (fallback when nothing else recorded a reason) | +| Disconnect classification testability | Inline duplicated logic in two browser-manager.ts handlers, required Chromium to exercise | Pure `classifyDisconnect({pagesSize, mode})` helper, 5 unit tests pin launched/rehead × empty/live × reason-tag invariant, no browser needed | +| `onDisconnect` callback signature | `(() => void | Promise) | null` — exit code hardcoded to 2 in server wiring | `((reason?: string, exitCode?: 0 | 1 | 2) => ...) | null` — server uses passed exit code (0 clean drain, 1 crash) or defaults to 2 for the existing user-close convention from the page-close chain | + +### What this means for builders + +When your browse daemon dies, `cat ~/.gstack/last-shutdown.json` tells you exactly which path it took, and `tail ~/.gstack/browse-server.log` shows the messages the detached server tried to print before exiting. If you use headed mode with the sidebar, closing your last tab no longer kills in-flight PTY or sidebar-agent work — Chromium quits, the server logs `Shutting down cleanly`, exits 0, every other teardown step still runs in order. If you embed `buildFetchHandler` in your own server, the postmortem and tee still work — they use `config.stateFile`'s directory, same as the existing `ownsTerminalAgent` gate. + +### Itemized changes + +**Added** + +- `classifyDisconnect({pagesSize, mode}): {kind, reason, exitCode}` exported pure function in `browse/src/browser-manager.ts:74`. Splits Chromium `disconnected` events into clean-drain (`pagesSize === 0`, exit 0) vs crash (exit 1). JSDoc names the original failure mode and pins the log evidence so a future refactor can't drop the split without seeing why it exists. +- `recordShutdownReason(reason, detail)` in `browse/src/server.ts:594`. Writes `/last-shutdown.json` atomically (`${outPath}.${process.pid}.tmp` + `fs.renameSync`). Payload: `{ts, pid, reason, mode, uptimeSeconds, detail}`. `shutdownReasonRecorded` flag gates the fallback at `browse/src/server.ts:1370` so the explicit `shutdown-called` reason only fires when no specific caller recorded its own — most-recent-wins is intentional, but the gate prevents the fallback from clobbering more specific reasons. +- 9 explicit reason tags wired through: `idle-timeout`, `parent-exit` (with `parentPid` + `mode` in detail), `signal` (with `SIGINT`/`SIGTERM` in detail), `browser-disconnect`, `browser-empty-launched`, `browser-empty-rehead`, `chromium-crash-launched`, `chromium-crash-rehead`, `shutdown-called`. +- `/browse-server.log` tee in `browse/src/cli.ts:248`. Opens the file once with `fs.openSync(logPath, 'a')`, points the spawned server's stdio at the fd, writes a `=== CLI pid=N parentPid=M ===` separator on each open. Falls back to `'pipe'` when fd open fails so the startup-failure path still works. +- Bounded 64KB tail read in `browse/src/cli.ts:285` for the startup-failure path. `fs.openSync` + `fs.statSync` + `fs.readSync` from `(size - TAIL_BYTES)`, `fs.closeSync` in `finally`. Avoids `readFileSync` on a potentially large log file across many failed-startup runs. +- `browse/test/browser-manager-classify-disconnect.test.ts`: 5 tests covering launched/rehead × pagesSize 0/N, plus reason-tag suffix invariant (`-launched` / `-rehead` always present so embedder logs can disambiguate at a glance). + +**Changed** + +- `BrowserManager.onDisconnect` signature: `(() => void | Promise) | null` → `((reason?: string, exitCode?: 0 | 1 | 2) => void | Promise) | null`. Both `classifyDisconnect` call sites pass `c.exitCode` through; the headed-mode `browser-disconnect` call site at `browse/src/browser-manager.ts:615` omits the exit code, defaulting to 2 (the existing user-close convention). Server-side dispatcher at `browse/src/server.ts:723` uses `exitCode ?? 2`. +- Both Chromium `disconnected` handlers (launched mode at `browse/src/browser-manager.ts:291`, post-rehead at `:1393`) replaced their inline if/else with a single `classifyDisconnect()` call. The two near-duplicate handlers collapse into one shared classification, with the launched-mode FATAL log extended to remind that console/network logs are flushed to `.gstack/browse-*.log`. +- CLI startup-failure path reads the tail of the new tee log when stdio was redirected. `proc.stderr.getReader()` would throw on a redirected fd; the new branch sniffs `(proc as any).__logPath` and reads via seek. + +**Fixed** + +- `FATAL: Chromium process crashed or was killed` no longer prints when the user closed their last tab and Chromium quit because nothing was left to display. Headed-mode sidebar work survives the close. The launched-mode handler at `browse/src/browser-manager.ts:291` and the post-rehead handler at `:1393` both gain the split. +- Detached server's `console.log` and `console.error` no longer disappear into closed FDs after `proc.unref()`. The startup-failure error message also surfaces when `stdio` is a file fd (the previous code threw on `proc.stderr.getReader()` because no pipe was attached). +- `recordShutdownReason` writes are atomic: two browse instances sharing a state dir cannot interleave bytes mid-write. Standard POSIX tmp-then-rename pattern. +- Startup-failure tail no longer reads the full log file into memory. 64KB hard cap, seek-from-end. + +**For contributors** + +- The 5-case unit test in `browse/test/browser-manager-classify-disconnect.test.ts` pins all four combinations of `(mode: 'launched' | 'rehead') × (pagesSize: 0 | >=1)` plus the reason-tag suffix invariant. If a future refactor changes the reason tag format or breaks the clean-drain split, CI catches it before the next user's sidebar dies. +- Cross-model adversarial pass: `/codex challenge` at `model_reasoning_effort=high` produced 14 findings. The 5 must-fix and should-fix items (exit-code wiring decorative-vs-actual, postmortem `exitCode` internal consistency, atomic write, bounded tail read, plus the exit-code-pass-through refactor) landed in this PR before sign-off. The 9 deferred findings (forward race when Chromium fires `disconnected` before `page.on('close')` drains the pages map, embedder-mode global config collision (pre-existing TODO from v1.42.1.0), tunnel-mode interaction with clean-drain, FD leak in single-CLI-invocation scope, no log rotation, `(proc as any)` stash, inverse-race crash with `pagesSize === 0`, most-recent-reason-wins by design, headed-mode page.on('close') chain default) are tracked but out of scope for this PR. +- The `exitCode` flow through `onDisconnect` is the right place to add a future strict-mode guard if someone wires a non-spec exit code (e.g., bypass TS and pass `5`). The current implementation falls back to `2` for any unset value. + ## [1.42.1.0] - 2026-05-19 ## **Embedder PTY teardown stops clobbering — gbrowser's phoenix overlay survives every shutdown.** diff --git a/VERSION b/VERSION index 65881123fc..85aeaa7f54 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -1.42.1.0 +1.43.1.0 diff --git a/browse/test/browser-manager-classify-disconnect.test.ts b/browse/test/browser-manager-classify-disconnect.test.ts new file mode 100644 index 0000000000..cfe737b127 --- /dev/null +++ b/browse/test/browser-manager-classify-disconnect.test.ts @@ -0,0 +1,57 @@ +import { describe, test, expect } from 'bun:test'; +import { classifyDisconnect } from '../src/browser-manager'; + +// Pins the FATAL-on-clean-tab-close regression: pages.size === 0 at +// disconnect time means the user closed the last tab, not that Chromium +// crashed. Before this split, both paths exited 1 and the daemon log +// printed `FATAL: Chromium process crashed`, killing the sidebar-agent +// every time a user closed their last tab in headed mode. + +describe('classifyDisconnect — clean-drain vs crash', () => { + test('launched mode, zero pages → clean-drain, exit 0, browser-empty-launched', () => { + expect(classifyDisconnect({ pagesSize: 0, mode: 'launched' })).toEqual({ + kind: 'clean-drain', + reason: 'browser-empty-launched', + exitCode: 0, + }); + }); + + test('launched mode, pages still live → crash, exit 1, chromium-crash-launched', () => { + expect(classifyDisconnect({ pagesSize: 1, mode: 'launched' })).toEqual({ + kind: 'crash', + reason: 'chromium-crash-launched', + exitCode: 1, + }); + expect(classifyDisconnect({ pagesSize: 5, mode: 'launched' })).toEqual({ + kind: 'crash', + reason: 'chromium-crash-launched', + exitCode: 1, + }); + }); + + test('rehead mode, zero pages → clean-drain, exit 0, browser-empty-rehead', () => { + expect(classifyDisconnect({ pagesSize: 0, mode: 'rehead' })).toEqual({ + kind: 'clean-drain', + reason: 'browser-empty-rehead', + exitCode: 0, + }); + }); + + test('rehead mode, pages still live → crash, exit 1, chromium-crash-rehead', () => { + expect(classifyDisconnect({ pagesSize: 3, mode: 'rehead' })).toEqual({ + kind: 'crash', + reason: 'chromium-crash-rehead', + exitCode: 1, + }); + }); + + test('reason tag always carries the mode for postmortem disambiguation', () => { + // ~/.gstack/last-shutdown.json reads this string. Tag must include + // the mode so an embedder log can tell launched-disconnect from + // rehead-disconnect at a glance. + const launched = classifyDisconnect({ pagesSize: 0, mode: 'launched' }); + const rehead = classifyDisconnect({ pagesSize: 0, mode: 'rehead' }); + expect(launched.reason.endsWith('-launched')).toBe(true); + expect(rehead.reason.endsWith('-rehead')).toBe(true); + }); +}); diff --git a/package.json b/package.json index c75857f1df..cf265641b2 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "gstack", - "version": "1.42.1.0", + "version": "1.43.1.0", "description": "Garry's Stack — Claude Code skills + fast headless browser. One repo, one install, entire AI engineering workflow.", "license": "MIT", "type": "module", From ff2f9a37f6b91efafa60bf5475b3831c6eff8afb Mon Sep 17 00:00:00 2001 From: Omar Dominguez <256392855+odominguez7@users.noreply.github.com> Date: Wed, 20 May 2026 19:46:16 -0400 Subject: [PATCH 5/5] chore: defer VERSION + CHANGELOG bump to maintainer integration Restore VERSION, CHANGELOG.md, and package.json to upstream/main values so this PR doesn't claim a release slot. Maintainer can bump and write the entry when integrating into the next wave. Keeps all functional changes (classifyDisconnect helper, recordShutdownReason postmortem, stdio tee, 5-test regression suite). Only undoes the version claim. --- CHANGELOG.md | 62 ---------------------------------------------------- VERSION | 2 +- package.json | 2 +- 3 files changed, 2 insertions(+), 64 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index aa4a299590..18297b0ae8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,67 +1,5 @@ # Changelog -## [1.43.1.0] - 2026-05-20 - -## **Closing your last tab in headed mode stops killing the sidebar Claude.** -## **And every shutdown now leaves a tagged postmortem in `~/.gstack/last-shutdown.json` plus a tee'd server log.** - -The browse daemon's `disconnected` handler in launched mode treated every Chromium disappear as a crash. Same `FATAL: Chromium process crashed` log line, same `process.exit(1)`, same activeShutdown teardown — even when the cause was the user closing their last tab and Chromium naturally quitting because it had nothing left to display. The smoking gun, visible in any developer's daemon log, was the pair: - -``` -[browse] Tab closed (id=1, remaining=0) -[browse] FATAL: Chromium process crashed or was killed. Server exiting. -``` - -Same event, two stories — and in-flight sidebar PTY work died with it every time. This release ships the postmortem instrumentation that surfaced the bug and the fix. `classifyDisconnect()` is a pure helper that splits the event by `pages.size`: zero pages means clean drain (exit 0, no FATAL), one or more pages live means real crash (exit 1, preserve the legacy convention). The helper is wired into both the launched-mode and post-rehead disconnect handlers (they collapse from two near-duplicate inline blocks into one shared classification). `recordShutdownReason()` writes a tagged JSON file to `~/.gstack/last-shutdown.json` on every exit path with `{ts, pid, reason, mode, uptimeSeconds, detail}`, written atomically via tmp-then-rename so two concurrent browse instances can't corrupt each other's postmortem. The detached server's stdout and stderr now tee to `/browse-server.log` so the next time you ask "why did it die," there's an actual log to read — `proc.unref()` no longer sends those bytes into closed file descriptors. - -### The numbers that matter - -Source: `bun test browse/test/browser-manager-classify-disconnect.test.ts browse/test/browser-manager-unit.test.ts browse/test/browser-manager-custom-chromium.test.ts browse/test/server-embedder-terminal-port.test.ts browse/test/build-command-response.test.ts` — 26 tests, all green. Adversarial cross-model review by Codex CLI (`/codex challenge`) at `model_reasoning_effort=high` surfaced 14 findings; the 5 must-fix and should-fix items landed in this PR before the handshake sign-off. - -| Surface | Before | After | -|---|---|---| -| User closes last tab in launched headed mode | `FATAL: Chromium process crashed`, exit 1, kills sidebar-agent + in-flight PTY work | `Chromium quit after last tab closed. Shutting down cleanly.`, exit 0, full activeShutdown still runs in order | -| User closes last tab in post-rehead mode | Same FATAL + exit 1 path | Same clean-drain exit 0 path, separate `browser-empty-rehead` reason tag for postmortem disambiguation | -| Detached server `console.log` / `console.error` after `proc.unref()` | stdio piped to FDs nothing reads — every message lost | Append-mode tee at `/browse-server.log`, survives CLI exit; startup-failure path reads last 64KB via seek-from-end, not the whole file | -| Diagnostic trail when the daemon dies | "you tell me" — no log, no file, no tag | `~/.gstack/last-shutdown.json` with `{ts, pid, reason, mode, uptimeSeconds, detail.exitCode}`, atomic via `.pid.tmp` + rename | -| Exit paths tagged with a specific reason | 0 | 9: `idle-timeout`, `parent-exit`, `signal` (SIGINT/SIGTERM with signal name in detail), `browser-disconnect`, `browser-empty-launched`, `browser-empty-rehead`, `chromium-crash-launched`, `chromium-crash-rehead`, `shutdown-called` (fallback when nothing else recorded a reason) | -| Disconnect classification testability | Inline duplicated logic in two browser-manager.ts handlers, required Chromium to exercise | Pure `classifyDisconnect({pagesSize, mode})` helper, 5 unit tests pin launched/rehead × empty/live × reason-tag invariant, no browser needed | -| `onDisconnect` callback signature | `(() => void | Promise) | null` — exit code hardcoded to 2 in server wiring | `((reason?: string, exitCode?: 0 | 1 | 2) => ...) | null` — server uses passed exit code (0 clean drain, 1 crash) or defaults to 2 for the existing user-close convention from the page-close chain | - -### What this means for builders - -When your browse daemon dies, `cat ~/.gstack/last-shutdown.json` tells you exactly which path it took, and `tail ~/.gstack/browse-server.log` shows the messages the detached server tried to print before exiting. If you use headed mode with the sidebar, closing your last tab no longer kills in-flight PTY or sidebar-agent work — Chromium quits, the server logs `Shutting down cleanly`, exits 0, every other teardown step still runs in order. If you embed `buildFetchHandler` in your own server, the postmortem and tee still work — they use `config.stateFile`'s directory, same as the existing `ownsTerminalAgent` gate. - -### Itemized changes - -**Added** - -- `classifyDisconnect({pagesSize, mode}): {kind, reason, exitCode}` exported pure function in `browse/src/browser-manager.ts:74`. Splits Chromium `disconnected` events into clean-drain (`pagesSize === 0`, exit 0) vs crash (exit 1). JSDoc names the original failure mode and pins the log evidence so a future refactor can't drop the split without seeing why it exists. -- `recordShutdownReason(reason, detail)` in `browse/src/server.ts:594`. Writes `/last-shutdown.json` atomically (`${outPath}.${process.pid}.tmp` + `fs.renameSync`). Payload: `{ts, pid, reason, mode, uptimeSeconds, detail}`. `shutdownReasonRecorded` flag gates the fallback at `browse/src/server.ts:1370` so the explicit `shutdown-called` reason only fires when no specific caller recorded its own — most-recent-wins is intentional, but the gate prevents the fallback from clobbering more specific reasons. -- 9 explicit reason tags wired through: `idle-timeout`, `parent-exit` (with `parentPid` + `mode` in detail), `signal` (with `SIGINT`/`SIGTERM` in detail), `browser-disconnect`, `browser-empty-launched`, `browser-empty-rehead`, `chromium-crash-launched`, `chromium-crash-rehead`, `shutdown-called`. -- `/browse-server.log` tee in `browse/src/cli.ts:248`. Opens the file once with `fs.openSync(logPath, 'a')`, points the spawned server's stdio at the fd, writes a `=== CLI pid=N parentPid=M ===` separator on each open. Falls back to `'pipe'` when fd open fails so the startup-failure path still works. -- Bounded 64KB tail read in `browse/src/cli.ts:285` for the startup-failure path. `fs.openSync` + `fs.statSync` + `fs.readSync` from `(size - TAIL_BYTES)`, `fs.closeSync` in `finally`. Avoids `readFileSync` on a potentially large log file across many failed-startup runs. -- `browse/test/browser-manager-classify-disconnect.test.ts`: 5 tests covering launched/rehead × pagesSize 0/N, plus reason-tag suffix invariant (`-launched` / `-rehead` always present so embedder logs can disambiguate at a glance). - -**Changed** - -- `BrowserManager.onDisconnect` signature: `(() => void | Promise) | null` → `((reason?: string, exitCode?: 0 | 1 | 2) => void | Promise) | null`. Both `classifyDisconnect` call sites pass `c.exitCode` through; the headed-mode `browser-disconnect` call site at `browse/src/browser-manager.ts:615` omits the exit code, defaulting to 2 (the existing user-close convention). Server-side dispatcher at `browse/src/server.ts:723` uses `exitCode ?? 2`. -- Both Chromium `disconnected` handlers (launched mode at `browse/src/browser-manager.ts:291`, post-rehead at `:1393`) replaced their inline if/else with a single `classifyDisconnect()` call. The two near-duplicate handlers collapse into one shared classification, with the launched-mode FATAL log extended to remind that console/network logs are flushed to `.gstack/browse-*.log`. -- CLI startup-failure path reads the tail of the new tee log when stdio was redirected. `proc.stderr.getReader()` would throw on a redirected fd; the new branch sniffs `(proc as any).__logPath` and reads via seek. - -**Fixed** - -- `FATAL: Chromium process crashed or was killed` no longer prints when the user closed their last tab and Chromium quit because nothing was left to display. Headed-mode sidebar work survives the close. The launched-mode handler at `browse/src/browser-manager.ts:291` and the post-rehead handler at `:1393` both gain the split. -- Detached server's `console.log` and `console.error` no longer disappear into closed FDs after `proc.unref()`. The startup-failure error message also surfaces when `stdio` is a file fd (the previous code threw on `proc.stderr.getReader()` because no pipe was attached). -- `recordShutdownReason` writes are atomic: two browse instances sharing a state dir cannot interleave bytes mid-write. Standard POSIX tmp-then-rename pattern. -- Startup-failure tail no longer reads the full log file into memory. 64KB hard cap, seek-from-end. - -**For contributors** - -- The 5-case unit test in `browse/test/browser-manager-classify-disconnect.test.ts` pins all four combinations of `(mode: 'launched' | 'rehead') × (pagesSize: 0 | >=1)` plus the reason-tag suffix invariant. If a future refactor changes the reason tag format or breaks the clean-drain split, CI catches it before the next user's sidebar dies. -- Cross-model adversarial pass: `/codex challenge` at `model_reasoning_effort=high` produced 14 findings. The 5 must-fix and should-fix items (exit-code wiring decorative-vs-actual, postmortem `exitCode` internal consistency, atomic write, bounded tail read, plus the exit-code-pass-through refactor) landed in this PR before sign-off. The 9 deferred findings (forward race when Chromium fires `disconnected` before `page.on('close')` drains the pages map, embedder-mode global config collision (pre-existing TODO from v1.42.1.0), tunnel-mode interaction with clean-drain, FD leak in single-CLI-invocation scope, no log rotation, `(proc as any)` stash, inverse-race crash with `pagesSize === 0`, most-recent-reason-wins by design, headed-mode page.on('close') chain default) are tracked but out of scope for this PR. -- The `exitCode` flow through `onDisconnect` is the right place to add a future strict-mode guard if someone wires a non-spec exit code (e.g., bypass TS and pass `5`). The current implementation falls back to `2` for any unset value. - ## [1.42.1.0] - 2026-05-19 ## **Embedder PTY teardown stops clobbering — gbrowser's phoenix overlay survives every shutdown.** diff --git a/VERSION b/VERSION index 85aeaa7f54..65881123fc 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -1.43.1.0 +1.42.1.0 diff --git a/package.json b/package.json index cf265641b2..c75857f1df 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "gstack", - "version": "1.43.1.0", + "version": "1.42.1.0", "description": "Garry's Stack — Claude Code skills + fast headless browser. One repo, one install, entire AI engineering workflow.", "license": "MIT", "type": "module",