Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/windows-free-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ jobs:
test/setup-windows-fallback.test.ts \
test/build-script-shell-compat.test.ts \
test/docs-config-keys.test.ts \
test/brain-sync-windows-paths.test.ts \
make-pdf/test/browseClient.test.ts \
make-pdf/test/pdftotext.test.ts
shell: bash
69 changes: 57 additions & 12 deletions bin/gstack-brain-sync
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,11 @@ def load_privacy_map(path):

allowlist_globs = load_lines(allowlist_path)
privacy_map = load_privacy_map(privacy_path)
skip_lines = set(load_lines(skip_path))
# Normalize skip entries to the POSIX form queued paths use, so a backslash
# entry in .brain-skip.txt still matches on Windows. The drain is the safety
# boundary that actually stages files, so it must normalize identically to
# discover_new — otherwise an explicitly-skipped file gets committed.
skip_lines = {s.replace(os.sep, "/") for s in load_lines(skip_path)}

# Read queue; collect unique file paths.
queue_paths = set()
Expand Down Expand Up @@ -253,6 +257,8 @@ subcmd_once() {

# Stage with git add -f (forces past .gitignore=*) explicit paths only.
while IFS= read -r p; do
p="${p%$'\r'}" # Windows: compute_paths_to_stage's python print() emits CRLF;
# a trailing CR makes the pathspec match nothing (silent no-stage).
[ -z "$p" ] && continue
git -C "$GSTACK_HOME" add -f -- "$p" 2>/dev/null || true
done < "$paths_file"
Expand Down Expand Up @@ -376,10 +382,13 @@ subcmd_discover_new() {
exit 0
fi
# Walk allowlist globs; enqueue any file where mtime+size differs from cursor.
python3 - "$GSTACK_HOME" "$ALLOWLIST" "$DISCOVER_CURSOR" "$SCRIPT_DIR/gstack-brain-enqueue" <<'PYEOF' 2>/dev/null || true
import sys, os, json, glob, fnmatch, subprocess, hashlib
python3 - "$GSTACK_HOME" "$ALLOWLIST" "$DISCOVER_CURSOR" <<'PYEOF' 2>/dev/null || true
import sys, os, json, fnmatch
from datetime import datetime, timezone

gstack_home, allowlist_path, cursor_path, enqueue_bin = sys.argv[1:5]
gstack_home, allowlist_path, cursor_path = sys.argv[1:4]
queue_path = os.path.join(gstack_home, ".brain-queue.jsonl")
skip_path = os.path.join(gstack_home, ".brain-skip.txt")

def load_lines(path):
try:
Expand All @@ -403,8 +412,12 @@ def save_cursor(path, data):
pass

allowlist = load_lines(allowlist_path)
# Normalize skip entries to the same POSIX form as `rel` below, so a
# backslash entry in .brain-skip.txt still matches a normalized path on Windows.
skip = {s.replace(os.sep, "/") for s in load_lines(skip_path)}
cursor = load_cursor(cursor_path)
new_cursor = dict(cursor)
to_enqueue = []

# Walk all files under gstack_home, match against allowlist.
for root, dirs, files in os.walk(gstack_home):
Expand All @@ -413,22 +426,54 @@ for root, dirs, files in os.walk(gstack_home):
continue
for name in files:
full = os.path.join(root, name)
rel = os.path.relpath(full, gstack_home)
# Repo paths are POSIX-relative. os.path.relpath yields backslash
# separators on Windows, which never match the forward-slash allowlist
# globs (e.g. "projects/*/learnings.jsonl"), so discovery silently
# enqueued nothing under projects/ on Windows. Normalize to "/".
rel = os.path.relpath(full, gstack_home).replace(os.sep, "/")
if rel.startswith(".brain-"):
continue
matched = any(fnmatch.fnmatchcase(rel, pat) for pat in allowlist)
if not matched:
if not any(fnmatch.fnmatchcase(rel, pat) for pat in allowlist):
continue
if rel in skip:
continue
try:
st = os.stat(full)
key = f"{int(st.st_mtime)}:{st.st_size}"
except OSError:
continue
prev = cursor.get(rel)
if prev != key:
# Enqueue via the shim (respects sync mode + skip list).
subprocess.run([enqueue_bin, rel], check=False)
new_cursor[rel] = key
if cursor.get(rel) != key:
to_enqueue.append((rel, key))

# Append to the queue directly. The previous implementation shelled out to
# gstack-brain-enqueue once per file, but Windows Python cannot exec a
# bash-shebang script (the spawn fails with a fork error), so discovery
# enqueued nothing on Windows even after the path-match fix above.
# Writing the queue line here is platform-agnostic; the drain step
# (compute_paths_to_stage) still re-applies the skip-list + privacy filters.
if to_enqueue:
ts = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
try:
# One atomic append per record (O_APPEND, each line < PIPE_BUF), matching
# gstack-brain-enqueue's concurrency contract so a writer-shim append
# running in parallel can't interleave mid-record. Buffered text writes
# don't guarantee that. Compact separators match the shim's JSON shape.
fd = os.open(queue_path, os.O_WRONLY | os.O_CREAT | os.O_APPEND, 0o644)
try:
for rel, key in to_enqueue:
rec = json.dumps({"file": rel, "ts": ts}, separators=(",", ":"))
os.write(fd, (rec + "\n").encode("utf-8"))
finally:
os.close(fd)
except OSError:
# Queue write failed (disk full, AV file lock). Leave the cursor
# unadvanced so these files are retried on the next discover instead of
# being silently recorded as synced (which loses the change until the
# file next changes).
to_enqueue = []
# Advance the cursor only for records actually written.
for rel, key in to_enqueue:
new_cursor[rel] = key

save_cursor(cursor_path, new_cursor)
PYEOF
Expand Down
66 changes: 66 additions & 0 deletions test/brain-sync-windows-paths.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
import { describe, test, expect } from 'bun:test';
import * as fs from 'fs';
import * as path from 'path';

// Static invariants guarding Windows artifact-sync (bin/gstack-brain-sync).
//
// These are deliberately static, not behavioral. The brain-sync integration
// suite (test/brain-sync.test.ts) spawns the bin/ scripts directly, which
// Node/Bun cannot exec on Windows (they are bash-shebang scripts), so that
// suite is excluded from the Windows CI lane. Instead we assert the source
// keeps the properties that make `--discover-new` and the `--once` drain work
// on Windows. Each maps to a confirmed, separately-reproduced failure:
//
// 1. os.path.relpath yields BACKSLASH separators on Windows, which never
// match the forward-slash allowlist globs (e.g. "projects/*/learnings.jsonl"),
// so nested artifacts were silently never discovered.
// 2. discover-new enqueued via subprocess.run([bash-shim]); Windows Python
// cannot exec a shebang script, so it enqueued nothing even once matched.
// 3. compute_paths_to_stage's python print() emits CRLF on Windows; the bash
// `read -r` keeps the trailing \r, so `git add -- "path\r"` matches
// nothing and the drain silently stages/commits nothing.
//
// Plus two robustness properties (independent codex review, both [P2]):
// 4. the inline enqueue must append one atomic record at a time (O_APPEND),
// or a concurrent writer-shim append can interleave mid-record and produce
// a malformed queue line that the drain silently drops.
// 5. the skip-list must be normalized to the same separator form as `rel`,
// or a backslash entry in .brain-skip.txt stops matching and a file the
// user explicitly skipped gets synced.
// Repository root: this test file sits in test/, one level below it.
const ROOT = path.resolve(import.meta.dir, '..');
// Entire text of the sync script. Nothing below executes it; every check is
// a plain substring assertion against this string.
const SRC = fs.readFileSync(path.join(ROOT, 'bin', 'gstack-brain-sync'), 'utf-8');

describe('gstack-brain-sync — Windows path/exec invariants', () => {
  test('discover-new normalizes relpath separators before fnmatch (bug 1)', () => {
    // The relpath result has to be converted to "/" separators in the same
    // expression, before it is compared against the allowlist globs.
    const normalizedRelpath = 'os.path.relpath(full, gstack_home).replace(os.sep, "/")';
    expect(SRC).toContain(normalizedRelpath);
  });

  test('no python subprocess exec — Windows cannot exec the bash shims (bug 2)', () => {
    // Any mention of the module anywhere in the script counts as a regression:
    // spawning a bash-shebang script from Python is the failure being guarded.
    const forbidden = 'subprocess';
    expect(SRC).not.toContain(forbidden);
  });

  test('drain loop strips trailing CR before git add (bug 3)', () => {
    const CR_STRIP = "p=\"${p%$'\\r'}\"";
    const stripOffset = SRC.indexOf(CR_STRIP);
    const stageOffset = SRC.indexOf('add -f -- "$p"');

    expect(SRC).toContain(CR_STRIP);
    // Ordering matters: a strip placed after the staging call would leave the
    // \r on the pathspec that git receives.
    expect(stripOffset).toBeLessThan(stageOffset);
  });

  test('inline enqueue appends one atomic record at a time (codex P2 #1)', () => {
    // Both halves of the per-record append must be present...
    for (const required of ['os.O_APPEND', 'os.write(fd']) {
      expect(SRC).toContain(required);
    }
    // ...and the buffered append-mode open (the interleave-corruption shape)
    // must be absent.
    expect(SRC).not.toContain('open(queue_path, "a"');
  });

  test('skip-list is normalized on BOTH discover and drain sides (codex P2 #2)', () => {
    // compute_paths_to_stage (the drain) is what actually stages files, so it
    // has to normalize .brain-skip.txt entries the same way discover_new does.
    // If only discovery normalized, a backslash entry would be honored there
    // but bypassed at commit, syncing a file the user explicitly skipped.
    const NORM = 's.replace(os.sep, "/") for s in load_lines(skip_path)';
    // Splitting on the snippet yields one more piece than there are occurrences.
    const occurrences = SRC.split(NORM).length - 1;
    expect(occurrences).toBeGreaterThanOrEqual(2);
  });
});