-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathloop_journal.py
More file actions
374 lines (319 loc) Β· 14.9 KB
/
Copy pathloop_journal.py
File metadata and controls
374 lines (319 loc) Β· 14.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
#!/usr/bin/env python3
"""simplicio-loop — run-journal + stall/progress detector (the loop's working memory).
The two highest-leverage upgrades to a loop orchestrator, made runnable. The classic re-feed loop
remembers nothing between turns except the git tree — so it can (a) re-derive the same triage every
turn and (b) OSCILLATE: try X, fail, try X again, forever, until the cap burns. This worker gives
the loop an explicit, durable **attempt memory** and a **stall detector** so it changes strategy or
escalates instead of re-feeding the same goal into the same failure.
It is deterministic and model-free — the fingerprint + stall math never call an LLM, so a resume is
reproducible from the on-disk journal (same discipline as `savings_harness`/`billing_aggregator`).
State: `.orchestrator/loop/journal.jsonl` — one append-only record per attempt:
{"iteration", "action", "hypothesis", "gate": "pass|fail|blocked",
"fingerprint": "<stable hash of the failure signature>", "note", "commit", "ts"}
Verbs:
record Append one attempt. Pass --gate pass|fail|blocked and (on fail) the gate output via
--gate-output FILE or stdin; the failure FINGERPRINT is computed deterministically
(line-numbers / paths / hex / timestamps normalized away) so the SAME failure hashes
the SAME across turns.
fingerprint Print the stable fingerprint of a failure text (FILE or stdin). Standalone helper.
stall Read the journal -> verdict PROGRESS | STALLED. STALLED when the last K consecutive
attempts all failed with the SAME fingerprint (default K=3). Prints the recommended
action (switch-strategy | escalate) and the dead-end actions to avoid. Exit 10 when
stalled (for `if:` gating), 0 otherwise — unless --exit-code is omitted (always 0).
resume The anti-oscillation read: distinct actions already tried + their outcomes + the
current stall count + the live error fingerprint. Print THIS at the top of each turn
so the loop never retries a known dead-end.
status Compact tail of the journal (last N records).
since Incremental triage: the delta (git diff --stat + working tree) since the last
recorded turn's commit — so a turn reads only what changed, not a full re-scan.
selftest Prove the fingerprint + stall logic deterministically — no files.
Usage:
python3 scripts/loop_journal.py record --iteration 3 --action "add retry to fetch" \\
--hypothesis "timeout is transient" --gate fail --gate-output test.log
python3 scripts/loop_journal.py stall [--k 3] [--exit-code]
python3 scripts/loop_journal.py resume
python3 scripts/loop_journal.py status [--n 10]
python3 scripts/loop_journal.py selftest
"""
import hashlib
import json
import os
import re
import sys
import time
# Windows consoles default to cp1252 and choke on non-ASCII output, so force UTF-8 (with
# replacement of unencodable characters) on stdout and then stderr.
# Best-effort by design: if a reconfigure() call raises, the error is ignored, any remaining
# call is skipped, and the affected streams keep their current settings.
try:
    sys.stdout.reconfigure(encoding="utf-8", errors="replace")
    sys.stderr.reconfigure(encoding="utf-8", errors="replace")
except Exception:
    pass
# Directory that contains this script.
HERE = os.path.dirname(os.path.abspath(__file__))
# Parent of HERE; used as the working directory for git calls and as the root of the state dir.
REPO = os.path.dirname(HERE)
# Directory holding the loop's on-disk state (created on demand by cmd_record).
LOOP_DIR = os.path.join(REPO, ".orchestrator", "loop")
# Append-only JSON-lines journal: one record per attempt.
JOURNAL = os.path.join(LOOP_DIR, "journal.jsonl")
# Default stall threshold: how many trailing same-fingerprint failures make analyze() say STALLED.
DEFAULT_K = 3
# Lines that carry the actual failure signal: fingerprint() hashes THESE lines, not the whole log.
# The pattern is searched case-insensitively anywhere in each line.
# NOTE(review): the last three alternatives look like mis-encoded glyphs (possibly "failed"/cross
# marks damaged by a wrong charset decode). As written they match Greek letters; confirm the
# intended characters against the canonical copy of this file.
SIGNAL_RE = re.compile(
    r"(error|fail|failed|assert|assertion|exception|traceback|panic|fatal|"
    r"undefined|not found|cannot|unexpected|β|β|Γ)", re.I)
# Volatile tokens that differ run-to-run for the SAME bug: normalized away so the hash is stable.
# fingerprint() applies these (pattern, replacement) pairs in list order to text it has already
# lower-cased, so order matters (e.g. timestamps are rewritten before bare integers).
_NORMALIZERS = [
    (re.compile(r"0x[0-9a-fA-F]+"), "0xADDR"),  # pointers/addresses
    (re.compile(r"\b[0-9a-f]{7,40}\b"), "HEX"),  # sha/uuid-ish
    (re.compile(r"\d{4}-\d{2}-\d{2}[t ]\d{2}:\d{2}:\d{2}\S*", re.I), "TS"),  # ISO timestamps
    (re.compile(r"(:|line )\s*\d+(:\d+)?"), r"\1N"),  # file:line:col / "line 42"
    (re.compile(r"[/\\][\w./\\-]+/(\w+\.\w+)"), r"PATH/\1"),  # dir paths, keep basename
    (re.compile(r"0\.\d+s|\d+(\.\d+)?\s*(ms|s|sec|seconds)", re.I), "DUR"),  # durations
    (re.compile(r"\b\d+\b"), "N"),  # any remaining bare integer
    (re.compile(r"\s+"), " "),  # collapse whitespace
]
def log(msg):
    """Print ``msg`` on its own line, prefixed with a single space (detail-line style)."""
    indented = " " + msg
    print(indented)
def _read_source(spec):
    """Return the failure text designated by ``spec``.

    spec:
      None          -> "" (no source given)
      "-" or True   -> everything on stdin (True is what ``_parse`` stores for a bare flag)
      anything else -> treated as a file path, read as UTF-8 with undecodable bytes replaced

    An unreadable file still yields "" so the caller keeps going (best-effort), but a warning
    is now written to stderr: an empty text produces an empty fingerprint, which would
    otherwise silently switch stall detection off for that attempt.
    """
    if spec is None:
        return ""
    if spec is True or spec == "-":
        return sys.stdin.read()
    try:
        with open(spec, encoding="utf-8", errors="replace") as handle:
            return handle.read()
    except OSError as exc:
        print("warning: cannot read %s: %s" % (spec, exc), file=sys.stderr)
        return ""
def fingerprint(text):
    """Return a stable, model-free 12-hex-digit hash of a failure's signature.

    Empty or whitespace-only text returns "" (no failure). Otherwise the lines matching
    SIGNAL_RE are kept (stripped); when none match, the last five non-blank lines are used
    instead. At most the first 20 kept lines are joined, lower-cased, passed through every
    _NORMALIZERS substitution in order, and hashed with SHA-1.
    """
    if not (text and text.strip()):
        return ""
    lines = text.splitlines()
    picked = [raw.strip() for raw in lines if SIGNAL_RE.search(raw)]
    if not picked:
        # Nothing looked like a failure line: fall back to the tail of the log.
        non_blank = [raw.strip() for raw in lines if raw.strip()]
        picked = non_blank[-5:]
    normalized = "\n".join(picked[:20]).lower()
    for pattern, replacement in _NORMALIZERS:
        normalized = pattern.sub(replacement, normalized)
    digest = hashlib.sha1(normalized.strip().encode("utf-8"))
    return digest.hexdigest()[:12]
def _now():
    """Return the current UTC time formatted as ``YYYY-MM-DDTHH:MM:SSZ``."""
    utc_now = time.gmtime()
    return time.strftime("%Y-%m-%dT%H:%M:%SZ", utc_now)
def _git(args):
    """Run ``git <args>`` inside REPO and return its stripped stdout, or None on failure.

    args: list of git arguments (passed as an argv list, never through a shell).
    Returns None when git exits non-zero or when the process cannot be started at all.
    The latter now covers every OSError (git missing, not executable, REPO not a usable
    directory) rather than only FileNotFoundError, so callers get their documented
    "None means git unavailable" result instead of a traceback.
    """
    import subprocess
    try:
        proc = subprocess.run(["git"] + args, capture_output=True, text=True,
                              encoding="utf-8", errors="replace", cwd=REPO)
    except OSError:
        return None
    if proc.returncode != 0:
        return None
    return proc.stdout.strip()
def _git_head():
    """Return the output of ``git rev-parse HEAD``, or "" when ``_git`` yields nothing."""
    head = _git(["rev-parse", "HEAD"])
    return head if head else ""
def _load():
    """Read the journal and return its records as a list of dicts, oldest first.

    Returns [] when the journal file does not exist. Blank lines are skipped. A damaged line
    must not lose the rest of the journal, so three kinds of damage are tolerated:
      * lines that are not valid JSON are skipped;
      * lines that are valid JSON but not an object (e.g. ``123``, ``[]``, ``null``) are
        skipped too, because every consumer calls ``.get`` on the rows;
      * bytes that are not valid UTF-8 are replaced instead of aborting the whole read.
    """
    rows = []
    if not os.path.exists(JOURNAL):
        return rows
    with open(JOURNAL, encoding="utf-8", errors="replace") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            try:
                record = json.loads(line)
            except ValueError:
                continue  # one corrupt record must not lose the journal
            if isinstance(record, dict):
                rows.append(record)
    return rows
def cmd_record(opts):
    """Append one attempt record to the journal and print a confirmation.

    Options read from ``opts`` (the dict built by ``_parse``):
      iteration     integer turn number (default 0)
      action, hypothesis, note
                    free text (default "")
      gate          gate result (default "fail"); any value other than "pass" is fingerprinted
      gate-output   failure text source, handed unchanged to ``_read_source``
      _commit, _now overrides for the stamped commit and timestamp

    ``_parse`` stores ``True`` for a flag given without a value. Text options treat that as
    empty: previously ``--action`` with no value wrote ``true`` into the journal and then
    crashed while logging. A missing or non-integer ``--iteration`` value now exits with
    status 2 and a message on stderr instead of silently recording 1 or raising a traceback.
    """
    def text(key):
        # Normalize "absent" and "bare flag" to an empty string; everything else to str.
        value = opts.get(key)
        if value is None or value is True:
            return ""
        return str(value)

    raw_iteration = opts.get("iteration", 0)
    try:
        if raw_iteration is True:
            raise ValueError("flag given without a value")
        iteration = int(raw_iteration)
    except (TypeError, ValueError):
        print("record: --iteration expects an integer, got %r" % (raw_iteration,),
              file=sys.stderr)
        sys.exit(2)

    gate = text("gate") or "fail"
    fp = ""
    if gate != "pass":
        # gate-output is passed through untouched so "-" / a bare flag still mean stdin.
        fp = fingerprint(_read_source(opts.get("gate-output")))
    rec = {
        "iteration": iteration,
        "action": text("action"),
        "hypothesis": text("hypothesis"),
        "gate": gate,
        "fingerprint": fp,
        "note": text("note"),
        "commit": text("_commit") or _git_head(),  # for incremental triage (`since`)
        "ts": text("_now") or _now(),
    }
    os.makedirs(LOOP_DIR, exist_ok=True)
    with open(JOURNAL, "a", encoding="utf-8") as f:
        f.write(json.dumps(rec, ensure_ascii=False) + "\n")
    log("recorded iter=%d gate=%s fp=%s action=%r" % (
        rec["iteration"], rec["gate"], rec["fingerprint"] or "-", rec["action"][:50]))
    print("recorded")
def cmd_fingerprint(opts):
    """Print the fingerprint of a failure text, or ``(no-failure)`` when it is empty.

    The source is the first truthy value of ``--file`` then ``--input``; with neither,
    stdin ("-") is read.
    """
    for key in ("file", "input"):
        source = opts.get(key)
        if source:
            break
    else:
        source = "-"
    digest = fingerprint(_read_source(source))
    print(digest if digest else "(no-failure)")
def analyze(rows, k=DEFAULT_K):
    """Turn journal rows into a stall verdict. Pure function: no I/O, deterministic.

    rows: list of journal records (dicts), oldest first.
    k:    stall threshold.

    Returns a dict with keys verdict ("PROGRESS" | "STALLED"), stall_count, fingerprint,
    recommend ("continue" | "switch-strategy" | "escalate"), dead_ends and reason.
    STALLED means the trailing run of non-passing rows that share the latest row's non-empty
    fingerprint is at least ``k`` long; one past the threshold the recommendation becomes
    "escalate". dead_ends lists, sorted, the actions recorded more than once under that
    fingerprint anywhere in the journal.
    """
    def result(verdict, count, fp, recommend, dead_ends, reason):
        return {"verdict": verdict, "stall_count": count, "fingerprint": fp,
                "recommend": recommend, "dead_ends": dead_ends, "reason": reason}

    if not rows:
        return result("PROGRESS", 0, "", "continue", [], "empty journal")

    latest = rows[-1]
    live_fp = latest.get("fingerprint", "")
    if latest.get("gate") == "pass" or not live_fp:
        return result("PROGRESS", 0, live_fp, "continue", [],
                      "last attempt passed or had no failure signature")

    def same_failure(row):
        return row.get("gate") != "pass" and row.get("fingerprint") == live_fp

    # Length of the trailing run of failures that share the live fingerprint.
    streak = 0
    for row in reversed(rows):
        if not same_failure(row):
            break
        streak += 1

    # Actions repeated under this exact fingerprint are the dead-ends.
    tally = {}
    for row in rows:
        if not same_failure(row):
            continue
        action = (row.get("action") or "").strip()
        if action:
            tally[action] = tally.get(action, 0) + 1
    repeated = sorted(name for name, times in tally.items() if times > 1)

    if streak >= k:
        advice = "escalate" if streak >= k + 1 else "switch-strategy"
        return result("STALLED", streak, live_fp, advice, repeated,
                      "%d consecutive failures with the same fingerprint %s" % (streak, live_fp))
    return result("PROGRESS", streak, live_fp, "continue", repeated,
                  "failing, but under the stall threshold (%d/%d)" % (streak, k))
def cmd_stall(opts):
    """Print the stall verdict for the journal; optionally signal it through the exit code.

    Options read from ``opts``:
      k          stall threshold (default DEFAULT_K)
      json       print the full ``analyze`` result as indented JSON instead of text
      exit-code  exit with status 10 when the verdict is STALLED

    Text mode prints the lower-cased verdict, the reason, the recommendation when stalled,
    and any dead-end actions.

    Fixes over the previous version: ``--k`` without a value (stored as ``True`` by
    ``_parse``) or with a non-integer value used to become a threshold of 1 or a traceback;
    it now exits with status 2 and a message on stderr. The separator in the recommendation
    line, which had been mis-encoded, is a dash again.
    """
    raw_k = opts.get("k", DEFAULT_K)
    try:
        if raw_k is True:
            raise ValueError("flag given without a value")
        k = int(raw_k)
    except (TypeError, ValueError):
        print("stall: --k expects an integer, got %r" % (raw_k,), file=sys.stderr)
        sys.exit(2)

    a = analyze(_load(), k)
    stalled = a["verdict"] == "STALLED"
    if opts.get("json"):
        print(json.dumps(a, indent=2))
    else:
        print(a["verdict"].lower())
        log(a["reason"])
        if stalled:
            log("recommend: %s — do NOT re-feed the same goal into the same failure"
                % a["recommend"])
        if a["dead_ends"]:
            log("dead-end actions (already tried, same failure): %s" % "; ".join(a["dead_ends"]))
    if opts.get("exit-code") and stalled:
        sys.exit(10)
def _resolved_fingerprints(rows):
    """Return the set of failure fingerprints that a later pass resolved and that have not
    reappeared in any failure recorded after the most recent pass."""
    resolved, pending = set(), set()
    for r in rows:
        if r.get("gate") == "pass":
            # Everything that was failing before this pass counts as resolved.
            resolved |= pending
            pending = set()
        elif r.get("fingerprint"):
            pending.add(r.get("fingerprint"))
    return resolved - pending


def cmd_resume(opts):
    """The read every turn should START with: what was tried, so no dead-end is repeated.

    Prints a one-line summary (attempt count, last gate, stall count against the threshold,
    live fingerprint), the verdict and recommendation, up to the last 12 distinct actions
    with their most recent outcome, the dead-end actions to avoid, and, once at least one
    attempt has passed, how many failure fingerprints have been resolved.

    Options read from ``opts``: k (stall threshold, default DEFAULT_K).

    Fixes over the previous version:
      * "resolved fingerprints" used to count the fingerprints stored on PASSING rows, which
        ``cmd_record`` always leaves empty, so it reported 1 no matter what. It now counts
        failure fingerprints that were followed by a pass and are not currently live.
      * ``--k`` without a value or with a non-integer value exits with status 2 instead of
        silently becoming 1 or raising a traceback.
      * The mis-encoded separators in the printed lines are a dash / middle dot again.
    """
    rows = _load()
    if not rows:
        print("resume: fresh loop — no prior attempts")
        return

    raw_k = opts.get("k", DEFAULT_K)
    try:
        if raw_k is True:
            raise ValueError("flag given without a value")
        k = int(raw_k)
    except (TypeError, ValueError):
        print("resume: --k expects an integer, got %r" % (raw_k,), file=sys.stderr)
        sys.exit(2)

    a = analyze(rows, k)
    print("resume: %d attempts · last gate=%s · stall=%s/%s · live_fp=%s" % (
        len(rows), rows[-1].get("gate"), a["stall_count"], k, a["fingerprint"] or "-"))
    log("verdict: %s — recommend: %s" % (a["verdict"], a["recommend"]))

    # Distinct actions tried + their last outcome (anti-oscillation memory). The dict keeps
    # first-seen order, so the slice below shows the 12 most recently introduced actions.
    last_outcome = {}
    for r in rows:
        act = (r.get("action") or "").strip()
        if act:
            last_outcome[act] = r.get("gate")
    for act, gate in list(last_outcome.items())[-12:]:
        log("tried [%s] %s" % (gate, act[:70]))

    if a["dead_ends"]:
        log("AVOID (dead-ends): %s" % "; ".join(a["dead_ends"]))
    if any(r.get("gate") == "pass" for r in rows):
        log("resolved fingerprints so far: %d" % len(_resolved_fingerprints(rows)))
def cmd_status(opts):
    """Print a compact tail of the journal.

    Options read from ``opts``: n (how many of the most recent records to show, default 10).

    Prints "journal empty" when there are no records. Otherwise prints a header with the
    total and shown counts, followed by one line per shown record (iteration, gate,
    fingerprint or "-", and the first 56 characters of the action).

    Fix over the previous version: ``n`` is clamped to zero or more and the slice is guarded.
    ``rows[-0:]`` is the whole list, so ``--n 0`` used to print every record under a header
    that said "last 0"; a negative n dropped records from the front and printed a negative
    count.
    """
    rows = _load()
    n = max(0, int(opts.get("n", 10)))
    if not rows:
        print("journal empty")
        return
    tail = rows[-n:] if n else []
    print("journal: %d records (last %d):" % (len(rows), len(tail)))
    for r in tail:
        log("iter=%-3s %-7s fp=%-12s %s" % (
            r.get("iteration"), r.get("gate"), r.get("fingerprint") or "-",
            (r.get("action") or "")[:56]))
def cmd_since(opts):
    """Incremental triage: show ONLY the delta since the last recorded turn.

    The newest journal record that carries a ``commit`` supplies the base. The command
    prints ``git diff --stat <base>..HEAD`` followed by ``git status --short``. Without any
    recorded commit it prints just the working-tree status.

    ``_git`` returns None when git could not be run or exited non-zero, and "" when it ran
    and had nothing to report. The previous version treated both the same, which produced
    two wrong messages that are fixed here:
      * a clean working tree with no recorded commit was reported as "(git unavailable)";
      * a failed diff (git missing, or the recorded commit no longer known) together with
        an empty status was reported as "no change since last turn", inviting the caller to
        skip a re-scan it actually needs.
    The mis-encoded separators in the printed lines are a dash again.
    """
    rows = _load()
    base = ""
    for r in reversed(rows):
        if r.get("commit"):
            base = r["commit"]
            break

    if not base:
        print("since: no prior commit recorded — full working-tree state:")
        wt = _git(["status", "--short"])
        if wt is None:
            print(" (git unavailable)")
        else:
            print(wt or " (working tree clean)")
        return

    print("since: delta vs last recorded turn (%s):" % base[:12])
    stat = _git(["diff", "--stat", "%s..HEAD" % base])
    if stat is None:
        log("could not diff against %s (git unavailable or commit unknown) — do a full re-scan"
            % base[:12])
    else:
        for ln in stat.splitlines():
            log(ln)

    wt = _git(["status", "--short"])
    if wt:
        log("working tree:")
        for ln in wt.splitlines():
            log(" " + ln)

    # Only claim "no change" when both git calls succeeded and both came back empty.
    if stat == "" and wt == "":
        log("no change since last turn — triage can skip a full re-scan")
def cmd_selftest(_opts):
    """Check ``fingerprint`` and ``analyze`` against in-memory cases, then exit.

    Prints one line per check and a final summary. Exits with status 0 when every check
    passed and 1 otherwise. ``_opts`` is accepted for dispatch uniformity and not used.
    """
    outcomes = []

    def check(label, actual, expected):
        matched = actual == expected
        outcomes.append(matched)
        mark = "ok" if matched else "XX"
        print(" [%s] %-30s got=%s want=%s" % (mark, label, actual, expected))

    # The same bug seen with different line numbers / addresses / timestamps must hash alike.
    first = fingerprint("FAILED test_login at app/auth.py:42 (0x7ffd, 2026-06-24T10:00:00Z) 1.3s")
    second = fingerprint("FAILED test_login at app/auth.py:99 (0x1abc, 2026-06-25T11:22:33Z) 0.4s")
    check("fingerprint.stable", first == second and first != "", True)

    # A different failure must hash differently; empty text has no fingerprint.
    unrelated = fingerprint("AssertionError: expected 3 got 4 in test_math")
    check("fingerprint.distinct", unrelated != first, True)
    check("fingerprint.empty", fingerprint(""), "")

    template = {"hypothesis": "", "note": "", "ts": "t"}

    # Three identical failures reach the threshold k=3.
    stuck = [
        dict(template, iteration=turn, action="retry fetch", gate="fail",
             fingerprint="deadbeef0001")
        for turn in (1, 2, 3)
    ]
    stalled = analyze(stuck, 3)
    check("stall.detected", stalled["verdict"], "STALLED")
    check("stall.count", stalled["stall_count"], 3)
    check("stall.deadend", stalled["dead_ends"], ["retry fetch"])

    # A pass on the latest turn resets the streak.
    fixed = stuck + [dict(template, iteration=4, action="fix root cause", gate="pass",
                          fingerprint="")]
    check("progress.after_pass", analyze(fixed, 3)["verdict"], "PROGRESS")

    # Two failures with different fingerprints: the loop is moving, not stalled.
    moving = [
        dict(template, iteration=1, action="a", gate="fail", fingerprint="aaa1"),
        dict(template, iteration=2, action="b", gate="fail", fingerprint="bbb2"),
    ]
    check("progress.moving", analyze(moving, 3)["verdict"], "PROGRESS")

    # Below the threshold the verdict is PROGRESS but the streak is still counted.
    check("progress.under_k", analyze(stuck[:2], 3)["stall_count"], 2)

    all_passed = all(outcomes)
    summary = "PASS" if all_passed else "FAIL"
    print("selftest: %s (%d/%d)" % (summary, sum(outcomes), len(outcomes)))
    sys.exit(0 if all_passed else 1)
def _parse(args):
    """Parse ``--key value`` style arguments into a dict.

    args: list of command-line tokens (everything after the sub-command).

    Rules:
      * ``--key value``  -> opts["key"] = "value" (the next token, unless it starts with "--")
      * ``--key=value``  -> opts["key"] = "value" (new; split on the first "=", so the value
                            may itself contain "=" or start with "--")
      * ``--key`` alone  -> opts["key"] = True
      * tokens that do not start with "--" and were not consumed as a value are ignored
    Later occurrences of a key overwrite earlier ones. Values are always strings.

    Before the ``=`` form was supported, ``--k=3`` produced the useless entry
    ``{"k=3": True}`` and the intended option silently kept its default.
    """
    opts = {}
    i = 0
    count = len(args)
    while i < count:
        token = args[i]
        i += 1
        if not token.startswith("--"):
            continue
        key, has_inline, inline = token[2:].partition("=")
        if has_inline:
            opts[key] = inline
        elif i < count and not args[i].startswith("--"):
            opts[key] = args[i]
            i += 1
        else:
            opts[key] = True
    return opts
def main():
    """Command-line entry point: dispatch ``argv[1]`` to the matching ``cmd_*`` function.

    With no arguments the module docstring is printed and the process exits with status 2.
    An unknown sub-command prints the list of choices and also exits with status 2.
    """
    argv = sys.argv[1:]
    if not argv:
        print(__doc__)
        sys.exit(2)

    sub = argv[0]
    opts = _parse(argv[1:])
    handlers = {
        "record": cmd_record,
        "fingerprint": cmd_fingerprint,
        "stall": cmd_stall,
        "resume": cmd_resume,
        "status": cmd_status,
        "since": cmd_since,
        "selftest": cmd_selftest,
    }
    handler = handlers.get(sub)
    if handler is None:
        print("unknown command '%s'. choices: record fingerprint stall resume "
              "status since selftest" % sub)
        sys.exit(2)
    handler(opts)


if __name__ == "__main__":
    main()