From 3f5416a7558ce76ea42a076b020d02e379e43595 Mon Sep 17 00:00:00 2001 From: Shawn Pana Date: Tue, 16 Jun 2026 14:10:44 -0700 Subject: [PATCH 1/2] Add watch plugin: /watch real-time Chrome monitor MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Colocated plugin (watch/) that monitors the user's Chrome live — network, console logs, and user actions (clicks/typing/navigations) plus a dense screen recording — and answers what happened on demand. Follows the active tab across all tabs. Depends on the browser-harness plugin + CLI, checked in the skill. - watch/.claude-plugin/plugin.json — plugin manifest - watch/skills/watch/SKILL.md — /watch entry; spawn monitor, stop+answer flow - watch/skills/watch/monitor.py — background CDP monitor (frames + timeline) - marketplace.json — register "watch" with source: ./watch - README — add watch row + install line Co-Authored-By: Claude Opus 4.8 (1M context) --- .claude-plugin/marketplace.json | 8 ++ README.md | 4 +- watch/.claude-plugin/plugin.json | 13 +++ watch/.gitignore | 2 + watch/skills/watch/SKILL.md | 67 +++++++++++++++ watch/skills/watch/monitor.py | 136 +++++++++++++++++++++++++++++++ 6 files changed, 229 insertions(+), 1 deletion(-) create mode 100644 watch/.claude-plugin/plugin.json create mode 100644 watch/.gitignore create mode 100644 watch/skills/watch/SKILL.md create mode 100644 watch/skills/watch/monitor.py diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index 0dcb75d..f39f5c4 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -18,6 +18,14 @@ "source": "./qa", "homepage": "https://github.com/browser-use/plugins/tree/main/qa", "keywords": ["qa", "testing", "browser-use", "cloud-browser", "score", "evaluation", "ngrok"] + }, + { + "name": "watch", + "description": "Monitor your Chrome in real time — network, console logs, and your actions (clicks/typing/navigations) plus a dense screen recording — then answer 
what happened. Follows the active tab across all tabs. Run as /watch. Requires the browser-harness plugin + CLI.", + "category": "automation", + "source": "./watch", + "homepage": "https://github.com/browser-use/plugins/tree/main/watch", + "keywords": ["watch", "monitor", "browser-use", "recording", "timeline", "network", "console", "observability"] } ] } diff --git a/README.md b/README.md index 6b68ce6..c9090b9 100644 --- a/README.md +++ b/README.md @@ -10,8 +10,9 @@ This repo is a catalog of Browser Use plugins for Claude Code. Most entries remo |---|---|---| | **browser-harness** | Direct CDP browser control — coordinate clicks, screenshots, persistent Python session, local Chrome or Browser Use cloud. | [browser-use/browser-harness](https://github.com/browser-use/browser-harness) | | **qa** | QA-test a website or app and return a 1–5 quality score with evidence. Drives a Browser Use cloud browser and tunnels localhost automatically. Run as `/qa `. | [`qa/`](./qa) (colocated; requires browser-harness) | +| **watch** | Monitor your Chrome in real time — network, console, and your actions (clicks/typing/navigations) plus a dense screen recording — then answer what happened. Follows the active tab across all tabs. Run as `/watch`. | [`watch/`](./watch) (colocated; requires browser-harness) | -Both ship **skills only**. The `browser-harness` CLI is a one-time install prerequisite documented inside the plugin; `qa` runs through browser-harness, so install that first. +All ship **skills only**. The `browser-harness` CLI is a one-time install prerequisite documented inside the plugin; `qa` and `watch` run through browser-harness, so install that first. ## Install @@ -19,6 +20,7 @@ Both ship **skills only**. 
The `browser-harness` CLI is a one-time install prere claude plugin marketplace add browser-use/plugins claude plugin install browser-harness@browser-use claude plugin install qa@browser-use # adds /qa +claude plugin install watch@browser-use # adds /watch ``` ## Layout diff --git a/watch/.claude-plugin/plugin.json b/watch/.claude-plugin/plugin.json new file mode 100644 index 0000000..6d18985 --- /dev/null +++ b/watch/.claude-plugin/plugin.json @@ -0,0 +1,13 @@ +{ + "name": "watch", + "version": "0.1.0", + "description": "Monitor the user's Chrome in real time — network, console logs, and user actions (clicks/typing/navigations) — plus a dense screen recording — then answer what happened. Follows the active tab across all tabs. Requires the browser-harness plugin + CLI.", + "author": { + "name": "Browser Use", + "url": "https://browser-use.com" + }, + "homepage": "https://github.com/browser-use/plugins", + "repository": "https://github.com/browser-use/plugins", + "license": "MIT", + "keywords": ["watch", "monitor", "browser", "browser-use", "recording", "timeline", "network", "console", "observability"] +} diff --git a/watch/.gitignore b/watch/.gitignore new file mode 100644 index 0000000..7a60b85 --- /dev/null +++ b/watch/.gitignore @@ -0,0 +1,2 @@ +__pycache__/ +*.pyc diff --git a/watch/skills/watch/SKILL.md b/watch/skills/watch/SKILL.md new file mode 100644 index 0000000..a722c4e --- /dev/null +++ b/watch/skills/watch/SKILL.md @@ -0,0 +1,67 @@ +--- +name: watch +description: Monitor the user's Chrome in real time — network requests, console logs/errors, and user actions (clicks, form changes, navigations) — plus a dense screen recording. Use when the user wants you to "watch", "monitor", or "keep an eye on" their browser so you can later answer what happened ("what was that error?", "did the upload finish?", "why did checkout fail?"). You keep watching across turns and decide when to stop — when the user asks about what happened, stop, review, and answer. 
+--- + +# watch + +Watch the user's Chrome so you can later tell them exactly what happened. You run a background monitor that records **two streams**: + +- **Timeline** (`timeline.jsonl`) — your primary signal: network requests/responses, console logs + exceptions, and user actions (clicks, form `change`s with field identity + value *length* only, submits, navigations). This is *what* happened, timestamped. +- **Frames** (`frames/<epoch_ms>.jpg`) — a dense ~2.5 fps recording. This is *how it looked*. You consult it on demand by reading the frames around a timestamp — don't load the whole thing. + +Captures **only Chrome** (the page's own CDP stream), needs **no screen-recording permission**, and is **not a daemon** — you start it and you stop it, within this session. Requires the `browser-harness` CLI on PATH (verify with `command -v browser-harness`; if missing, tell the user to install it — see https://github.com/browser-use/browser-harness). + +## Start watching + +When asked to watch/monitor, spawn the monitor **in the background** and confirm, then go quiet — it accumulates on its own while you're idle between turns. + +```bash +MON="<absolute path to monitor.py>" # e.g. ${CLAUDE_PLUGIN_ROOT}/skills/watch/monitor.py +WATCH_DIR="/tmp/watch-$(date +%s)"; mkdir -p "$WATCH_DIR" +echo "$WATCH_DIR" > /tmp/watch-current # so you can find it next turn +WATCH_DIR="$WATCH_DIR" browser-harness < "$MON" # run this with run_in_background +``` + +Use the real absolute path to `monitor.py` in this skill's directory (you know where this SKILL.md lives; under a plugin it's `${CLAUDE_PLUGIN_ROOT}/skills/watch/monitor.py`). Run that last line as a **background** command. Tell the user: "Watching your Chrome — go do your thing, then ask me what happened." Do **not** poll or screenshot yourself between turns; the monitor handles it. + +## When the user asks what happened + +You decide this is the cue to stop (a question about what occurred = stop, review, answer). Then: + +1. 
**Stop the monitor** cleanly: `touch "$(cat /tmp/watch-current)/STOP"` and give it ~1s to flush. +2. **Read the FULL structured timeline first — this is the authoritative record, the frames are not.** Always reconstruct from the complete event stream before looking at a single image: + - **Every action** (`action` kind): clicks, typing, keys, scrolls, submits — the whole list, chronological, not a filtered subset. + - **Every navigation** (`watch.tab` for a newly attached tab, top-level `nav` for each full page load, and `action.k=="nav"` for in-page URL changes): the tab/URL trail. **Searches and direct URL visits live here, not in actions** — a Google/address-bar search shows up only as a `nav` event whose URL holds the `q=` query (page listeners can't see omnibox typing). If you skip navigations you will miss searches and page-to-page movement. + - **Network** (`net` / `net.fail`): statuses, failures, the API calls behind each step. + ```bash + D="$(cat /tmp/watch-current)" + # full action + navigation trail, chronological — read ALL of it, don't pre-filter to a keyword + python3 -c " + import json,datetime + for l in open('$D/timeline.jsonl'): + e=json.loads(l); k=e['kind']; d=e['data'] + if k not in ('action','watch.tab','nav'): continue + ts=datetime.datetime.fromtimestamp(e['t']).strftime('%H:%M:%S') + if k!='action': print(f'[{ts}] {\"TAB\" if k==\"watch.tab\" else \"NAV\"} {d[\"url\"]}') + else: print(f'[{ts}] {d.get(\"k\"):7} {d.get(\"target\",\"\")[:40]} {d.get(\"text\") or d.get(\"to\") or d.get(\"value\") or d.get(\"key\") or d.get(\"y\",\"\")}') + " + # network errors / notable statuses + grep -E '"net.fail"|"status": [45][0-9][0-9]' "$D/timeline.jsonl" + ``` + Build the answer from this complete picture. Each line has `t` (epoch seconds) → your index into the frames. +3. **Then use the images as extra help** — only for the few moments where a line needs visual detail (what a page/error/post actually looked like). List frames whose `<epoch_ms>` filename falls in `[t-3s, t+3s]` and `Read` that slice. Frames are supplementary confirmation, never the primary source, and never the whole `frames/` dir. 
+ ```bash + T=; lo=$(( (T-3)*1000 )); hi=$(( (T+3)*1000 )) + for f in "$D"/frames/*.jpg; do n=$(basename "$f" .jpg); [ "$n" -ge "$lo" ] && [ "$n" -le "$hi" ] && echo "$f"; done + ``` +4. **Answer from the full timeline, backed by frames where useful** — include the navigation/search trail, e.g. "…then you searched Google for **shawn pan poop**, opened the first result, and `POST /api/x` returned **500** (frame `…188.jpg` shows the red toast)." Don't report only the in-page clicks; the searches and tab moves are part of "what I did." + +Optional — if the user wants to watch it themselves, assemble an mp4 from the frame slice with ffmpeg (`ffmpeg -pattern_type glob -i '…/frames/*.jpg' clip.mp4`). + +## Notes + +- **Granular by design:** the recording is dense so playback shows motion (spinner → toast → redirect), but you only ever *read* the slice the question needs — the timeline tells you where to look. +- **Privacy:** raw keystrokes and input values are **not** captured — only field identity + value length, and password fields are redacted. Say so if the user asks. +- **Cleanup:** after answering, the monitor is already stopped (STOP sentinel). The session dir under `/tmp/watch-*` holds the frames/timeline until you or the user removes it. +- If the user says "keep watching" or asks something mid-stream, you may answer from the timeline without stopping — only stop when the episode is clearly over. diff --git a/watch/skills/watch/monitor.py b/watch/skills/watch/monitor.py new file mode 100644 index 0000000..a2c30bd --- /dev/null +++ b/watch/skills/watch/monitor.py @@ -0,0 +1,136 @@ +# Background Chrome monitor for the /watch skill — v3. +# Follows the ACTIVE tab across all tabs; actions pushed via CDP binding (nav-proof, immediate). 
+# Run via: WATCH_DIR=/tmp/watch-XXXX browser-harness < monitor.py (backgrounded) +# Writes: $WATCH_DIR/frames/.jpg (dense recording of whichever tab is in front) +# $WATCH_DIR/timeline.jsonl (actions + console + network, tagged with tab url) +# Stop: touch $WATCH_DIR/STOP +# Captures ONLY Chrome, no OS permission. Privacy: input is length-only; passwords redacted; no raw text. + +import os, time, json, base64, glob + +WATCH_DIR = os.environ["WATCH_DIR"] +FRAMES = os.path.join(WATCH_DIR, "frames"); os.makedirs(FRAMES, exist_ok=True) +TL = open(os.path.join(WATCH_DIR, "timeline.jsonl"), "a", buffering=1) +STOP = os.path.join(WATCH_DIR, "STOP") +INTERVAL = float(os.environ.get("WATCH_INTERVAL", "0.4")) +MAX_AGE = float(os.environ.get("WATCH_MAX_AGE", "1800")) + +# Listeners push each action through the __watchEmit CDP binding the instant it happens — +# so a click that triggers a navigation is captured before the page unloads. +BOOT = r""" +(function(){ + if (window.__watch__) return; window.__watch__ = 1; + var emit=function(o){ try{ o.t=Date.now()/1000; o.url=location.href; + if (window.__watchEmit) { window.__watchEmit(JSON.stringify(o)); } // fresh pages: instant, nav-proof + else { (window.__watchLog=window.__watchLog||[]).push(o); // pre-existing pages: buffer, polled + if(window.__watchLog.length>1000) window.__watchLog.shift(); } }catch(e){} }; + var d=function(el){ if(!el||!el.tagName) return ''; var s=el.tagName; + if(el.id)s+='#'+el.id; if(el.name)s+='[name='+el.name+']'; + if(typeof el.className==='string'&&el.className)s+='.'+el.className.split(' ')[0]; return s; }; + var val=function(t){ return (t.type==='password')?'':((t.value||'').length+' chars'); }; + document.addEventListener('click',function(e){emit({k:'click',target:d(e.target),text:(e.target.innerText||'').slice(0,60)});},true); + document.addEventListener('submit',function(e){emit({k:'submit',target:d(e.target),action:e.target.action||''});},true); + 
document.addEventListener('change',function(e){emit({k:'change',target:d(e.target),value:val(e.target)});},true); + document.addEventListener('input',function(e){var t=e.target; clearTimeout(t.__wt); + t.__wt=setTimeout(function(){emit({k:'type',target:d(t),value:val(t)});},600);},true); + document.addEventListener('keydown',function(e){ if(['Enter','Escape','Tab'].indexOf(e.key)>=0) + emit({k:'key',key:e.key,target:d(e.target)});},true); + var st; window.addEventListener('scroll',function(){ if(st)return; + st=setTimeout(function(){emit({k:'scroll',y:Math.round(window.scrollY||0)}); st=null;},800);},true); + ['error','warn'].forEach(function(lvl){ var o=console[lvl]; + console[lvl]=function(){ try{emit({k:'console.'+lvl,args:[].slice.call(arguments).map(String).slice(0,5)});}catch(e){} + return o.apply(console,arguments);};}); + var last=location.href; setInterval(function(){ if(location.href!==last){emit({k:'nav',from:last,to:location.href}); last=location.href;} },300); +})(); +""" + +cdp("Target.setAutoAttach", autoAttach=True, flatten=True, waitForDebuggerOnStart=False) +sessions = {} + +def page_targets(): + return [t for t in cdp("Target.getTargets")["targetInfos"] + if t["type"] == "page" and not t["url"].startswith(("devtools://", "chrome://"))] + +def ev(sid, expr): + return cdp("Runtime.evaluate", session_id=sid, expression=expr, returnByValue=True).get("result", {}).get("value") + +def rec(kind, data): + TL.write(json.dumps({"t": time.time(), "kind": kind, "data": data}) + "\n") + +def attach(t): + sid = cdp("Target.attachToTarget", targetId=t["targetId"], flatten=True)["sessionId"] + cdp("Runtime.enable", session_id=sid) + cdp("Runtime.addBinding", session_id=sid, name="__watchEmit") # nav-proof action channel + try: cdp("Network.enable", session_id=sid) + except Exception: pass + try: cdp("Page.enable", session_id=sid) # so every full navigation is logged + except Exception: pass + cdp("Page.addScriptToEvaluateOnNewDocument", session_id=sid, 
source=BOOT) # arm future docs + try: ev(sid, BOOT) # arm current doc + except Exception: pass + sessions[t["targetId"]] = sid + rec("watch.tab", {"url": t["url"]}) + return sid + +rec("watch.started", {"dir": WATCH_DIR}) +while not os.path.exists(STOP): + now = time.time() + tgts = page_targets() + live = {t["targetId"] for t in tgts} + for tid in [k for k in sessions if k not in live]: + sessions.pop(tid, None) + active = None + for t in tgts: + sid = sessions.get(t["targetId"]) or attach(t) + try: + if ev(sid, "document.visibilityState") == "visible": + foc = ev(sid, "document.hasFocus()") + if active is None or foc: + active = sid + except Exception: + pass + # poll JS-side buffer too — covers tabs already open before watching started + try: + buf = ev(sid, "JSON.stringify((window.__watchLog||[]).splice(0))") + if buf and buf != "[]": + for o in json.loads(buf): + rec("action", o) + except Exception: + pass + # one drain covers ALL attached sessions: actions (bindingCalled) + network + try: + for e in drain_events(): + m = e.get("method", "") + if m == "Runtime.bindingCalled" and e["params"].get("name") == "__watchEmit": + try: rec("action", json.loads(e["params"]["payload"])) + except Exception: pass + elif m == "Page.frameNavigated": + fr = e["params"].get("frame", {}) + if not fr.get("parentId"): # main frame only (full nav / search) + rec("nav", {"url": fr.get("url")}) + elif m == "Runtime.exceptionThrown": + ed = e["params"].get("exceptionDetails", {}); rec("page.error", {"text": ed.get("text")}) + elif m == "Network.responseReceived": + r = e["params"].get("response", {}); rec("net", {"status": r.get("status"), "url": r.get("url")}) + elif m == "Network.loadingFailed": + rec("net.fail", {"error": e["params"].get("errorText"), "type": e["params"].get("type")}) + except Exception: + pass + if active: + try: + shot = cdp("Page.captureScreenshot", session_id=active, format="jpeg", quality=60) + if shot.get("data"): + open(os.path.join(FRAMES, "%d.jpg" % 
int(now * 1000)), "wb").write(base64.b64decode(shot["data"])) + except Exception: + pass + if int(now) % 12 == 0: + cutoff = (now - MAX_AGE) * 1000 + for f in glob.glob(os.path.join(FRAMES, "*.jpg")): + try: + if int(os.path.basename(f)[:-4]) < cutoff: os.remove(f) + except Exception: pass + time.sleep(INTERVAL) + +rec("watch.stopped", {}) +TL.close() +print("watch monitor stopped:", WATCH_DIR) From f0268c6b4600c0de00c59c14c3f6622ca4c1872a Mon Sep 17 00:00:00 2001 From: Shawn Pana Date: Tue, 16 Jun 2026 18:00:30 -0700 Subject: [PATCH 2/2] watch: auto-cleanup of session dirs + name failing domains MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - monitor.py: purge old /tmp/watch-* dirs on start (keep newest 3) and switch the in-session frame trim to a time-based sweep so it isn't skipped when loop iterations are slow (many tabs) — fixes unbounded frame growth (a single session had ballooned to 2.4GB). - monitor.py: capture requestId->url so net.fail names the failing domain (e.g. ERR_NAME_NOT_RESOLVED -> https://dead-host/...). - SKILL.md: document the auto-cleanup + how to drop the current session. Co-Authored-By: Claude Opus 4.8 (1M context) --- watch/skills/watch/SKILL.md | 2 +- watch/skills/watch/monitor.py | 33 ++++++++++++++++++++++++++++++--- 2 files changed, 31 insertions(+), 4 deletions(-) diff --git a/watch/skills/watch/SKILL.md b/watch/skills/watch/SKILL.md index a722c4e..f6ce711 100644 --- a/watch/skills/watch/SKILL.md +++ b/watch/skills/watch/SKILL.md @@ -63,5 +63,5 @@ Optional — if the user wants to watch it themselves, assemble an mp4 from the - **Granular by design:** the recording is dense so playback shows motion (spinner → toast → redirect), but you only ever *read* the slice the question needs — the timeline tells you where to look. - **Privacy:** raw keystrokes and input values are **not** captured — only field identity + value length, and password fields are redacted. Say so if the user asks. 
-- **Cleanup:** after answering, the monitor is already stopped (STOP sentinel). The session dir under `/tmp/watch-*` holds the frames/timeline until you or the user removes it. +- **Cleanup:** the recorder keeps disk bounded on its own — it trims frames older than `WATCH_MAX_AGE` (30 min) within a session, and on every start it purges old `/tmp/watch-*` dirs, keeping only the newest `WATCH_KEEP_DIRS` (3). After you've fully answered and stopped the monitor, you can also `rm -rf "$(cat /tmp/watch-current)"` to drop the current session's frames immediately. - If the user says "keep watching" or asks something mid-stream, you may answer from the timeline without stopping — only stop when the episode is clearly over. diff --git a/watch/skills/watch/monitor.py b/watch/skills/watch/monitor.py index a2c30bd..1c8f364 100644 --- a/watch/skills/watch/monitor.py +++ b/watch/skills/watch/monitor.py @@ -13,7 +13,25 @@ TL = open(os.path.join(WATCH_DIR, "timeline.jsonl"), "a", buffering=1) STOP = os.path.join(WATCH_DIR, "STOP") INTERVAL = float(os.environ.get("WATCH_INTERVAL", "0.4")) -MAX_AGE = float(os.environ.get("WATCH_MAX_AGE", "1800")) +MAX_AGE = float(os.environ.get("WATCH_MAX_AGE", "1800")) # trim frames older than this (sec) +KEEP_DIRS = int(os.environ.get("WATCH_KEEP_DIRS", "3")) # how many session dirs to keep around + +def purge_old_sessions(): + """Delete stale /tmp/watch-* session dirs so /watch can't leak GB of frames over time. 
+ Keeps the newest KEEP_DIRS (by mtime), including the current one; removes the rest.""" + import shutil + base = os.path.dirname(WATCH_DIR) + dirs = [] + for n in os.listdir(base): + p = os.path.join(base, n) + if n.startswith("watch-") and n[6:].isdigit() and os.path.isdir(p): + try: dirs.append((os.path.getmtime(p), p)) + except OSError: pass + for _, p in sorted(dirs, reverse=True)[KEEP_DIRS:]: + if p != WATCH_DIR: + shutil.rmtree(p, ignore_errors=True) + +purge_old_sessions() # Listeners push each action through the __watchEmit CDP binding the instant it happens — # so a click that triggers a navigation is captured before the page unloads. @@ -73,6 +91,8 @@ def attach(t): return sid rec("watch.started", {"dir": WATCH_DIR}) +req_urls = {} # requestId -> url, so a failed request can name its domain +last_sweep = 0.0 # wall-clock of the last frame-retention sweep while not os.path.exists(STOP): now = time.time() tgts = page_targets() @@ -110,10 +130,16 @@ def attach(t): rec("nav", {"url": fr.get("url")}) elif m == "Runtime.exceptionThrown": ed = e["params"].get("exceptionDetails", {}); rec("page.error", {"text": ed.get("text")}) + elif m == "Network.requestWillBeSent": + req_urls[e["params"].get("requestId")] = e["params"].get("request", {}).get("url") + if len(req_urls) > 3000: # bound memory + for k in list(req_urls)[:1000]: req_urls.pop(k, None) elif m == "Network.responseReceived": r = e["params"].get("response", {}); rec("net", {"status": r.get("status"), "url": r.get("url")}) elif m == "Network.loadingFailed": - rec("net.fail", {"error": e["params"].get("errorText"), "type": e["params"].get("type")}) + rid = e["params"].get("requestId") + rec("net.fail", {"error": e["params"].get("errorText"), "type": e["params"].get("type"), + "url": req_urls.get(rid, "")}) except Exception: pass if active: @@ -123,7 +149,8 @@ def attach(t): open(os.path.join(FRAMES, "%d.jpg" % int(now * 1000)), "wb").write(base64.b64decode(shot["data"])) except Exception: pass - if 
int(now) % 12 == 0: + if now - last_sweep > 12: # time-based — runs even when iterations are slow (many tabs) + last_sweep = now cutoff = (now - MAX_AGE) * 1000 for f in glob.glob(os.path.join(FRAMES, "*.jpg")): try: