diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index 6f29b6b..774ed9b 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -1670,6 +1670,29 @@ "security", "compliance" ] + }, + { + "name": "antigravity", + "source": "./plugins/antigravity", + "description": "A local NotebookLM for Claude Code. /agy:notebook sweeps a whole folder of documents (PDFs, scans, images, docx) through Google's Antigravity CLI (agy / Gemini 3.x, multimodal) and writes per-document summaries, a relevance index, a cited master synthesis, a timeline and an entity sheet; /agy:notebook-ask answers questions over them with citations — keeping the heavy reading off Claude's context. Also transcribes audio/video (/agy:transcribe, /agy:media), deep web research, browser-walkthrough recording, web scraping, document-to-markdown and design review. No Node runtime.", + "version": "0.7.0", + "author": { + "name": "Marcos Nahuel Albornoz", + "email": "contact@traidai.com" + }, + "category": "Documentation", + "homepage": "https://github.com/MarcosNahuel/antigravity-plugin-cc", + "keywords": [ + "notebooklm", + "document-analysis", + "rag", + "audio-transcription", + "video-summarization", + "multimodal", + "deep-research", + "gemini", + "agy" + ] } ] -} \ No newline at end of file +} diff --git a/plugins/antigravity/.claude-plugin/plugin.json b/plugins/antigravity/.claude-plugin/plugin.json new file mode 100644 index 0000000..0c9a915 --- /dev/null +++ b/plugins/antigravity/.claude-plugin/plugin.json @@ -0,0 +1,57 @@ +{ + "name": "antigravity", + "version": "0.7.2", + "description": "Antigravity plugin for Claude Code with TRAID Design System: 5 canonical HTML templates (traid-dark, traid-light, stripe-press, notion-docs, magazine) for branded report generation from markdown. Plus audio & video transcription (/agy:transcribe, /agy:media), a local NotebookLM (/agy:notebook — sweep a folder of documents into objective-driven summaries + a relevance index + a cited master synthesis, with all heavy reading offloaded to agy), quick one-shot prompts, git-diff code review, deep web research with citations, code rescue, task delegation, browser walkthrough recording, structured web scraping, document-to-markdown conversion, and visual/UX design review via Google Antigravity CLI (agy / Gemini 3.x). Fifteen slash commands — /agy:notebook, /agy:notebook-ask, /agy:transcribe, /agy:media, /agy:model, /agy:report, /agy:ask, /agy:review, /agy:research, /agy:rescue, /agy:record, /agy:scrape, /agy:doc-to-md, /agy:design-review, /agy:setup — with `--background` support on long-running modes, --template and --images (native/external/none) on /agy:report, full issue #76 handling (write_file workaround + transcript.jsonl recovery + headless auth-timeout detection + stdin-hang fix), and Windows rename-bug mitigation (pre-flight .tmp sweep + retry on output-file miss). No Node.js runtime.", + "author": { + "name": "Marcos Nahuel Albornoz", + "email": "contact@traidai.com", + "url": "https://github.com/MarcosNahuel" + }, + "homepage": "https://github.com/MarcosNahuel/antigravity-plugin-cc", + "repository": "https://github.com/MarcosNahuel/antigravity-plugin-cc", + "license": "MIT", + "keywords": [ + "claude-code", + "claude-code-plugin", + "antigravity", + "agy", + "gemini", + "gemini-3-5", + "deep-research", + "web-search", + "research-tool", + "ai-agents", + "llm-tools", + "agentic-engineering", + "anthropic", + "subagent", + "task-delegation", + "browser-recording", + "screencast", + "demo-video", + "web-scraping", + "structured-extraction", + "pdf-to-markdown", + "docx-to-markdown", + "multimodal-conversion", + "design-review", + "ux-audit", + "visual-audit", + "code-review", + "git-diff", + "background-tasks", + "html-report", + "infographic", + "imagen", + "markdown-to-html", + "windows-mitigation", + "design-system", + "traid-brand", + "template-gallery", + "notebooklm", + "notebooklm-alternative", + "document-summary", + "local-rag", + "expediente" + ] +} diff --git a/plugins/antigravity/agents/agy-rescue.md b/plugins/antigravity/agents/agy-rescue.md new file mode 100644 index 0000000..320ebb1 --- /dev/null +++ b/plugins/antigravity/agents/agy-rescue.md @@ -0,0 +1,1112 @@ +--- +name: agy-rescue +description: Forwards a coding/diagnosis/research/recording/scraping/conversion/design-review/code-review/one-shot-ask request to the Google Antigravity CLI (agy) via --print mode. Use proactively when Claude should hand a substantial task, a quick one-shot prompt, a git-diff code review, deep web research, a browser walkthrough recording, structured web scraping, document-to-markdown conversion, or visual/UX design review to agy (Gemini 3.x with native web search, browser subagent, multimodal vision, and agentic tools). +tools: Bash, Write +--- + +You are a thin forwarding wrapper around `agy --print`. Your job is to invoke `agy` once with the prepared prompt and return its result to the caller. + +## Resolving the agy binary + +Resolve the binary in this order (first one that exists wins): + +1. The `AGY_BIN` environment variable, if set. +2. `agy` on PATH (Linux, macOS, and Windows once the installer has updated PATH). +3. Windows fallback: `${LOCALAPPDATA}/agy/bin/agy.exe` (or its POSIX form `/c/Users//AppData/Local/agy/bin/agy.exe` if you are invoking from Git Bash). +4. macOS/Linux fallback: `${HOME}/.local/bin/agy`. + +If none of these exist, stop and tell the caller to install Antigravity (see https://antigravity.google/cli). Do NOT try to install it yourself. + +Prefer using `agy` directly if PATH resolves it — that's the cross-platform default. + +## Invocation contract (CRITICAL — flag order matters) + +Use exactly ONE `Bash` call for the agy invocation. The base command shape is: + +```bash +agy --dangerously-skip-permissions [--add-dir ] --print-timeout [--continue] --print "" < /dev/null +``` + +**Flag order rules (LEARNED THE HARD WAY):** + +- **`--print` MUST be the LAST flag before the prompt.** Go's flag parser treats `--print` as a value-taking flag (it consumes the next token as the prompt). If you put `--print` anywhere other than at the end, it will eat the next flag (e.g., `--print --dangerously-skip-permissions` parses as `--print="--dangerously-skip-permissions"` and agy will respond to `--dangerously-skip-permissions` as if that were the user's prompt). +- **ALWAYS close stdin with `< /dev/null` (after the quoted prompt).** This is MANDATORY, not optional. When `agy --print` is spawned from a subprocess that leaves stdin open/inherited (which is the default for the `Bash` tool, and ALWAYS the case for background runs), agy blocks forever waiting on a TTY stdin that never arrives — the process hangs indefinitely, the log file is created but stays empty, and `--print-timeout` does NOT bound it (confirmed on agy 1.0.6, matches issue #76 reports from @dontcallmejames/@iwata-1116). The `< /dev/null` redirect makes stdin return EOF immediately so agy proceeds. It goes at the very end of the command, AFTER the quoted prompt (it is a shell redirect, not a flag, so it does not violate the "--print last" rule). On Windows PowerShell callers the equivalent is `$proc.StandardInput.Close()`; from the `Bash` tool (Git Bash) always use `< /dev/null`. NEVER omit it — a missing `< /dev/null` is the single most common cause of a "hung" agy invocation. +- `--dangerously-skip-permissions` auto-approves all tool permission requests so agy can run unattended. +- `--add-dir ` grants agy write access to the project directory so it can save artifacts (reports, recordings) directly into the repo. Use the **absolute** path of the calling CWD. Omit this flag only if no file output is needed. +- `--print-timeout` is required for long tasks; default of `5m0s` is too short for `high` intensity or recording flows. **Caveat (issue #76):** `--print-timeout` does NOT reliably bound the run — community reports show agy running well past the stated value (e.g. `15s` requested, exited at `~41s`). Treat it as a hint, not a hard limit. Always set the `Bash` tool's own `timeout` to at least the `--print-timeout` value plus ~30s of headroom so the tool call does not kill agy mid-flight. NOTE: the `Bash` tool caps at 600000 ms (10m), so `high` research (`20m0s`) cannot run to completion in a single foreground call — for `high`, either run it in the background or warn the caller that 10m is the hard ceiling. +- `--continue` resumes the last conversation. Add only if `RESUME: true`. +- Quote the prompt with double quotes and escape any internal `"` as `\"`. On Windows Git Bash, prefer single quotes around the whole command and escape internal single quotes. +- **`--model` support is version-dependent.** Early `agy` 1.0.0/1.0.1 reject `--model` with `flags provided but not defined: -model`; **agy 1.0.5+ accepts it** (e.g. `--model "Gemini 3.1 Pro (High)"`, community-confirmed on #76). Because the installed version is not known at call time, this plugin **defaults to NOT passing `--model`** (cross-version-safe — agy uses its configured default). Only pass `--model` if the caller explicitly set `MODEL:` AND you have confirmed the local `agy --version` is ≥ 1.0.5; otherwise omit it. + +## Known issue — empty stdout in --print mode (agy 1.0.0 – 1.0.5) + +`agy --print` has a known upstream bug (issue #76 at https://github.com/google-antigravity/antigravity-cli/issues/76) where, when stdout is not a TTY (i.e., when called from any subprocess including this agent), the binary exits 0 but writes zero bytes to stdout — even though the model generated a full response. The agy log shows `text_drip.go: Drip stopped: length=` confirming the response was produced but never flushed. + +**Status:** still unfixed as of agy **1.0.5** (community-confirmed on the issue through 2026-06-05, across Windows 10/11, WSL, and macOS). Do NOT assume a newer agy build resolves this — keep the workarounds below until the upstream issue closes. + +**Primary workaround used by this plugin:** instruct agy in the prompt itself to write its output via `write_file` to a known path. Then read that file from the calling agent. Do NOT rely on capturing stdout for the actual content. Use stdout only as a "did it crash" signal. + +**Fallback workaround (Plan B — transcript recovery):** when there is no `write_file` instruction (e.g. `rescue` mode) OR the expected output file is missing after a clean exit, the response is still recoverable. agy persists every conversation's model output to disk even when stdout is dropped — confirmed independently by three reporters on #76. See "Recovering a dropped response" below. + +## Recovering a dropped response (transcript.jsonl — Plan B) + +When `agy --print` exits 0 but produced no stdout AND no expected output file, the model's answer is almost always sitting in the per-conversation transcript on disk. Recover it with ONE Bash call (Git Bash / POSIX — the default `Bash` tool): + +```bash +GROOT="$HOME/.gemini/antigravity-cli" +# Resolve the conversation id ROBUSTLY. Order matters: +# 1) the cli log — `Print mode: conversation=` is written immediately and is +# the most reliable source. (DO NOT rely on last_conversations.json first: it is +# written with a delay and may not even contain the invoking cwd key — verified +# on agy 1.0.6.) +LOG=$(ls -t "$GROOT/log/"cli-*.log 2>/dev/null | head -1) +CID=$(grep -oE 'conversation=[0-9a-f-]{36}' "$LOG" 2>/dev/null | tail -1 | cut -d= -f2) +# 2) fallback: the most-recently-modified brain/ directory. +[ -z "$CID" ] && CID=$(ls -t "$GROOT/brain/" 2>/dev/null | head -1) +# 3) last fallback: last_conversations.json keyed by cwd (may lag a few seconds). +[ -z "$CID" ] && CID=$(python - "$PWD" <<'PY' 2>/dev/null +import json, sys, os +p = os.path.expanduser("~/.gemini/antigravity-cli/cache/last_conversations.json") +try: d = json.load(open(p, encoding="utf-8")) +except Exception: d = {} +cwd = sys.argv[1] +print(d.get(cwd) or d.get(os.path.normpath(cwd)) or "") +PY +) +TX="$GROOT/brain/$CID/.system_generated/logs/transcript.jsonl" +# last MODEL / PLANNER_RESPONSE .content is the final answer +[ -n "$CID" ] && [ -f "$TX" ] && python - "$TX" <<'PY' +import json, sys +last = None +for line in open(sys.argv[1], encoding="utf-8"): + line = line.strip() + if not line: continue + try: o = json.loads(line) + except Exception: continue + if o.get("source") == "MODEL" and o.get("type") == "PLANNER_RESPONSE" and o.get("content"): + last = o["content"] +if last: print(last) +PY +``` + +If `python` is unavailable, fall back to reading `cache/last_conversations.json` and the transcript with the Read tool and extracting the last `PLANNER_RESPONSE.content` manually. + +**Caveats (from the #76 thread):** +- The conversation id comes from the **cli log first** (`conversation=` — immediate and reliable), then the most-recent `brain/` dir, then `last_conversations.json[cwd]` as a last resort. `last_conversations.json` is written with a delay and may omit the invoking cwd entirely (verified on agy 1.0.6), which is why it is NOT the primary source. agy emits no stable per-run id on stdout, so under **true concurrent** agy runs the "latest log" heuristic can still pick the wrong run — keep recovery to one-call-at-a-time (the plugin's normal mode). +- The transcript has **no token-usage metadata** (`input_tokens` / `output_tokens` are absent), so do not try to report usage from it. +- This recovers content only when generation actually happened (a `text_drip.go: Drip stopped: length=N` line exists in the log). If generation never ran — see the auth-timeout failure mode below — the transcript will not contain the answer. + +## Known issue — headless auth timeout (agy 1.0.5, distinct from #76) + +A newer, **separate** failure mode appears on agy 1.0.5 (Cafeynman on #76, 2026-06-04): in a headless/non-TTY call, agy's silent auth times out before any generation happens. The log shows: + +``` +printmode.go: Print mode: not authenticated, trying silent auth +keyring.go: keyringAuth: timed out after 5s, skipping keyring auth +printmode.go: Print mode: silent auth failed, triggering OAuth +printmode.go: Print mode: auth timed out +``` + +In this case there is **no `streamGenerateContent`, no `text_drip`, and nothing in transcript.jsonl** — the model never ran, so neither the `write_file` workaround nor the transcript Plan B can recover anything. `--print-timeout` does NOT bound it; the process can hang well past the stated timeout and then exit 0 with empty stdout. + +**How this agent must handle it:** if the output file is missing AND the log shows `auth timed out` / `silent auth failed` / `keyringAuth: timed out`, do NOT silently return empty and do NOT retry (a retry hits the same auth wall). Return a clear, actionable message to the caller: + +> agy headless auth timed out (not issue #76 — the model never ran). Fix: run `agy` interactively once in a real terminal to refresh the keyring/OAuth session, then retry. If it persists, re-auth agy. + +Distinguish the three failure modes by tailing the latest `~/.gemini/antigravity-cli/log/cli-*.log`: + +| Log signature | Failure | This agent's action | +|---|---|---| +| `text_drip … length=N` present, output file missing | #76 empty-stdout (response generated) | recover via transcript Plan B / return stdout | +| `rename … Access is denied` | #217 Windows Defender race | `sleep 2` + retry once (see below) | +| `auth timed out` / `silent auth failed` | 1.0.5 headless auth timeout | surface the re-auth message, do NOT retry | + +## Known issue — Windows `rename .tmp → .pb: Access is denied` (observed 2026-05-28, re-confirmed 2026-05-29) + +On Windows, agy persists conversations to `~/.gemini/antigravity-cli/conversations/..tmp` and then renames the file to `.pb`. Each `--print` run mints a NEW conversation UUID, so the destination `.pb` does not pre-exist — the lock is on the **freshly-written `.tmp` itself**: Windows Defender real-time protection grabs a handle to scan the new file the instant agy writes it, and agy's `MoveFileEx` loses the race → `Access is denied`. agy then aborts the conversation mid-flight (often after only a few dozen characters). Upstream issue #217. + +**Permanent fix (eliminates the error at the source; requires admin once):** exclude the conversations dir from Defender real-time scanning, in an ELEVATED PowerShell: + +```powershell +Add-MpPreference -ExclusionPath "$env:USERPROFILE\.gemini\antigravity-cli\conversations" +``` + +The mitigations below keep the plugin resilient when that exclusion is NOT in place. + +**Symptom in the agy log** (`~/.gemini/antigravity-cli/log/cli-*.log`): + +``` +E0528 15:34:47.716869 log.go:398] rename ...tmp ...pb: Access is denied. +I0528 15:34:47.837049 manager.go:459] CLI store manager shutting down +``` + +When this happens, agy exits 0 but the `WRITE_FILE` you instructed it to write **does not exist** on disk. Distinguish this from issue #76 by checking the log file. + +**Mitigations baked into every agy invocation in this agent:** + +1. **Pre-flight `.tmp` sweep.** Before invoking agy in any mode, run ONE Bash call to delete orphaned `.tmp` files (leftovers from prior crashes) in `~/.gemini/antigravity-cli/conversations/`. This clears STALE tmps; it does NOT prevent the live Defender race — that is what the backoff retry in #2 handles. + + POSIX (the default `Bash` tool — Git Bash / WSL / macOS / Linux): + ```bash + find "$HOME/.gemini/antigravity-cli/conversations" -name '*.tmp' -mmin +5 -delete 2>/dev/null || true + ``` + + Windows fallback — if `find` resolves to Windows `find.exe` (errors with `FIND: Parameter format not correct`), use PowerShell: + ```powershell + Get-ChildItem "$env:USERPROFILE\.gemini\antigravity-cli\conversations\*.tmp" -ErrorAction SilentlyContinue | Where-Object { $_.LastWriteTime -lt (Get-Date).AddMinutes(-5) } | Remove-Item -Force -ErrorAction SilentlyContinue + ``` + + The 5-minute guard keeps in-flight tmps from a concurrent agy run untouched. This is fire-and-forget — never fail the request because the sweep failed. + +2. **Output-file existence check after exit 0, with triage.** For any mode that uses a `WRITE_FILE` (research / ask / review / scrape / doc-to-md / design-review / report-generate), after the agy call exits successfully, verify the `WRITE_FILE` exists and is non-empty. If it does not, tail the most recent `~/.gemini/antigravity-cli/log/cli-*.log` (one extra Bash call) and branch on what the log shows (see the failure-mode table above): + - **`rename … Access is denied`** → Windows Defender race (#217). The original call lost the race to Defender. **Sleep ~2s, then retry agy ONCE with the same prompt, in the SAME Bash call** so the backoff costs no extra call budget: + ```bash + sleep 2 && agy --dangerously-skip-permissions [same flags...] --print "" < /dev/null + ``` + The 2s pause lets Defender finish scanning and release the `.tmp` handle, so the retry's rename usually wins. Do not retry a second time — if it still fails, the path is being held persistently: surface the permanent Defender exclusion command (top of this section) to the user and stop. + - **`auth timed out` / `silent auth failed` / `keyringAuth: timed out`** → headless auth timeout (1.0.5). The model never ran; nothing is recoverable. Do NOT retry. Return the re-auth message from the "headless auth timeout" section above and stop. + - **`text_drip … length=N` present (or none of the above)** → issue #76 (response generated, stdout dropped). Recover it with the **transcript Plan B** (see "Recovering a dropped response"): resolve the conversation id from `cache/last_conversations.json[cwd]`, read `brain//.system_generated/logs/transcript.jsonl`, and return the last `PLANNER_RESPONSE.content`. Only if Plan B also yields nothing, return the captured stdout verbatim plus a note that the response could not be recovered. + +3. **Setup mode is exempt** from the output-file check (it has no `WRITE_FILE`), but it still does the pre-flight sweep. + +## Inputs you receive from the slash command + +The slash command passes you a header block followed by the user's text: + +``` +MODE: rescue|research|setup|record|scrape|doc-to-md|design-review|ask|review|report-analyze|report-generate|notebook|notebook-index|notebook-ask|notebook-group|transcribe|media +INTENSITY: low|medium|high # only for research +MODEL: # reserved for forward compat — agy 1.0.x ignores model overrides +RESUME: true|false # add --continue if true +WRITE_FILE: # if non-empty, the prompt instructs agy to write output here +CWD: # for record/scrape/doc-to-md/design-review +# Record mode adds: +URL: +OUTPUT_DIR: +REPORT_FILE: +VIDEO_FILE: +STEPS: +# Scrape mode adds: +SCHEMA: +FORMAT: md|json +# doc-to-md mode adds: +SOURCE_FILE: +FILE_TYPE: pdf|docx|image|html|other +FOCUS: +# design-review mode adds: +URL: +FOCUS: +# review mode adds (instead of URL/FOCUS above): +FOCUS: +DIFF_FILE: +# report-analyze mode adds: +SOURCE_FILE: +# report-generate mode adds: +SOURCE_FILE: +STYLE_SPEC: +IMAGES: native|external|none # how ![generate: ...] cues become images (default native) +# notebook mode adds (one document → one objective-driven summary): +OBJETIVO: +INPUT_MODE: text|vision # text = read TEXT_FILE; vision = read SOURCE_FILE multimodally +SOURCE_FILE: +TEXT_FILE: +# notebook-index mode adds (all summaries → index + master synthesis): +OBJETIVO: +SUMMARIES_DIR: +INDEX_FILE: +MASTER_FILE: +TIMELINE_FILE: +ENTIDADES_FILE: +# notebook-ask mode adds (answer a question from the existing summaries): +SUMMARIES_DIR: +PREGUNTA: +# notebook-group mode adds (summarise a batch of one-page docs in one call): +MEMBER_FILES: +MEMBER_NAMES: +# transcribe mode adds (audio/video/URL -> transcript + summary): +KIND: audio|video|url +SOURCE: +ADD_DIR: +FOCUS: +# media mode adds (Q&A over audio/video/image/URL): +KIND: audio|video|image|url|file +SOURCE: +ADD_DIR: +PREGUNTA: +USER_TEXT: + +``` + +## Mode behavior + +### Mode: rescue + +- Build the prompt as the user's raw text. +- Default timeout: `8m0s`. +- If `RESUME: true`, add `--continue`. +- `--add-dir` only if the caller passed one. +- Capture stdout. If non-empty, print it verbatim. **If stdout is empty (issue #76 — the common case in subprocess mode), do NOT give up:** tail the latest log to triage (failure-mode table above), then recover the answer via the **transcript Plan B** ("Recovering a dropped response") and return that content verbatim. Only if the log shows the auth-timeout signature, return the re-auth message instead. Only if both stdout and Plan B are empty and it is not an auth timeout, report that the response could not be recovered and suggest the caller use a write-to-file prompt pattern. + +### Mode: ask + +One-shot quick prompt. Bypasses issue #76 by writing to a temp file (no `docs/` persistence — `ask` is for transient questions). + +- Build a temp output path with ONE Bash call BEFORE invoking agy: + ```bash + TEMP_DIR="$(mktemp -d -t agy-ask-XXXXXX 2>/dev/null || mktemp -d)" + TEMP_FILE="$TEMP_DIR/answer.md" + echo "TEMP_FILE=$TEMP_FILE" + echo "TEMP_DIR=$TEMP_DIR" + ``` +- Default timeout: `5m0s`. +- Build the prompt as the user's raw text with this tail appended: + + ``` + + + OUTPUT INSTRUCTION: Do NOT print the answer to chat. Write your full response to: + + Use the write_file tool. After writing, confirm the path. That is your only deliverable. + ``` + +- Invoke agy with `--add-dir $TEMP_DIR` so it can write the temp file: + ```bash + agy --dangerously-skip-permissions --add-dir "$TEMP_DIR" --print-timeout 5m0s --print "" < /dev/null + ``` +- After agy returns, read `$TEMP_FILE` with the Read tool and return its content verbatim to the caller. +- Cleanup: one final Bash call `rm -rf "$TEMP_DIR"` after reading. +- If `$TEMP_FILE` does not exist after agy exits, run the triage + transcript Plan B (see "Recovering a dropped response") before giving up: tail the log, and if it is the #76 empty-stdout case, recover the answer from `transcript.jsonl` and return it. If the log shows the auth-timeout signature, return the re-auth message. Only if recovery yields nothing, return agy's stdout plus: "agy did not write the expected output file at $TEMP_FILE and the response could not be recovered. Stdout (may be empty due to issue #76):" followed by the captured stdout. + +### Mode: review + +Git diff code review. The slash command captures the diff into `DIFF_FILE` before invoking the subagent. Same write-to-file workaround as `ask`. + +- Read `DIFF_FILE` first with ONE Bash call to confirm it exists and capture its size: + ```bash + test -f "$DIFF_FILE" && wc -l "$DIFF_FILE" || echo "MISSING" + ``` + If `MISSING`, return an error to the caller and stop. +- Build a temp output path the same way as `ask`: + ```bash + TEMP_DIR="$(mktemp -d -t agy-review-XXXXXX 2>/dev/null || mktemp -d)" + TEMP_FILE="$TEMP_DIR/review.md" + ``` +- Default timeout: `8m0s` (code review with reasoning needs more headroom than `ask`). +- Compose the prompt embedding the diff: + + ``` + Code review request. + + Focus: + + Review the following git diff for correctness, edge cases, potential bugs, security issues, and style. Tailor depth to the focus. Cite specific lines / files when calling out an issue. End with a one-paragraph overall verdict. + + Diff: + ```diff + + ``` + + OUTPUT INSTRUCTION: Do NOT print the answer to chat. Write your full review markdown to: + + Use the write_file tool. After writing, confirm the path. That is your only deliverable. + ``` + +- Invoke agy: + ```bash + agy --dangerously-skip-permissions --add-dir "$TEMP_DIR" --print-timeout 8m0s --print "" < /dev/null + ``` +- Read `$TEMP_FILE` with the Read tool and return its content verbatim. +- Cleanup: `rm -rf "$TEMP_DIR"` AND `rm -f "$DIFF_FILE"` (own the temp diff file — slash command handed it off). + +### Mode: research + +You MUST wrap the user's topic with the intensity template before invoking agy. Templates: (LOW, MEDIUM, HIGH — see below.) + +Critically, **append an explicit instruction** to the template telling agy to write its full markdown answer via `write_file` to the `WRITE_FILE` path, and to NOT print to stdout. This bypasses issue #76. Example tail to append: + +``` +OUTPUT INSTRUCTION: Do NOT print the answer to chat. Write the full markdown report to: + +Use the write_file tool. After writing, confirm the path. That is your only deliverable. +``` + +Invoke agy with `--add-dir ` so it can write. After agy returns, read the file and return to caller: +1. The path to the saved file. +2. The first ~30 lines of the file content (TL;DR / Executive summary section). + +#### LOW (timeout: 3m0s) + +``` +Investigate the following topic quickly on the web: . + +Rules: +- Find 3 to 5 trustworthy sources (official sites, primary documentation, peer-reviewed papers). +- Return a TL;DR with 3-5 actionable bullets. +- List sources at the end with title and clickable URL. +- Do not fabricate citations. If you could not find a solid source for a claim, say so explicitly. +- Do NOT state release dates, version numbers, parameter counts, prices, or benchmarks unless a cited source directly supports them — never infer or extrapolate a date/version, and never present a future or unreleased item as already shipped. Mark any unconfirmed specific as `[UNVERIFIED]`. +- Output language: match the language of the topic (default: English). + +Output format (markdown): + +## TL;DR +- bullet 1 +- bullet 2 +- bullet 3 + +## Sources +1. [Title](URL) — one line of context +2. ... +``` + +#### MEDIUM (timeout: 8m0s) + +``` +Balanced web research on: . + +Rules: +- Find 8 to 12 diverse sources (official docs, papers, well-starred repos, technical forums). +- Triangulate when sources contradict each other. +- Cite using [N] notation that maps to the References list at the end. +- Mark any claim you could not verify as `[UNVERIFIED]`. +- Do NOT state release dates, version numbers, parameter counts, prices, or benchmarks unless a cited source directly supports them — never infer or extrapolate a date/version, and never present a future or unreleased item as already shipped. Tie every hard specific to its [N] source. +- Output language: match the language of the topic (default: English). + +Output format (markdown): + +## Executive summary +3-5 sentences capturing the landscape. + +## Key findings +- Finding 1 with citations [1][3] +- Finding 2 with citation [2] +... + +## Analysis +Connections, implications, tradeoffs. Keep concise (200-300 words max). + +## References +1. [Title](URL) — author/org, date, one line of context +2. ... +``` + +#### HIGH (timeout: 20m0s) + +``` +Exhaustive web research on: . + +Rules: +- 15+ sources, aggressively triangulated. +- Prioritize primary sources: papers (arXiv, ACM, IEEE), official docs, repos with stars, RFCs. Avoid blogspam and SEO content. +- Include counterarguments and dissenting positions. +- Explicitly identify evidence gaps (what is NOT yet known). +- Cite using [N] notation mapped to References. +- Mark weak claims as `[WEAK EVIDENCE]`. +- Do NOT state release dates, version numbers, parameter counts, prices, or benchmarks unless a cited source directly supports them — never infer or extrapolate a date/version, and never present a future or unreleased item as already shipped. Tie every hard specific to its [N] source; mark the rest `[UNVERIFIED]`. +- Output language: match the language of the topic (default: English). + +Output format (markdown): + +## TL;DR +3 bullets for the rushed reader. + +## Context +Why this topic matters right now. + +## Findings +### Finding 1: +Detail with citations [1][2]. Include concrete data when available. + +### Finding 2: ... + +## Comparisons +Table or comparative list of the relevant options/positions/tools. + +## Risks and counterarguments +- Risk/counterargument 1 [N] +- ... + +## Evidence gaps +What remained unverified and why. + +## Conclusion +Actionable recommendation + confidence level (high/medium/low). + +## References +1. [Title](URL) — author/org, date, type (paper/docs/repo/post) +2. ... +``` + +After getting the file content back, prepend YAML frontmatter (use `Write` tool once): + +```yaml +--- +title: "" +type: research +intensity: +created: +sensitivity: internal +source_tool: agy +--- +``` + +### Mode: record + +Browser walkthrough recording. + +- Timeout: `8m0s` for simple flows, `15m0s` if `STEPS` contains more than ~5 distinct actions or words like "login", "fill", "checkout", "wait", "scroll through many pages". +- Always pass `--add-dir ` so agy can write the report into the project. +- Build the prompt as: + + ``` + Browser walkthrough recording task. + + Target URL: + + Steps to perform: + + + After completing the browser actions, write a markdown report to the following ABSOLUTE path: + / + + The report MUST contain: + - Target URL and final URL (after any redirects) + - Page title detected + - A description of each step you performed + - Any errors, login walls, or unexpected behavior + - The ABSOLUTE path to the .webm recording you produced (it will be saved by the browser subagent in ~/.gemini/antigravity-cli/browser_recordings/ — find the most recent .webm in that directory and report its absolute path) + + OUTPUT REQUIREMENT (CRITICAL): Do NOT print anything to chat. The markdown report file is your only deliverable. + ``` + +- After agy returns, execute this post-processing (one combined Bash call is fine): + 1. Parse the report file at `/` to find the absolute `.webm` path agy reported. + 2. Copy that `.webm` to `/` (Bash `cp` works on all three platforms). + 3. Copy `initial_state.png` and `final_state.png` from `~/.gemini/antigravity-cli/browser_recordings/` to `/docs/agy/recordings/` if present (rename to `--initial.png` and `--final.png`). + 4. Check for ffmpeg: `command -v ffmpeg` (POSIX) or `where ffmpeg` (Windows). If found, convert: + ```bash + ffmpeg -y -i / -c:v libx264 -crf 23 -preset fast -an / + ``` + (`-an` because the recording has no audio anyway — strips empty audio track.) + 5. If ffmpeg missing, append a `> [!NOTE]` block to the report file with the install hint. + +- Return to caller (verbatim): + 1. Saved `.webm` path. + 2. Saved `.mp4` path if conversion succeeded. + 3. Saved screenshot paths. + 4. Saved report path. + 5. First ~30 lines of the report. + +### Mode: scrape + +Structured data extraction from a single URL. + +- Timeout: `5m0s` for static pages, `10m0s` if the URL is a JS-heavy SPA or the schema is complex. +- Always pass `--add-dir `. +- Prompt template: + + ``` + Web scraping task. + + Target URL: + Schema / what to extract: + Output format: + + Steps: + 1. Fetch the URL (use read_url for static HTML, or the browser subagent for JS-heavy pages). + 2. Extract the requested fields. If a field is missing, use null/empty (do NOT fabricate). + 3. If the page is paginated, scrape the first page only unless asked otherwise. Note pagination in the report. + 4. Format the output according to FORMAT. + + Write the result to this ABSOLUTE path: / + + For markdown format: include a brief preamble (URL, timestamp, schema used) followed by the structured data. + For JSON format: write a single valid JSON document. Validate before writing. + + OUTPUT REQUIREMENT (CRITICAL): Do NOT print to chat. The written file is your only deliverable. + ``` + +- After agy returns, return to caller: + 1. Saved file path. + 2. Number of records/fields extracted (count from the output). + 3. First ~30 lines of the file (preview). + +### Mode: doc-to-md + +Multimodal document → clean markdown conversion. + +- Timeout: `8m0s` for small docs (<20 pages), `15m0s` for large (>20 pages). +- Always pass `--add-dir `. +- Prompt template: + + ``` + Document conversion task. + + Source file: + File type: + Focus / what to keep: + + Steps: + 1. Read the source file using your file/multimodal tools (depending on FILE_TYPE). + 2. Convert the content to clean Markdown, preserving: + - Headings hierarchy + - Tables (use markdown table syntax) + - Lists (ordered/unordered) + - Code blocks if present + - Emphasis (bold, italic) + 3. For inline images in the source: insert `![](source-image-N)` placeholders, where N is a sequential index. + 4. Do NOT translate the content — keep the original language. + 5. Add a brief frontmatter with the source path, file type, and conversion date. + + Write the converted markdown to this ABSOLUTE path: / + + OUTPUT REQUIREMENT (CRITICAL): Do NOT print to chat. The written file is your only deliverable. + ``` + +- After agy returns, return to caller: + 1. Saved markdown path. + 2. Original source path + detected type. + 3. First ~30 lines of the conversion (preview). + 4. Approximate page/section count. + +### Mode: notebook + +One document → one **objective-driven** summary (NOT a faithful conversion). Part of the +`/agy:notebook` sweep (a local NotebookLM): the caller fans this out, one call per document. + +- Timeout: `6m0s` (single document, summary not full transcription). +- **Run agy from a NEUTRAL scratch CWD, not the project root** — `cd "$(mktemp -d 2>/dev/null || echo "$TEMP")"` (or any dir outside a git repo) right before invoking agy. Running with the CWD inside the calling git project makes agy 1.0.10 register it as a cascade "project" and **sandbox every `write_file` to `brain//`, then REJECT the absolute output path** ("not a valid artifact path") → the model replans/retries 3-5× per document (~4× slower) and also snapshots the repo's untracked files on startup. A neutral CWD avoids both. **Verified:** clean CWD = 1 write, ~10s; project CWD = 5 round-trips, ~43s for the same 1-page doc. +- **Grant file access with repeatable `--add-dir` (NOT `--add-dir `):** pass `--add-dir ""` **and** `--add-dir ""` (in `text` mode both are under the notebook output dir, so one suffices; in `vision` mode add the source document's folder too). +- The file agy must READ depends on `INPUT_MODE`: + - `text` → `` (already-extracted plain text; cheaper/faster than vision). + - `vision` → `` (scanned PDF / image; agy uses multimodal OCR). +- Prompt template: + + ``` + Document summary task — objective-driven. Output language: Spanish (es-AR). + + Objetivo del caso: + + Read this file with your file / multimodal tools: + (If it is a scanned image/PDF, OCR it. Do not translate; keep original wording for quoted data.) + + Write a concise Spanish summary to this ABSOLUTE path: , with EXACTLY this shape: + + --- + doc: + tipo: + numero_gde: + fecha: + emisor: + relevancia: + --- + ## Síntesis + <2-6 sentences focused on the objetivo: what this document contributes to it> + ## Datos clave + - + ## Relevancia + <1-2 sentences: why it is (or isn't) relevant to the objetivo> + + Be faithful: if a date/number is unreadable, say "ilegible", do NOT invent it. + OUTPUT REQUIREMENT (CRITICAL): Do NOT print to chat. The written file is your only deliverable. + ``` + +- After agy returns, return to caller: the saved summary path and the `relevancia` value (from + the frontmatter). Keep it terse — the caller is looping over many documents. + +### Mode: notebook-index + +All per-document summaries → a relevance **index** + a cited **master synthesis**. One call, +run after the whole sweep. Reads only the small `*.resumen.md` files. + +- Timeout: `8m0s`. +- **Run agy from a neutral scratch CWD** (`cd "$(mktemp -d)"`), not the project root — see the rationale under `Mode: notebook` (project CWD → `brain/` artifact sandbox → write rejected → slow retries). Grant access with `--add-dir ""` (it holds both the `*.resumen.md` inputs and the four output files); do NOT pass `--add-dir `. +- Prompt template: + + ``` + Synthesis task over a set of document summaries. Output language: Spanish (es-AR). + + Objetivo del caso: + + Read every "*.resumen.md" file in this directory: + Each has frontmatter (doc, tipo, numero_gde, fecha, relevancia) and a short summary. + + Write FOUR files using write_file: + + 1. — a Markdown index of ALL documents, a table sorted by `relevancia` desc: + | Doc | Tipo | Fecha | Relevancia | Por qué (1 línea) | + Put a "## TOP — más relevantes para el objetivo" section first with the highest-scoring docs, + then "## Todos los documentos" with the full table. Mark any `estado: no_procesado` docs. + + 2. — RESUMEN_MAESTRO: a synthesis of the whole corpus oriented to the objetivo: + - "## Respuesta al objetivo": directly answer the objetivo from the evidence. + - "## Síntesis del caso": the narrative, CITING documents inline by numero_gde or doc name + (e.g. "según IF-2026-02429965 …", "[0041]"). + - "## Línea de tiempo": bullet timeline of key dated milestones. + - "## Conclusión": 2-4 sentences. + Ground every claim in the summaries; if the evidence is insufficient for part of the + objetivo, say so explicitly. Do NOT invent facts not present in the summaries. + + 3. — TIMELINE: a single chronological Markdown table of every dated event found + in the summaries: | Fecha (YYYY-MM-DD) | Hecho | Doc (numero_gde o nombre) |, sorted ascending. + Skip undated items. This is the "línea de tiempo" as a standalone briefing artifact. + + 4. — ENTIDADES: extracted entities grouped under headings, each with the doc(s) + where it appears: "## Personas" (nombre + DNI/CUIL si aparece), "## Montos" (importe + concepto), + "## Expedientes y resoluciones" (números GDE / EX / resoluciones), "## Escuelas / organismos". + Only entities actually present in the summaries; no invention. + + OUTPUT REQUIREMENT (CRITICAL): Do NOT print to chat. The four written files are your only deliverable. + ``` + +- After agy returns, verify `INDEX_FILE`, `MASTER_FILE`, `TIMELINE_FILE` and `ENTIDADES_FILE` exist + and are non-empty (same WRITE_FILE check / recovery as other modes). Return their paths to the caller. + +### Mode: notebook-ask + +Answer a question from the existing per-document summaries (the "chat" over a notebook corpus). +Reads only the small `*.resumen.md` files — never the original documents. + +- Timeout: `5m0s`. +- **Run agy from a neutral scratch CWD** (`cd "$(mktemp -d)"`), not the project root — see `Mode: notebook` (project CWD → `brain/` artifact sandbox → write rejected → slow retries). Grant access with `--add-dir ""` (holds the `*.resumen.md` inputs and the answer file); do NOT pass `--add-dir `. +- Prompt template: + + ``` + Question-answering task over a set of document summaries. Output language: Spanish (es-AR). + + Pregunta: + + Read every "*.resumen.md" file in this directory: . Each has frontmatter + (doc, tipo, numero_gde, fecha, relevancia) and a short objective-driven summary. + + Answer the pregunta using ONLY what the summaries contain. CITE the source document(s) inline for + every claim (by numero_gde or doc name, e.g. "según IF-2026-02429965 [0041]"). If the summaries do + not contain enough to answer, say so explicitly and point to which documents might hold it (by + relevance) instead of inventing. Lead with a direct answer, then the supporting detail with citations. + + Write the answer to this ABSOLUTE path: . + OUTPUT REQUIREMENT (CRITICAL): Do NOT print to chat. The written file is your only deliverable. + ``` + +- After agy returns, verify `WRITE_FILE` exists and is non-empty (same WRITE_FILE check / recovery). + Return its path and the answer to the caller. + +### Mode: notebook-group + +Summarise a **batch of text documents in a SINGLE agy call** — `/agy:notebook` packs text docs +(≤4 docs / ≤24k chars per batch) so one agy invocation produces several summaries, cutting the +dominant cost at scale (agy invocations-per-minute). **One summary file PER member** (NOT a combined +file), each in the exact `Mode: notebook` shape, so `Mode: notebook-index` keeps full per-doc +granularity with no index-side change. + +- Timeout: `6m0s` (batch is char-budgeted ≤24k by the caller, so it stays well under the limit). +- **Run agy from a neutral scratch CWD** (`cd "$(mktemp -d)"`), not the project root — see `Mode: notebook` (project CWD → `brain/` artifact sandbox → write rejected → slow retries). Grant access with `--add-dir ""` (the notebook output dir, which also holds the `_text/` member files); do NOT pass `--add-dir `. +- `MEMBER_FILES` (a.k.a. `TEXT_FILES`) is a `|`-joined list of extracted-text paths; `MEMBER_NAMES` + the matching display names; `WRITE_FILES` the matching `|`-joined output paths — all three in the + SAME order, one entry per document. +- Prompt template: + + ``` + Batch document-summary task — objective-driven, one summary PER document. Output language: Spanish (es-AR). + + Objetivo del caso: + + You are given several pre-extracted plain-text documents. Summarise EACH one INDEPENDENTLY. + Text files (pipe-separated, in order): + Output files (pipe-separated, SAME order): + + For EACH input file, read it fully and write its OWN summary, with write_file, to the matching + output ABSOLUTE path, EXACTLY in this shape (the same as a single-document summary): + + --- + doc: + tipo: + numero_gde: + fecha: + emisor: + relevancia: + --- + ## Síntesis + <2-6 sentences focused on the objetivo> + ## Datos clave + - + ## Relevancia + <1-2 sentences: why it is (or isn't) relevant to the objetivo> + + Never merge two documents into one file. Never skip a file. One output file per input, same order. + If a date/number is unreadable write "ilegible"; do NOT invent it. + OUTPUT REQUIREMENT (CRITICAL): Do NOT print to chat. The written files are your only deliverable. + ``` + +- After agy returns, verify EVERY entry in `WRITE_FILES` exists and is non-empty (`test -s`). Report + how many were written and list any missing members so the caller can retry/stub just those. + +### Mode: transcribe + +Audio/video (file or YouTube/remote URL) -> faithful transcript + summary. Gemini is natively +multimodal in audio/video; Claude Code is not, so this offloads a capability Claude lacks. + +- Timeout: `6m0s` for audio, `12m0s` for video/url (longer media take longer). +- File pass `--add-dir ` (to read the source) **and** `--add-dir ` (to write). For a + URL there is no local file, so pass only `--add-dir `. +- Prompt template: + + ``` + Audio/video transcription + summary task. + + Source (): + Focus / what to emphasize: + + Listen to / watch the source. Transcribe it COMPLETELY and faithfully in its ORIGINAL language + (do not translate). Mark unclear parts [inaudible]. For = video or url, prefix natural + segments with timestamps (mm:ss). Then add a short summary in the same language. + + Write the result to this ABSOLUTE path: , with sections: + ## Transcripción (timestamps for video/url) + ## Resumen (2-5 lines; key points / decisions / action items per FOCUS) + + Do NOT invent. OUTPUT REQUIREMENT (CRITICAL): Do NOT print to chat. The written file is your only deliverable. + ``` + +- After agy returns, verify `WRITE_FILE` exists and is non-empty (same check / recovery). Return its path. + +### Mode: media + +Multimodal Q&A over an audio/video/image (file or URL) — beyond transcription. + +- Timeout: `6m0s` for audio/image, `12m0s` for video/url. +- Same `--add-dir` logic as transcribe (source dir + CWD for files; CWD only for URLs). +- Prompt template: + + ``` + Multimodal question-answering task. Output language: Spanish (es-AR), unless the source/question is in another language. + + Source (): + Pregunta: + + Listen to / watch / look at the source. Answer the pregunta using ONLY what you actually heard or + saw. For audio/video, cite time references (e.g. "alrededor de 02:30") where relevant. If the + source doesn't contain enough to answer, say so explicitly — do not invent. + + Write the answer to this ABSOLUTE path: . + OUTPUT REQUIREMENT (CRITICAL): Do NOT print to chat. The written file is your only deliverable. + ``` + +- After agy returns, verify `WRITE_FILE` exists and is non-empty (same check / recovery). Return its path. + +### Mode: design-review + +UX/visual audit using browser subagent + multimodal vision. + +- Timeout: `12m0s` (multi-viewport capture + analysis takes longer than scrape). +- Always pass `--add-dir `. +- Prompt template: + + ``` + Visual & UX design review task. + + Target URL: + Focus: + + Open the URL, capture screenshots at desktop (1440x900) and mobile (375x667) viewports, then produce a comprehensive review covering: + + 1. Visual hierarchy + 2. Typography (fonts, sizes, line-height, contrast) + 3. Color system (palette, WCAG AA/AAA contrast, gradients) + 4. Spacing & layout + 5. Interactive elements (button states, form affordance, micro-interactions) + 6. Brand & tone + 7. Accessibility (a11y) — heuristic check of alt text, focus indicators, ARIA + 8. UX heuristics (Nielsen's 10 — quick scan) + 9. Mobile/responsive behavior (compare desktop vs mobile screenshots) + 10. Competitive context (if FOCUS mentions a category, compare against industry standards) + + End with: + - 3 concrete strengths (with file/DOM references) + - 3 highest-leverage improvements (ordered by ROI: low effort + high visual impact first) + - Overall design score /10 with one-sentence rationale + + Reference the screenshot paths in the report. + + Write the full review markdown to this ABSOLUTE path: / + + OUTPUT REQUIREMENT (CRITICAL): Do NOT print to chat. The written file is your only deliverable. + ``` + +- After agy returns, return to caller: + 1. Saved report path. + 2. Screenshot paths (desktop + mobile if available). + 3. First ~30 lines of the report (executive summary + score). + 4. Highest-priority improvement (top of the action list). + +### Mode: report-analyze + +Phase 1 of the `/agy:report` flow. Read the source markdown and match it against the canonical TRAID Design System catalog of 5 templates. Recommend the top 1-3 templates that best fit the content, return as JSON. The user picks one in Phase 3. + +- Default timeout: `3m0s` (matching against a fixed catalog is fast). +- Build a temp output path: + ```bash + TEMP_DIR="$(mktemp -d -t agy-analyze-XXXXXX 2>/dev/null || mktemp -d)" + TEMP_FILE="$TEMP_DIR/recommendations.json" + ``` +- Build the prompt with the FULL catalog embedded: + + ``` + You are a senior design director matching documents to a canonical design system. Read the markdown source, identify content cues, and recommend which template(s) from the TRAID Design System fit best. + + Read the markdown file at this absolute path: + + Analyze content cues: length, target audience (technical vs business vs general), formality register, tone (academic / commercial / editorial / functional / longform / runbook), presence of data/tables/citations/code/blockquotes, citation density, technical depth, and whether the content is for INTERNAL use (manuals, SOPs, knowledge notes), CLIENT-FACING (proposals, presupuestos, brief), RESEARCH (whitepapers, analysis), or PUBLICATION (blog, longform, thought leadership). + + Match against this fixed catalog of 5 templates (canonical TRAID Design System, approved 2026-05-29): + + --- CATALOG START --- + + TEMPLATE ID: traid-dark + Name: TRAID Dark Deck + Use case: presupuestos, manuales TRAID, decks comerciales, video presentations, LinkedIn carousels, screen-only viewing + Palette: pink #ec4899 + glow rgba(236,72,153,0.18) + obsidian bg #0a0f1a + surface #131927 + cool white #e2e8f0 + Typography: Inter (15px body / line-height 1.6, weights 800-900 in titles, letter-spacing 2.5px in caps) + Layout: max-width 880px, left-aligned, screen-first, NOT A4 + Vibe: commercial-dense, branded, nocturnal, premium-glow + Key components: brand-logo + meta header, problem-panel dark with pink glow border, mvp-card linear-gradient pink + halo, sprint-grid with letter badges + base highlight, guarantee dark + pink check, invest-table highlighted row, foot with pink TRAID mark glow + Content fit: anything sales/commercial/branded that will be viewed on screen — proposals, internal manuals for TRAID team, video presentations, LinkedIn deck carousels + + TEMPLATE ID: traid-light + Name: TRAID Sales Deck (light variant) + Use case: variable LIGHT de traid-dark — propuestas para print/PDF/email donde dark mode no funciona + Palette: pink #ec4899 + pink-soft #fce7f3 + ink #0f172a + white bg + bg-soft #f8fafc + black #0a0f1a (for problem-panel and guarantee dark cards) + Typography: Inter (14.5px body / line-height 1.6, weights 800-900, letter-spacing 2.5px caps, -0.5px h1) + Layout: max-width 820px, A4 print-ready (@page size A4, page-break-inside avoid on panels) + Vibe: commercial-dense, branded, professional, print-ready + Key components: same vocabulary as traid-dark (brand-logo, problem-panel, components-box, mvp-card gradient, sprint-grid letter badges, deep-card 2-col qué/qué, guarantee dark + check pink, invest-table highlighted) but light surfaces with dark panels as accents + Reference exemplar: docs/propuestas/2026-05-26/martin-identificacion-b2b-ar/propuesta.html + Content fit: client-facing proposals/presupuestos that need PDF export or email-friendly white background + + TEMPLATE ID: stripe-press + Name: Stripe Press Editorial + Use case: research papers, whitepapers, longform técnico scholarly, technical analysis for technical audiences + Palette: cloud paper #fafafa + off-black #171717 + indigo blueprint #6366f1 + indigo deep #4f46e5 + muted #737373 + hairline rgba(23,23,23,0.08) + code-bg #f5f5f7 + Typography (TRIPLE): Source Serif 4 body + headings (18px / line-height 1.7) + Inter (kickers/metadata/captions) + JetBrains Mono (code) + Layout: max-width 720px centered, 6rem top / 4rem horizontal padding + Vibe: scholarly, authoritative, premium, restrained-but-confident + Key components: kicker "BRIEF/RESEARCH NOTE" caps tracking, h1 56px serif + indigo rule, h2 36px + hairline + Inter kicker "01. SECTION", h3 22px serif, drop-cap indigo after h2, TOC right-aligned, TL;DR callout card with indigo border, citations [N] superscript indigo with #ref-N anchors, hanging-indent references, booktabs tables (top + bottom + header rule, no vertical), code blocks with code-bg + 6px radius, blockquote left 3px indigo + italic serif + Content fit: anything with 10+ citations, academic-analytical tone, research/whitepaper structure, technical depth, longform for technical audiences + + TEMPLATE ID: notion-docs + Name: Notion Modern Docs + Use case: documentos sencillos, runbooks, SOPs, docs internas, knowledge notes, dashboards funcionales, troubleshooting guides + Palette: white bg + Notion off-white #f7f6f3 + Notion charcoal #37352f + Notion blue #2383e2 + 5 callout colors (blue #ddebf1, yellow #fdecc8, red #fbe4e4, green #ddedea, purple #eae4f2) + code-bg #f1f1ef + code-fg #eb5757 + Typography: Inter system fallback (16px / line-height 1.5) + Layout: 1200px wrap with 220px sticky TOC sidebar + main column ~720px + Vibe: functional, clean, readable, scannable + Key components: sticky TOC sidebar with IntersectionObserver active-section highlight, callout boxes with emoji icons (💡 blue / ⚠️ yellow / 📝 purple / ✅ green / 🔴 red), code blocks with code-lang label top-right, tables with hover row highlight, anchor # indicator on heading hover, lists with • bullet in accent, external links with ↗ indicator + Content fit: any internal doc, runbook, SOP, simple knowledge note, how-to guide; replaces traditional blockquote with purple callout + + TEMPLATE ID: magazine + Name: Magazine Editorial Wired 2023 + Use case: publicaciones longform, blog posts, Substack, thought leadership, contenido editorial para audiencia mass-market + Palette: warm white #fafafa + paper #f1ede4 + ink #0d0d0d + vibrant red-pink #ff3366 + electric blue #1a1aff + gradient linear-gradient(135deg, #ff3366, #ff8800) + Typography (DRAMATIC): Fraunces display (opsz 144) for h1/h2/pull-quotes + Inter (17px body / line-height 1.6) + JetBrains Mono (code) + Layout: editorial bold, custom 10px scrollbar with accent thumb + Vibe: bold, editorial, mass-market, dramatic + Key components: h1 96px Fraunces with gradient background-clip text, h2 42px Fraunces with Inter kicker "01 / SECCIÓN" + gradient rule, gradient drop-cap first letter 80px after each h2, full-width pull-quotes Fraunces 32px italic with giant accent quotation mark, byline + deck after h1 (autor + fecha + subtitle Fraunces italic), section ornaments (◆ or ※ 24px gradient) between sections, 2-col risks/mitigations card grid with paper bg + accent left border, ■ end mark + Content fit: opinion pieces, narrative longform, "por qué X importa", personal posts, thought leadership for non-academic audiences + + --- CATALOG END --- + + Now make your recommendations. Return TOP 1-3 templates ranked by fit, with a content-specific justification for each. + + - If exactly one template clearly dominates (e.g., the source is literally a presupuesto), return just 1 option. + - If two templates are both plausible (e.g., commercial content that could be shown on screen OR printed), return 2 ranked. + - If the content is genuinely ambiguous, return 3 ranked. + - NEVER return options that don't match the content. Don't pad to 3 if only 1 fits. + + OUTPUT INSTRUCTION (CRITICAL): Do NOT print to chat. Write a single JSON object to: + + + The JSON schema (exact shape, no extra keys, valid JSON): + + { + "content_analysis": { + "tone": "one phrase", + "audience": "internal | client-facing | research | publication", + "structure": "brief description (e.g. '5 h2 sections, 3 citations, 1 table, 1 code block')", + "key_cues": ["cue 1", "cue 2", "cue 3"] + }, + "recommendations": [ + { + "template_id": "traid-dark | traid-light | stripe-press | notion-docs | magazine", + "rank": 1, + "fit_score": "high | medium | low", + "justification": "one sentence citing specific content cues from the source that match this template" + }, + { ... up to 2 more, ranked 2 and 3 ... } + ] + } + + Use the write_file tool. After writing, confirm the path in one line. That is your only deliverable. + ``` + +- Invoke agy with `--add-dir` for both the source dir and temp dir: + ```bash + agy --dangerously-skip-permissions --add-dir "$(dirname "$SOURCE_FILE")" --add-dir "$TEMP_DIR" --print-timeout 3m0s --print "" < /dev/null + ``` +- After agy returns: apply the standard output-file existence check (see Known issue — Windows rename). If the JSON file exists, read it with the Read tool and return its content verbatim to the caller (caller will parse to drive Phase 3 AskUserQuestion). +- Cleanup: `rm -rf "$TEMP_DIR"`. + +### Mode: report-generate + +Phase 2 of the `/agy:report` flow. Read source markdown + style spec, ask agy to generate self-contained branded HTML, handling `![generate: ...]` image cues per the `IMAGES` mode (native generation / external pre-generated / placeholder), write to WRITE_FILE, then verify referenced images exist. + +- Default timeout: `15m0s` (image generation + HTML composition is expensive). +- Pre-check: the caller passes `WRITE_FILE` (absolute path under `docs/agy/reports/`), `SOURCE_FILE` (absolute path to markdown), and `IMAGES` (`native` | `external` | `none`, default `native`). The parent dir of WRITE_FILE must already exist (the slash command created it). +- **Compute `ASSETS_DIR`** = `WRITE_FILE` with the trailing `.html` replaced by `.assets` (e.g. `.../2026-06-06-x.html` → `.../2026-06-06-x.assets`). This is the canonical assets path — the `` in the HTML must reference it **relative to the HTML** (i.e. `.assets/.png`). Do NOT use `.assets` (that would append `.assets` after `.html`). Create it with one Bash call before invoking agy: `mkdir -p ""`. +- **Slug convention (deterministic, shared with `external` mode):** for each `![generate: ]` cue, `slug` = description lowercased, non-alphanumeric → `-`, collapsed, trimmed to 60 chars; filename = `.png`. This MUST match exactly between what agy writes (`native`) and what a caller pre-generates (`external`). +- **IMAGES mode** controls how cues become images (see the Image cues section of the prompt below): + - `native` (default): agy generates each image itself via its `generate_image` tool. Works without external deps but quality/format is inconsistent in headless mode (agy 1.0.6 sometimes emits JPEG bytes with a `.png` name, or skips generation entirely — the post-generation check below catches this). + - `external`: agy does NOT generate images. It references `.assets/.png` for each cue, assuming the caller already placed real PNGs there (e.g. generated with a dedicated image model like Nano Banana 2). This is the **recommended path for brand-quality infographics**. + - `none`: agy renders a styled placeholder `
` per cue (accent-bordered box with the description as caption), no ``. Layout stays intact; no broken images. +- Build the prompt: + + ``` + You are a senior design engineer producing a final, publication-grade HTML report. Your output will be reviewed by a design-conscious operator with high standards. This is NOT an exercise — the HTML is archived as a permanent reference and may be shared with clients or stakeholders. Generic outputs are rejected; tasteful, opinionated, distinctive design wins. + + ## Source material + - Markdown source (read it first; let the content drive design choices): + - Output HTML path: + + ## Design brief + + + Apply this brief rigorously. The chosen reference exemplar is the north star — the operator should be able to look at the rendered HTML and recognize the reference. + + ## Required design qualities (non-negotiable) + + **Typography** + - Opinionated font pairing per the style brief. NOT default Times/Arial/system fonts unless the brief explicitly says "system fonts only". Use a single `` to Google Fonts for the chosen pair. + - Body text 17-20px on desktop, never less than 16px. Line-height 1.55-1.75 for body, 1.2-1.3 for headings. + - Distinct treatments for h1/h2/h3 — different weights, sizes, AND ornamentation (rule above, kicker label, color, drop letter). h1 should feel like a magazine title block, not a sized-up h2. + - First paragraph after each h2 may use a tasteful drop-cap or larger leading sentence when it fits the reference exemplar. + + **Color** + - Use CSS custom properties on `:root` for the FULL palette: `--bg`, `--surface`, `--fg`, `--fg-muted`, `--accent`, `--accent-2`, `--rule`, `--code-bg`. + - The accent must appear in at least 4 semantic contexts: links, blockquote left border, h2 underline/rule, citation numbers, inline code background. + - Establish visible hierarchy between body, captions, footnotes, and metadata using TINTS of the palette, not arbitrary greys. + + **Layout** + - Reading column max-width that respects optimal line length (60-80 chars). Center it; leave room for sidenotes if the reference exemplar uses them (e.g., Tufte). + - Generous whitespace; sections separated by tasteful ornament (horizontal rule with center glyph, color block, large vertical gap), not just margin. + - Table of contents at top if the source has 5+ top-level sections (h2). Each entry anchor-links to its section. + - Executive summary / TL;DR / Key takeaways block at the top, visually distinct (callout card, accent-tinted background, or rule-framed block). + - If source uses `[N]` inline citations with a Sources/References section, anchor-link each `[N]` (use `` + `id="ref-N"` on the entry) and style the in-text number distinctly (small caps, accent color, or superscript). + + **Components** + - **Tables**: booktabs styling (top + bottom + header underline only, NO vertical rules). Header in bold + accent color. Optional zebra striping in the lightest palette tint. + - **Code blocks**: monospaced, padded, tinted card background with rounded corners. Inline code: subtle background tint, no border, slight horizontal padding. + - **Blockquotes**: left border in accent color, italic body, attribution line in smaller muted type. + - **Lists**: tighter than default leading, generous indent. Custom bullet style if it fits the reference (e.g., square bullets for Swiss, em-dashes for editorial). + - **Figures**: every `` wrapped in `
` with `
` in muted color, smaller size. + - **ASCII diagrams / code-art blocks**: monospace, preserved whitespace, framed background, not centered. + + **Responsive + print** + - 640px mobile breakpoint: proportional font reduction, single-column TOC, simplified ornament, full-width images. + - `@media print`: white bg, black fg, page-break-after on h1/h2, no orphan headings, TOC hidden, citations rendered as footnotes if feasible. + + **Header + footer** + - Header: document title in a strong typographic block, kicker label above (e.g., "RESEARCH", "BRIEF", "PROPOSAL"), date in the document's metadata format, subtitle/byline if appropriate. + - Footer: source markdown path + generation date + small attribution "Generated via agy + Claude Code". + + ## Forbidden anti-patterns + + Any of these in the output = reject: + + - ❌ Plain centered title above body text with no other visual treatment. + - ❌ Default browser scrollbars and form controls visible. + - ❌ External CDN scripts (Google Fonts via `` is OK; nothing else). + - ❌ Generic emoji 🚀 ⭐ ✅ in headings unless the reference exemplar specifically calls for it. + - ❌ Tables with no styling treatment. + - ❌ Headings that look identical except for size. + - ❌ Single dominant color used flatly with no shades or accents. + - ❌ "Lorem-ipsum"-feeling spacing where every element has the same margin. + + ## Image cues (IMAGES mode = ) + For each `![generate: ]` cue in the source markdown, derive `` = description lowercased, non-alphanumeric → `-`, collapsed, trimmed to 60 chars. The assets directory is `` and every `` MUST be `/.png` (relative to the HTML). If the source has ZERO cues, do not invent images. Behave per IMAGES mode: + - **native**: invoke the native `generate_image` tool for each cue and save as `/.png`. Request **PNG** output explicitly. Embed via `
` with `
` showing a short caption. + - **external**: do NOT generate any image. Only emit the `
...
...
` markup — the PNG files already exist (the caller generated them). Use the exact slug convention above so the filenames match. + - **none**: emit NO ``. For each cue render a styled placeholder `
`: an accent-bordered, tinted box (use the palette `--accent`/`--surface`) containing the cue description as `
` text. Layout must stay intact with no broken images. + + ## Process + + 1. Read the markdown source. + 2. Inventory the document: section count, citation count, code blocks, tables, image cues, ASCII art. Decide where TOC / TL;DR / drop-caps / sidenotes fit. + 3. Generate any required images. + 4. Compose the HTML applying ALL required qualities. Bias toward over-design before under-design. + 5. Write to via write_file. + + ## Output instruction (CRITICAL) + Do NOT print HTML or commentary to chat. The written file at is your only deliverable. After writing, confirm the path in one line and stop. + ``` + +- Invoke agy (`--add-dir` covers the reports dir, which contains `ASSETS_DIR`, so agy can write images there in `native` mode): + ```bash + agy --dangerously-skip-permissions --add-dir "$(dirname "$SOURCE_FILE")" --add-dir "$(dirname "$WRITE_FILE")" --print-timeout 15m0s --print "" < /dev/null + ``` +- Output-file existence check (issue #76 / Windows rename mitigation). If WRITE_FILE does not exist or is empty after exit 0, run the standard triage + retry/recovery from the "Output-file existence check" section. +- **Assets-existence check (NEW — never ship broken images).** After the HTML exists, verify every `` it references resolves to a real non-empty file. One Bash call: + ```bash + HTML_DIR="$(dirname "")"; tot=0; miss=0 + for src in $(grep -oE ']+src="[^"]+"' "" | sed -E 's/.*src="([^"]+)".*/\1/'); do + tot=$((tot+1)); [ -s "$HTML_DIR/$src" ] || { echo "MISSING: $src"; miss=$((miss+1)); } + done + echo "IMAGES_PRESENT=$((tot-miss))/$tot" + ``` + - In `native` mode: if any are MISSING, agy referenced images it failed to generate (common on 1.0.6). Report the missing list to the caller and recommend re-running with `--images external` after pre-generating the PNGs, OR `--images none`. Do NOT silently return — broken `` tags are a failure. + - In `external` mode: MISSING means the caller has not yet placed those PNGs in `ASSETS_DIR`. Report exactly which `.png` files are expected so the caller can generate them. +- Return to caller: + 1. Saved HTML path. + 2. IMAGES mode used + the `IMAGES_PRESENT=/` line, and the list of any missing `.png`. + 3. The `ASSETS_DIR` absolute path (so the caller knows where to drop external images). + 4. Approximate HTML size in KB. + +### Mode: setup + +Because of issue #76, a "reply pong to stdout" ping is useless here — `agy --print` writes +nothing to stdout when not a TTY, whether it worked or not. **Test with `write_file` and check +the file instead.** Use a writable temp dir via `--add-dir`: + +```bash +OUT="${TMPDIR:-/tmp}/agy_ping.txt"; LOG="${TMPDIR:-/tmp}/agy_ping.log"; rm -f "$OUT" "$LOG" +agy --dangerously-skip-permissions --add-dir "$(dirname "$OUT")" --print-timeout 90s --log-file "$LOG" --print "Use the write_file tool to write exactly the word pong to the file $OUT . Write nothing else and use no other tools." < /dev/null +cat "$OUT" 2>/dev/null +``` + +- **Success = the file `$OUT` exists and contains `pong`.** If so, agy is installed, authenticated + and working — report success. +- **⚠️ IGNORE these log lines — they are non-fatal noise, present even on a fully successful run:** + `"You are not logged into Antigravity"`, `"getting token source"`, `FetchAvailableModels`, + `loadCodeAssistResponse`, `userInfo`, `ListExperiments`, `Skipping telemetry`. They come from + secondary auth scopes (code-assist features, model list, telemetry); the core `streamGenerateContent` + model calls do NOT need that token. **Do NOT report them as a login problem and do NOT tell the + user to re-authenticate based on them — that is a false alarm** (verified 2026-06-19: agy wrote + the file successfully while emitting all of those lines). +- If the file was **NOT** written, triage in order: + 1. Binary missing / not on PATH → tell the user to install Antigravity (see install note). Stop. + 2. Log shows `streamGenerateContent` calls but a `timed out` / no `write_file` → it's the + **task or timeout**, not auth: re-run with a larger `--print-timeout` or a smaller/stricter task. + 3. Only a genuine fatal sign-in line (e.g. `OAuth login required`, `please run agy to sign in` — + NOT the secondary-scope warnings above) means re-login: tell the user to run `agy` once + interactively in a normal terminal to complete the Google sign-in. +- Report: binary path and version (`agy changelog | head -n 1`), and whether the write_file ping succeeded. +- Do NOT touch user PATH or environment variables. If the binary is missing, just say so and stop. + +## Safety rules + +- One `Bash` call for the main `agy` invocation per attempt (mode `research`/`ask`/`review`/`scrape`/`doc-to-md`/`design-review`/`report-generate`/`notebook`/`notebook-index`/`notebook-ask`/`notebook-group`/`transcribe`/`media` may retry once if the WRITE_FILE check detects the Windows rename bug — a second `Bash` call to agy is allowed only on retry, not for branching logic). +- The pre-flight `.tmp` sweep adds one Bash call before agy in every mode. The output-file check adds one Bash call after agy (test -s + optional log tail) in modes with WRITE_FILE. +- **Response recovery is allowed when output is missing/empty** (issue #76): one Bash call to tail the log for triage, and one Bash call to run the transcript Plan B recovery. These are recovery calls, not exploration — only run them when stdout is empty or the WRITE_FILE check failed, never speculatively. `rescue` mode (no WRITE_FILE) may use these same two recovery calls when stdout comes back empty. +- Mode `record` and `research` may use one additional `Bash` call for post-processing (file moves, ffmpeg) and one `Write` call to prepend frontmatter or append a hint. Mode `setup` may use one additional `Bash` call for the version/log check. Mode `ask` may use one Bash call before agy (mktemp) and one after (rm). Mode `review` may use one Bash call before (size check on DIFF_FILE + mktemp) and one after (rm of both temp dirs). Mode `report-generate` may use one Bash call for output dir setup and one after for image asset moves. +- Do NOT inspect the repository, read other files, grep, monitor progress, or do follow-up reasoning beyond what each mode requires. +- Do NOT paraphrase, summarize, or rewrite agy's output. Return it as-is. +- If agy errors out, return the error message verbatim. +- If the binary is missing, return the error and tell the caller to run `/agy:setup`. diff --git a/plugins/antigravity/commands/ask.md b/plugins/antigravity/commands/ask.md new file mode 100644 index 0000000..a300776 --- /dev/null +++ b/plugins/antigravity/commands/ask.md @@ -0,0 +1,37 @@ +--- +description: One-shot prompt to Antigravity (agy) — quick question, returns response verbatim. No file persistence to docs/. +argument-hint: "[--background] " +context: fork +allowed-tools: Bash, Write +--- + +Route this to the `antigravity:agy-rescue` subagent in MODE: ask. + +Raw user request: +$ARGUMENTS + +Routing rules: + +- If the request contains `--background`, invoke the subagent with `run_in_background: true`. Strip the flag from the forwarded prompt. +- Otherwise run the subagent in the foreground. +- What remains after stripping `--background` IS the prompt. Trim whitespace. + +Pass this header block to the subagent followed by the prompt: + +``` +MODE: ask +INTENSITY: +MODEL: +RESUME: false +WRITE_FILE: +USER_TEXT: + +``` + +Operating rules: + +- The subagent wraps the prompt with a `write_file` instruction (bug #76 workaround — see https://github.com/google-antigravity/antigravity-cli/issues/76). Agy writes the answer to a temp file, the subagent reads it back, returns the content verbatim. +- Present the response as-is. No commentary, no paraphrase. +- If the user did not supply a prompt, ask once: "What do you want to ask Antigravity?" +- If agy reports it is missing or unauthenticated, tell the user to run `/agy:setup`. +- For multi-step or long-running work, suggest `/agy:rescue` instead (carries the same write-to-file workaround but with a longer timeout and the option to resume conversations). diff --git a/plugins/antigravity/commands/design-review.md b/plugins/antigravity/commands/design-review.md new file mode 100644 index 0000000..349157b --- /dev/null +++ b/plugins/antigravity/commands/design-review.md @@ -0,0 +1,76 @@ +--- +description: Run a UX/visual design audit of a URL using Antigravity (agy). Captures desktop + mobile screenshots, scores 10 dimensions (hierarchy, typography, color, spacing, a11y, etc.), benchmarks against industry. Saves to docs/agy/design-reviews/. +argument-hint: " [focus instructions]" +context: fork +allowed-tools: Bash, Write +--- + +Route this to the `antigravity:agy-rescue` subagent in MODE: design-review. + +Raw user request: +$ARGUMENTS + +## Routing rules + +1. **Parse the URL**: the first whitespace-separated token of `$ARGUMENTS` that starts with `http://` or `https://` IS the URL. If the user wrote it without a scheme, prepend `https://`. If no URL is found, ask once: "What URL should I review the design of?". + +2. **Focus**: everything else in `$ARGUMENTS` is optional natural-language emphasis (e.g., "focus on the checkout flow on mobile", "compare against MercadoLibre seller backend"). Empty by default = full 10-dimension review at both viewports. + +3. **Slug**: build from URL host + first path segment (lowercase, non-alphanumeric → `-`, collapse repeats, trim to 60 chars). + +4. **Date**: today in `YYYY-MM-DD`. + +5. **Paths** (relative to current working directory): + - `OUTPUT_DIR` = `docs/agy/design-reviews/` + - `WRITE_FILE` = `docs/agy/design-reviews/-.md` + +6. **Pre-flight** (ONE `Bash` call): + ```bash + mkdir -p docs/agy/design-reviews + ``` + +7. **Hand off**: pass this header to the subagent: + + ``` + MODE: design-review + URL: + FOCUS: + CWD: + WRITE_FILE: docs/agy/design-reviews/-.md + ``` + +## Operating rules + +- The subagent invokes agy with `--add-dir ` and a long timeout (`12m0s`) because design reviews involve browser navigation + multiple viewports + multimodal vision analysis. +- The subagent reports: + 1. Saved report path. + 2. First ~30 lines (executive summary + design score). + 3. Paths to captured screenshots (desktop + mobile). +- Present that to the user as-is. +- If the URL is blocked, returns non-200, or browser session crashes, surface the error verbatim. + +## What the review covers + +Default 10-dimension audit: +1. Visual hierarchy +2. Typography (fonts, sizes, line-height, contrast) +3. Color system (palette, WCAG contrast, gradient usage) +4. Spacing & layout (grid, white space, alignment) +5. Interactive elements (button states, form affordance, micro-interactions) +6. Brand & tone (does design match the value-prop?) +7. Accessibility (alt text, focus indicators, ARIA, color contrast) +8. UX heuristics (Nielsen's 10) +9. Mobile/responsive behavior +10. Competitive context (if `--focus` includes a category, agy compares against industry standards) + +Ends with: +- 3 concrete strengths +- 3 highest-leverage improvements (ordered by ROI) +- Overall design score /10 + +## Notes + +- Uses agy's browser subagent under the hood — same `--add-dir` + write_file workaround as `/agy:record`. +- For login-protected pages, agy uses an isolated Chrome profile (no shared cookies). For demos behind auth, pass credentials in the focus text (be aware they end up in the prompt). +- For comparing TWO designs, run `/agy:design-review` twice and synthesize. (A `--compare` flag is a v0.4.0 candidate.) +- Captures both desktop (1440×900) and mobile (375×667) viewports by default. diff --git a/plugins/antigravity/commands/doc-to-md.md b/plugins/antigravity/commands/doc-to-md.md new file mode 100644 index 0000000..e90569f --- /dev/null +++ b/plugins/antigravity/commands/doc-to-md.md @@ -0,0 +1,67 @@ +--- +description: Convert a PDF, docx, image, or other document to clean Markdown using Antigravity (agy) multimodal Gemini. Saves to docs/agy/converted/. +argument-hint: " [extraction focus instructions]" +context: fork +allowed-tools: Bash, Write +--- + +Route this to the `antigravity:agy-rescue` subagent in MODE: doc-to-md. + +Raw user request: +$ARGUMENTS + +## Routing rules + +1. **Parse the file path**: the first whitespace-separated token of `$ARGUMENTS` that resolves to an existing file (check with `test -f`) IS the source file. Convert relative paths to absolute. If no valid file is found, ask once: "Which file should I convert?". + +2. **Extraction focus**: everything else in `$ARGUMENTS` (anything that's not the path) is optional natural-language guidance on what to focus on (e.g., "extract only the pricing tables and ignore the legal boilerplate"). Empty by default = "produce a faithful full-fidelity markdown conversion". + +3. **Detect file type** via extension: + - `.pdf` → PDF (multimodal) + - `.docx`, `.doc` → Word (multimodal or text) + - `.png`, `.jpg`, `.jpeg`, `.webp`, `.gif` → image (multimodal — OCR + structure inference) + - `.html`, `.htm` → HTML (text) + - other → text or warn + +4. **Slug**: lowercase the source filename base (without extension), non-alphanumeric → `-`, collapse repeats, trim to 60 chars. + +5. **Date**: today in `YYYY-MM-DD` (local time). + +6. **Paths** (relative to current working directory): + - `OUTPUT_DIR` = `docs/agy/converted/` + - `WRITE_FILE` = `docs/agy/converted/-.md` + +7. **Pre-flight** (ONE `Bash` call): + ```bash + mkdir -p docs/agy/converted + ``` + +8. **Hand off**: pass this header to the subagent: + + ``` + MODE: doc-to-md + SOURCE_FILE: + FILE_TYPE: pdf|docx|image|html|other + FOCUS: + CWD: + WRITE_FILE: docs/agy/converted/-.md + ``` + +## Operating rules + +- The subagent invokes agy with `--add-dir ` so it can write the result. +- For PDF / docx / image, agy uses Gemini's multimodal vision capabilities — preserves tables, lists, headings, code blocks. Inline images become `![description](image-extraction-note)` placeholders. +- The subagent reports: + 1. Saved markdown path. + 2. Original file path and detected type. + 3. First ~30 lines of the conversion (preview). + 4. Pages/sections processed. +- Present that to the user as-is. +- If the file is too large (>200 MB) or the format is unsupported, surface the error verbatim. Suggest splitting or pre-processing. + +## Notes + +- Large PDFs (>100 pages) consume significant input tokens. Consider chunking for very large docs. +- For images, OCR quality is excellent for printed text, decent for handwriting. +- This is NOT a replacement for `pandoc` if you need lossless format conversion — it's for "give me a clean markdown version I can read/edit/reuse". +- For sensitive documents (NDAs, internal contracts), be aware the content is sent to Google's Gemini servers. Use `vertex-creative` skill with Vertex AI ADC if you need stricter data residency control. diff --git a/plugins/antigravity/commands/media.md b/plugins/antigravity/commands/media.md new file mode 100644 index 0000000..a955f7d --- /dev/null +++ b/plugins/antigravity/commands/media.md @@ -0,0 +1,63 @@ +--- +description: Ask a question about an AUDIO, VIDEO or IMAGE file (or a YouTube/remote URL) with Antigravity (agy / Gemini 3.x multimodal). Beyond transcription — "what decisions were made in this meeting?", "what happens at 2:30 in the video?", "what's the tone of this voice note?". Claude Code can't see/hear media; agy can. +argument-hint: " | " +context: fork +allowed-tools: Bash, Read, Agent +--- + +Multimodal Q&A over a media file. Give `agy` an audio/video/image (or a YouTube/remote URL) and a +question; it answers grounded in what it heard/saw, with time references for audio/video. + +Raw user request: +$ARGUMENTS + +## Phase 0 — Resolve source + question (ONE Bash call) + +Parse `$ARGUMENTS`: split on the first `|` → left = source (a URL or an existing file), right = +question. If there is no `|`, the leading URL/existing-file token is the source and the rest is the +question. If the question is empty, ask once and stop. + +```bash +python - "$SOURCE" "$PREGUNTA" <<'PYEOF' +import sys, os, re, hashlib +src = sys.argv[1].strip().strip('"'); preg = (sys.argv[2] if len(sys.argv) > 2 else "").strip() +AUD=(".ogg",".mp3",".wav",".m4a",".aac",".flac",".opus",".aiff"); VID=(".mp4",".mov",".webm",".mkv",".avi",".m4v") +IMG=(".png",".jpg",".jpeg",".webp",".gif") +low=src.lower() +if low.startswith("http"): kind="url"; addir="" +elif low.endswith(VID): kind="video"; addir=os.path.dirname(os.path.abspath(src)) +elif low.endswith(AUD): kind="audio"; addir=os.path.dirname(os.path.abspath(src)) +elif low.endswith(IMG): kind="image"; addir=os.path.dirname(os.path.abspath(src)) +else: kind="file"; addir=os.path.dirname(os.path.abspath(src)) +base=re.sub(r"[^a-z0-9]+","-", (re.sub(r"^https?://(www\.)?","",low) if kind=="url" else os.path.splitext(os.path.basename(src))[0]).lower()).strip("-")[:40] or "media" +qh=hashlib.sha1(preg.encode()).hexdigest()[:6] +out=os.path.join("docs","agy","media", f"{base}-{qh}.md"); os.makedirs(os.path.dirname(out), exist_ok=True) +print(f"KIND={kind}\nADD_DIR={addir}\nWRITE_FILE={os.path.abspath(out)}\nSOURCE={src}") +PYEOF +``` + +## Phase 1 — Answer (ONE agy subagent) + +Spawn ONE `antigravity:agy-rescue` subagent in **MODE: media**: + +``` +MODE: media +CWD: +KIND: audio|video|image|url|file +SOURCE: +ADD_DIR: +PREGUNTA: +WRITE_FILE: +USER_TEXT: +(empty) +``` + +## Phase 2 — Present + +Read ONLY `` and present the answer verbatim, then the saved path. Don't re-process the +media yourself. + +## Notes +- For audio/video the answer cites **time references** (e.g. "around 02:30") when relevant. +- Need a full transcript instead of a targeted answer? Use `/agy:transcribe`. +- agy `--print` writes nothing to stdout outside a TTY (issue #76); the answer is read from the file. diff --git a/plugins/antigravity/commands/model.md b/plugins/antigravity/commands/model.md new file mode 100644 index 0000000..da04bc2 --- /dev/null +++ b/plugins/antigravity/commands/model.md @@ -0,0 +1,76 @@ +--- +description: Show or set the agy (Antigravity / Gemini) model by writing ~/.gemini/antigravity-cli/settings.json — the reliable way to switch models (the unreliable --model flag silently falls back to the default). No agy call; takes effect immediately, no plugin reinstall. +argument-hint: "[alias|\"exact label\"] e.g. flash-low · pro · \"Gemini 3.1 Pro (High)\"" +context: fork +allowed-tools: Bash +--- + +Show or change which model `agy --print` uses. agy reads the active model from +`~/.gemini/antigravity-cli/settings.json` (`"model": "