From 09c00a958adbec5020be62479685e3d3b791d390 Mon Sep 17 00:00:00 2001 From: Bruno Azoulay Date: Sat, 13 Jun 2026 11:32:29 +0200 Subject: [PATCH] feat(browser): hover + drag gestures, 5 new tools (pdf/cookies/route/permissions/clipboard), downloads read Closes the common interaction gaps: hover (open menus/tooltips) and drag (drag&drop) as browser_act kinds + run steps; browser_pdf, browser_cookies (inject/export auth token), browser_route (mock/abort a network response), browser_permissions, browser_clipboard. browser_downloads can now read file content (read+encoding). MCP tools 44 to 49. All verified live on real sites (hover reveal, drag, cookies round-trip, network mock, PDF bytes, clipboard read). Docs + README + count + CHANGELOG in the same commit. --- CHANGELOG.md | 9 ++ README.md | 4 +- docs/README.md | 2 +- docs/cli.md | 7 +- docs/configuration.md | 2 +- docs/mcp-tools.md | 148 +++++++++++++++++++++++++++- package.json | 2 +- src/actions/act-by-ref.ts | 21 +++- src/actions/hover-drag.ts | 49 +++++++++ src/actions/perform.ts | 5 + src/interfaces/types.ts | 2 + src/server/registry.ts | 10 ++ src/server/tools/clipboard.ts | 82 +++++++++++++++ src/server/tools/cookies.ts | 76 ++++++++++++++ src/server/tools/dialogs.ts | 19 +++- src/server/tools/pdf.ts | 72 ++++++++++++++ src/server/tools/permissions.ts | 66 +++++++++++++ src/server/tools/route.ts | 85 ++++++++++++++++ src/server/tools/run-act.ts | 9 +- src/server/tools/snapshot.ts | 3 +- src/session/download-read.ts | 54 ++++++++++ tests/integration/mcp.test.ts | 5 + tests/live/live-browser-tools.ts | 48 +++++++++ tests/live/live-gestures.ts | 40 ++++++++ tests/unit/clipboard-tool.test.ts | 47 +++++++++ tests/unit/cookies-tool.test.ts | 97 ++++++++++++++++++ tests/unit/download-read.test.ts | 76 ++++++++++++++ tests/unit/hover-drag.test.ts | 94 ++++++++++++++++++ tests/unit/pdf-tool.test.ts | 80 +++++++++++++++ tests/unit/permissions-tool.test.ts | 41 ++++++++ tests/unit/route-tool.test.ts | 64 ++++++++++++ 
31 files changed, 1300 insertions(+), 19 deletions(-) create mode 100644 src/actions/hover-drag.ts create mode 100644 src/server/tools/clipboard.ts create mode 100644 src/server/tools/cookies.ts create mode 100644 src/server/tools/pdf.ts create mode 100644 src/server/tools/permissions.ts create mode 100644 src/server/tools/route.ts create mode 100644 src/session/download-read.ts create mode 100644 tests/live/live-browser-tools.ts create mode 100644 tests/live/live-gestures.ts create mode 100644 tests/unit/clipboard-tool.test.ts create mode 100644 tests/unit/cookies-tool.test.ts create mode 100644 tests/unit/download-read.test.ts create mode 100644 tests/unit/hover-drag.test.ts create mode 100644 tests/unit/pdf-tool.test.ts create mode 100644 tests/unit/permissions-tool.test.ts create mode 100644 tests/unit/route-tool.test.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index 63678dc..3d6a073 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,14 @@ # Changelog +## [0.1.59] - 13-06-2026 + +### Added + +- **Two new interaction gestures** — `hover` (open menus/tooltips on mouse-over) and `drag` (drag&drop, `target`→`to`) as `browser_act` kinds and `run` step types. Closes the most common "the agent sees the link but the click fails because the hover menu never opened" gap. +- **Five new MCP tools** (44 → **49**): `browser_pdf` (page → PDF, headless Chromium), `browser_cookies` (get/set/clear — inject or export an auth token without a UI login), `browser_route` (mock/abort/unroute a network response — stub an API without a live backend), `browser_permissions` (grant/clear geolocation, clipboard, notifications…), `browser_clipboard` (read/write). +- **`browser_downloads` now reads file content** — pass `read` (index or filename) + `encoding` (utf8/base64) to get the downloaded bytes, not just the path (5 MB cap). +- All verified live: hover reveal, drag, cookies round-trip, network mock (`fulfill`), PDF bytes, clipboard write/read. 
+ ## [0.1.58] - 11-06-2026 ### Added diff --git a/README.md b/README.md index 8d5846d..7bfe249 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ Shadow DOM + iframes), multi-step plans, structured extraction, visual diff, and guardrails** for payments and bookings. It drives real Chromium, so it reads **Next.js / SPA** pages after hydration — not just static HTML. -> 44 MCP tools · stealth + rotating proxies · HTTP fast-path (single, batch & crawl) · full-site content + screenshot snapshots · structured per-card product extraction · form fill + file upload · virtualized-list scraping + autoscroll · tabs / dialogs / downloads · console + network logs · MCP screenshot resources · `FUSE_CAPS` tool-group filtering · named auth profiles (cookies + localStorage + IndexedDB, saved at login) · `blockResources` · HAR record/replay · pixel visual-diff · human handoff + live view. +> 49 MCP tools · stealth + rotating proxies · HTTP fast-path (single, batch & crawl) · full-site content + screenshot snapshots · structured per-card product extraction · form fill + file upload · hover + drag&drop · PDF export · cookies / permissions · network mocking · clipboard · virtualized-list scraping + autoscroll · tabs / dialogs / downloads · console + network logs · MCP screenshot resources · `FUSE_CAPS` tool-group filtering · named auth profiles (cookies + localStorage + IndexedDB, saved at login) · `blockResources` · HAR record/replay · pixel visual-diff · human handoff + live view. 
## Install @@ -44,7 +44,7 @@ fuse-browser products "https://www.digitec.ch/en/search?q=macbook" --limit 20 An LLM runs a **perceive → decide → act** loop through the tools: `browser_open` → `browser_navigate` → `browser_snapshot` (indexed `ref`s + form state) → `browser_act` -(click/fill/select/pick/upload, returns a page diff) → `browser_wait_for` → `browser_autoscroll` +(click/fill/select/pick/upload/hover/drag, returns a page diff) → `browser_wait_for` → `browser_autoscroll` (drain lazy lists) → `browser_products` / `browser_collect` / `browser_extract` / `browser_screenshot`. Sensitive actions (pay / book / checkout) are **blocked** unless the agent passes `humanApproved`. diff --git a/docs/README.md b/docs/README.md index 0f476f4..f5d77a7 100644 --- a/docs/README.md +++ b/docs/README.md @@ -7,7 +7,7 @@ New here? Start with the root [README](../README.md), then dive in: | --- | --- | | [Installation](./installation.md) | Requirements, install, Chromium, MCP registration, the three ways to get a browser | | [CLI](./cli.md) | `probe` / `fetch` / `fetch-batch` / `crawl` / `collect-batch` / `shots` / `shots-batch` / `site-shots` / `serp-batch` + one-shot page commands (`run` / `products` / `extract` / `snapshot` / `screenshot` / `inspect`) + every flag | -| [MCP tools](./mcp-tools.md) | All 44 tools with parameters and examples | +| [MCP tools](./mcp-tools.md) | All 49 tools with parameters and examples | | [Configuration](./configuration.md) | `AgentOptions`, `FUSE_*` env vars, identity, retry, output location | | [Sessions](./sessions.md) | Session lifecycle, auto crash recovery, `storageState` auto-save, HAR record/replay, CDP attach | | [Extraction](./extraction.md) | `browser_extract` / `extract_schema` / `collect` + the clean→validate→dedupe→emit pipeline | diff --git a/docs/cli.md b/docs/cli.md index 0d8a7cc..be07c40 100644 --- a/docs/cli.md +++ b/docs/cli.md @@ -115,7 +115,7 @@ These commands open a page, run one operation, print JSON on stdout (errors on s 
### `run <url>` -Executes a multi-step plan in one session. Steps come from `--steps '<json>'` (inline array) or `--steps-file <path>` (`-` reads stdin). Each step is `{type, …}`: `navigate`, `click`, `fill`, `scroll`, `press`, `wait`, `select`, `upload`, `extract`. An `upload` step is `{"type":"upload","target":"<selector>","files":"<path>"}` — `files` accepts one path, a comma-separated string, or an array, and is set on the matching `<input type="file">`. Prints `{ok, url, steps}`; on a failed step prints `{ok:false, error:{kind:"step_failed", step, message}}` and exits `1`. Malformed/non-array JSON exits `2`. +Executes a multi-step plan in one session. Steps come from `--steps '<json>'` (inline array) or `--steps-file <path>` (`-` reads stdin). Each step is `{type, …}`: `navigate`, `click`, `fill`, `scroll`, `press`, `wait`, `select`, `upload`, `hover`, `drag`, `extract`. An `upload` step is `{"type":"upload","target":"<selector>","files":"<path>"}` — `files` accepts one path, a comma-separated string, or an array, and is set on the matching `<input type="file">`. A `hover` step is `{"type":"hover","target":"<selector>"}` (moves the pointer over the element to reveal hover menus/tooltips). A `drag` step is `{"type":"drag","target":"<selector>","to":"<selector>"}` (drops the source onto the destination). Prints `{ok, url, steps}`; on a failed step prints `{ok:false, error:{kind:"step_failed", step, message}}` and exits `1`. Malformed/non-array JSON exits `2`. ```bash fuse-browser run https://example.com \ @@ -127,6 +127,11 @@ fuse-browser run https://example.com/apply \ --steps '[{"type":"upload","target":"input[type=file]","files":"/path/cv.pdf"}]' ``` +```bash +fuse-browser run https://example.com/board \ + --steps '[{"type":"hover","target":".menu-trigger"},{"type":"drag","target":"#card-1","to":"#column-done"}]' +``` + ### `products <url>` Extracts repeated product cards from the rendered DOM. `--limit <n>` caps the result; `--container <selector>` forces the card container. Prints `{url, count, products}`. 
diff --git a/docs/configuration.md b/docs/configuration.md index 963254d..fca35c6 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -78,7 +78,7 @@ Read by `envAgentDefaults` (`src/server/env-defaults.ts`) and the proxy loader ( | `FUSE_STORAGE_STATE` | `storageStatePath` | Path to a storage-state JSON. | | `FUSE_OUTPUT_DIR` | `outputDir` | Override the artifact output directory. | | `FUSE_PROXIES` | proxy pool | Comma- or newline-separated proxy URLs; deduped, blanks dropped. Merged with `proxiesPath`. Treat as a secret. | -| `FUSE_CAPS` | tool-group filter | Comma-separated [capability groups](./mcp-tools.md#capability-groups-fuse_caps) to register (`core`/`batch`/`extract`/`debug`/`live`). Case-insensitive, whitespace-tolerant; unknown names are ignored. Blank/unset (or only-unknown) = all 44 tools. Server-only (no per-call/library equivalent). | +| `FUSE_CAPS` | tool-group filter | Comma-separated [capability groups](./mcp-tools.md#capability-groups-fuse_caps) to register (`core`/`batch`/`extract`/`debug`/`live`). Case-insensitive, whitespace-tolerant; unknown names are ignored. Blank/unset (or only-unknown) = all 49 tools. Server-only (no per-call/library equivalent). | | `FUSE_NETLOG_MAX` | network/console log cap | Max entries kept per session in `browser_console` / `browser_network` (oldest dropped). Positive integer; default `250`. | ### MCP config example diff --git a/docs/mcp-tools.md b/docs/mcp-tools.md index def2bca..505ddaa 100644 --- a/docs/mcp-tools.md +++ b/docs/mcp-tools.md @@ -14,14 +14,14 @@ The shared identity/profile options (the `agentOptionShape`) are listed once und ## Capability groups (`FUSE_CAPS`) -By default all 44 tools are registered. Set the `FUSE_CAPS` env var (comma-separated group names) to expose fewer tools — a lighter context for the LLM client: +By default all 49 tools are registered. 
Set the `FUSE_CAPS` env var (comma-separated group names) to expose fewer tools — a lighter context for the LLM client: | Group | Tools | | --- | --- | | `core` | Session lifecycle (`browser_open`/`browser_status`/`browser_close`/`browser_connect`), navigation (`browser_navigate`/`browser_back`/`browser_forward`), actions (`browser_click`/`browser_fill`/`browser_login`/`browser_scroll`/`browser_press`/`browser_select`), `browser_tabs`, `browser_dialog`/`browser_downloads`, `browser_snapshot`/`browser_act`, `browser_wait`/`browser_wait_for`, `browser_screenshot`, `browser_autoscroll`. | | `batch` | `browser_probe`, `browser_probe_html`, `browser_fetch`, `browser_fetch_batch`, `browser_crawl`, `browser_collect_batch`, `browser_shots_batch`, `browser_site_shots`, `browser_serp_batch`. | | `extract` | `browser_collect`, `browser_run`, `browser_extract`, `browser_extract_schema`, `browser_products`. | -| `debug` | `browser_inspect`, `browser_console`, `browser_network`, `browser_visual_diff`, `browser_metrics`. | +| `debug` | `browser_inspect`, `browser_console`, `browser_network`, `browser_visual_diff`, `browser_metrics`, `browser_pdf`, `browser_cookies`, `browser_route`, `browser_permissions`, `browser_clipboard`. | | `live` | `browser_handoff`, `browser_live_view`, `browser_live_view_stop`. | ```sh @@ -346,13 +346,25 @@ List the files downloaded by this session. Download capture is auto-attached whe | Param | Type | Required | Description | | --- | --- | --- | --- | | `sessionId` | string | yes | Target session. | +| `read` | number \| string | no | Index in the list, or a `suggestedFilename`, of the download whose content to also return. | +| `encoding` | `"utf8"` \| `"base64"` | no | How to decode the read file. Default `utf8`. Use `base64` for binary files (PDF, images, archives). | Returns `{ count, downloads }` — each download is `{ url, suggestedFilename, path, at, error? }` (`path` is empty while saving or when `error` is set). +When `read` is given, the result also includes `content: { filename, encoding, data }`. 
Files over 5 MB, an invalid index/filename, or a file not yet on disk return an error (`code: DOWNLOAD_READ_FAILED`). Without `read`, the behaviour is unchanged (list only). + ```json { "sessionId": "s_abc123" } ``` +```json +{ "sessionId": "s_abc123", "read": 0, "encoding": "utf8" } +``` + +```json +{ "sessionId": "s_abc123", "read": "invoice.pdf", "encoding": "base64" } +``` + --- ## Navigate @@ -536,21 +548,26 @@ Return the indexed interactive elements of the live page, each with a `ref` to u ### browser_act -Execute click/fill/select/pick/upload on an element by `ref` (from `browser_snapshot`) or by `target` text. Returns a diff of what changed on the page (added/removed/text/url). +Execute click/fill/select/pick/upload/hover/drag on an element by `ref` (from `browser_snapshot`) or by `target` text. Returns a diff of what changed on the page (added/removed/text/url). `pick` = type `value` into a combobox, then click the matching suggestion (`option` text, defaults to `value`) — for airport/city autocompletes. `upload` = set local file path(s) on an `` via `files` (single path, a comma-separated string, or an array). Resolves the same `ref`/`target` as the other kinds, then calls Playwright's `setInputFiles`. +`hover` = move the pointer over the element (Playwright's `locator.hover()`) — reveals hover menus/tooltips so a follow-up `browser_snapshot` sees the newly-shown elements. + +`drag` = drag the source element (`ref`/`target`) onto a destination given by `to` (a snapshot `ref` or a CSS selector), via Playwright's `locator.dragTo()`. + | Param | Type | Required | Description | | --- | --- | --- | --- | | `sessionId` | string | yes | Target session. | -| `kind` | enum `click` \| `fill` \| `select` \| `pick` \| `upload` | yes | Action to perform. | +| `kind` | enum `click` \| `fill` \| `select` \| `pick` \| `upload` \| `hover` \| `drag` | yes | Action to perform. | | `ref` | integer \| string | no | Element ref from `browser_snapshot` (e.g. `12` or `"3:4"`). 
| | `target` | string | no | Text/selector fallback when no `ref`. | | `value` | string | no | Value to type/select (for `fill`/`select`/`pick`). | | `option` | string | no | Suggestion text to click for `pick` (defaults to `value`). | | `files` | string \| string[] | no | File path(s) for `upload` — one path, a comma-separated string (split into many), or an array. | +| `to` | string | no | Drop destination for `drag` — a snapshot `ref` (e.g. `"7"`) or a CSS selector. | | `annotate` | boolean | no | Also return a Set-of-Marks JPEG of the NEW state (re-marked, anti-drift). | Provide either `ref` or `target`. @@ -563,6 +580,14 @@ Provide either `ref` or `target`. { "sessionId": "s_abc123", "kind": "upload", "target": "input[type=file]", "files": "/path/cv.pdf" } ``` +```json +{ "sessionId": "s_abc123", "kind": "hover", "ref": 5 } +``` + +```json +{ "sessionId": "s_abc123", "kind": "drag", "ref": 3, "to": "7" } +``` + ### browser_run Execute an ordered multi-step plan (navigate, click, fill, scroll, press, select, upload, wait_for, extract) in one call. Stops at the first failed step. Sensitive actions require `humanApproved`. An `upload` step takes `{type:"upload", target, files}` where `files` is a path, a comma-separated string, or an array. @@ -821,6 +846,121 @@ Returns `{ count, requests }`. { "sessionId": "s_abc123", "status": 404 } ``` +### browser_pdf + +Render the live page to PDF. **Headless chromium only** — Playwright's `page.pdf()` throws in headed mode or on a non-chromium engine; in that case the tool returns a clear error (`code: pdf_unsupported`, message `browser_pdf requires headless chromium`). With `path` the PDF is written to disk; otherwise it is returned base64-encoded. + +| Param | Type | Required | Description | +| --- | --- | --- | --- | +| `sessionId` | string | yes | Target session. | +| `path` | string | no | If set, write the PDF here and return `{ path }`. 
| +| `format` | string | no | Paper format (`Letter`, `Legal`, `Tabloid`, `Ledger`, `A0`–`A6`). | +| `landscape` | boolean | no | Landscape orientation. | +| `printBackground` | boolean | no | Print background graphics. | + +Returns `{ path, bytes }` when `path` is given, otherwise `{ pdfBase64, bytes }`. + +```json +{ "sessionId": "s_abc123", "format": "A4", "printBackground": true } +``` + +```json +{ "sessionId": "s_abc123", "path": "/tmp/page.pdf", "landscape": true } +``` + +### browser_cookies + +Read, set, or clear cookies on this session's `BrowserContext`. Cookies use the Playwright shape (`{ name, value, domain?, url?, path?, expires?, httpOnly?, secure?, sameSite? }`); either `url` or both `domain` and `path` are required per cookie when setting. Errors surface as `code: cookies_failed`. + +| Param | Type | Required | Description | +| --- | --- | --- | --- | +| `sessionId` | string | yes | Target session. | +| `action` | enum `get` \| `set` \| `clear` | yes | Operation to perform. | +| `cookies` | array | for `set` | Playwright cookies to add. | +| `urls` | string[] | no | For `get`: return only cookies that affect these URLs. | + +Returns `{ cookies }` for `get`, `{ added: n }` for `set`, `{ cleared: true }` for `clear`. + +```json +{ "sessionId": "s_abc123", "action": "get", "urls": ["https://example.com"] } +``` + +```json +{ "sessionId": "s_abc123", "action": "set", "cookies": [{ "name": "sid", "value": "abc", "url": "https://example.com" }] } +``` + +```json +{ "sessionId": "s_abc123", "action": "clear" } +``` + +### browser_route + +Intercept network requests matching a glob/URL `pattern` on this session. `mock` fulfills matching requests with a canned response (`status` / `body` / `contentType`); `abort` blocks them; `unroute` removes a route previously installed with the same pattern. Patterns are Playwright globs, e.g. `**/api/**` or `https://x/*`. 
+ +| Param | Type | Required | Description | +| --- | --- | --- | --- | +| `sessionId` | string | yes | Target session. | +| `pattern` | string | yes | Playwright URL glob to match. | +| `action` | enum `mock` \| `abort` \| `unroute` | yes | Fulfill, block, or remove the route. | +| `status` | number | no | HTTP status for `mock` (Playwright default `200`). | +| `body` | string | no | Response body for `mock`. | +| `contentType` | string | no | `Content-Type` header for `mock`. | + +Returns `{ routed, action }`. + +```json +{ "sessionId": "s_abc123", "pattern": "**/api/me", "action": "mock", "status": 200, "body": "{\"id\":1}", "contentType": "application/json" } +``` + +```json +{ "sessionId": "s_abc123", "pattern": "**/analytics/**", "action": "abort" } +``` + +```json +{ "sessionId": "s_abc123", "pattern": "**/api/me", "action": "unroute" } +``` + +### browser_permissions + +Grant or clear runtime browser permissions on this session. `grant` (default) allows the listed permissions, optionally scoped to a single `origin`; `clear` revokes every permission granted so far. Names follow Playwright: `geolocation`, `notifications`, `clipboard-read`, `clipboard-write`, `camera`, `microphone`, `midi`, etc. + +| Param | Type | Required | Description | +| --- | --- | --- | --- | +| `sessionId` | string | yes | Target session. | +| `permissions` | string[] | no | Permissions to grant (default `[]`; ignored on `clear`). | +| `origin` | string | no | Scope the grant to this origin (`grant` only). | +| `action` | enum `grant` \| `clear` | no | Default `grant`. | + +Returns `{ granted, origin }` for `grant`, `{ cleared: true }` for `clear`. + +```json +{ "sessionId": "s_abc123", "permissions": ["geolocation", "clipboard-read", "clipboard-write"], "origin": "https://example.com" } +``` + +```json +{ "sessionId": "s_abc123", "action": "clear" } +``` + +### browser_clipboard + +Read from or write to the page clipboard via `navigator.clipboard`. 
`read` returns the current text; `write` sets it to `text`. The clipboard API needs the `clipboard-read` / `clipboard-write` permissions — they are granted best-effort before the call, but if the browser still denies access the tool returns `code: clipboard_denied`; grant them explicitly with `browser_permissions` first. + +| Param | Type | Required | Description | +| --- | --- | --- | --- | +| `sessionId` | string | yes | Target session. | +| `action` | enum `read` \| `write` | yes | Read from or write to the clipboard. | +| `text` | string | for `write` | Text to put on the clipboard. | + +Returns `{ text }` for `read`, `{ written: true }` for `write`. + +```json +{ "sessionId": "s_abc123", "action": "write", "text": "copied value" } +``` + +```json +{ "sessionId": "s_abc123", "action": "read" } +``` + --- ## Live view diff --git a/package.json b/package.json index 13a03f3..93d097a 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@fusengine/browser-mcp", - "version": "0.1.58", + "version": "0.1.59", "description": "MCP server + CLI giving AI agents a real, stealth browser (Patchright/Playwright) — per-country identity, self-healing actions, snapshots, multi-step plans, structured extraction, CDP attach.", "license": "MIT", "author": "Fusengine", diff --git a/src/actions/act-by-ref.ts b/src/actions/act-by-ref.ts index 740499b..5e28b32 100644 --- a/src/actions/act-by-ref.ts +++ b/src/actions/act-by-ref.ts @@ -4,14 +4,25 @@ * text/heuristic targeting of smart-click/smart-fill. * @module actions/act-by-ref */ -import type { Page } from "playwright"; +import type { Locator, Page } from "playwright"; import type { ActionResult } from "../interfaces/types.js"; import { pickAutocomplete } from "./autocomplete.js"; +import { dragLocator, hoverLocator } from "./hover-drag.js"; import { refLocator } from "./ref-locator.js"; import { type FilesInput, setFiles } from "./upload.js"; /** Action kinds that can target a snapshot ref. 
*/ -export type RefActionKind = "click" | "fill" | "select" | "pick" | "upload"; +export type RefActionKind = "click" | "fill" | "select" | "pick" | "upload" | "hover" | "drag"; + +/** Snapshot refs are numeric (`"5"` or frame-scoped `"3:4"`); anything else is a CSS selector. */ +function isRef(value: string): boolean { + return /^\d+(:\d+)?$/.test(value); +} + +/** Resolve a drag destination: a snapshot ref via {@link refLocator}, else a CSS selector. */ +function destLocator(page: Page, to: string): Locator { + return (isRef(to) ? refLocator(page, to) : null) ?? page.locator(to).first(); +} /** * Run `kind` on the element carrying the frame-scoped `ref`. @@ -22,6 +33,7 @@ export type RefActionKind = "click" | "fill" | "select" | "pick" | "upload"; * @param value - Text for fill/select/pick (ignored for click/upload). * @param option - Suggestion text for `pick`. * @param files - Paths for `upload` (string, CSV string, or array). + * @param to - Destination ref/selector for `drag` (ref string/number or CSS selector). * @returns Action result tagged with the `ref`. 
*/ export async function actByRef( @@ -31,6 +43,7 @@ export async function actByRef( value = "", option = "", files: FilesInput = "", + to = "", ): Promise { const locator = refLocator(page, ref); try { @@ -39,6 +52,10 @@ export async function actByRef( } if (kind === "pick") return { ...(await pickAutocomplete(page, locator, value, option)), ref }; if (kind === "upload") return { ...(await setFiles(locator, files)), ref }; + if (kind === "hover") return { ...(await hoverLocator(locator)), ref }; + if (kind === "drag") { + return { ...(await dragLocator(locator, destLocator(page, to))), ref, to }; + } if (kind === "click") { await locator.click({ timeout: 5_000 }); } else if (kind === "fill") { diff --git a/src/actions/hover-drag.ts b/src/actions/hover-drag.ts new file mode 100644 index 0000000..8cbc37e --- /dev/null +++ b/src/actions/hover-drag.ts @@ -0,0 +1,49 @@ +/** + * Hover and drag-and-drop actions. Shared by `performAction` (run steps), + * the snapshot `browser_act` ref/target path, and `actByRef`. `hover` moves the + * pointer over an element; `drag` drops a source element onto a destination. + * @module actions/hover-drag + */ +import type { Locator, Page } from "playwright"; +import type { ActionResult } from "../interfaces/types.js"; + +/** Move the pointer over an already-resolved locator. */ +export async function hoverLocator(locator: Locator): Promise { + try { + await locator.hover({ timeout: 5_000 }); + return { type: "hover", ok: true }; + } catch (err) { + return { type: "hover", ok: false, error: String(err).split("\n")[0] ?? "error" }; + } +} + +/** Resolve `target` to its first match and hover it. */ +export async function hover(page: Page, target: string): Promise { + if (!target) return { type: "hover", ok: false, error: "no_target" }; + return { ...(await hoverLocator(page.locator(target).first())), target }; +} + +/** Drag an already-resolved source locator onto a destination locator. 
*/ +export async function dragLocator(source: Locator, destination: Locator): Promise { + try { + await source.dragTo(destination, { timeout: 10_000 }); + return { type: "drag", ok: true }; + } catch (err) { + return { type: "drag", ok: false, error: String(err).split("\n")[0] ?? "error" }; + } +} + +/** + * Drag the `target` element onto the `to` destination. + * + * @param page - Active Playwright page. + * @param target - Selector for the source element to grab. + * @param to - Selector for the destination to drop onto. + * @returns Action result tagged with `target` and `to`. + */ +export async function drag(page: Page, target: string, to: string): Promise { + if (!target) return { type: "drag", ok: false, error: "no_target" }; + if (!to) return { type: "drag", ok: false, error: "no_destination" }; + const r = await dragLocator(page.locator(target).first(), page.locator(to).first()); + return { ...r, target, to }; +} diff --git a/src/actions/perform.ts b/src/actions/perform.ts index ed781fa..0df63ef 100644 --- a/src/actions/perform.ts +++ b/src/actions/perform.ts @@ -5,6 +5,7 @@ import type { Page } from "playwright"; import type { ActionResult } from "../interfaces/types.js"; import { pickAutocomplete } from "./autocomplete.js"; +import { drag, hover } from "./hover-drag.js"; import { login, type LoginAction } from "./login.js"; import { navigateHistory, pressKey, scroll, selectOption } from "./navigation.js"; import { smartClick } from "./smart-click.js"; @@ -40,6 +41,10 @@ export async function performAction( return selectOption(page, target, String(action.value ?? "")); case "upload": return uploadFiles(page, target, (action.files ?? action.value ?? "") as FilesInput); + case "hover": + return hover(page, target); + case "drag": + return drag(page, target, String(action.to ?? "")); case "pick": return pickAutocomplete(page, page.locator(target).first(), String(action.value ?? ""), String(action.option ?? 
"")); case "back": diff --git a/src/interfaces/types.ts b/src/interfaces/types.ts index 88c9d0c..451d9a6 100644 --- a/src/interfaces/types.ts +++ b/src/interfaces/types.ts @@ -31,6 +31,8 @@ export type BrowserAction = password?: string; } | { type: "upload"; target: string; files: string | string[] } + | { type: "hover"; target: string } + | { type: "drag"; target: string; to: string } | { type: "wait"; ms?: number }; /** Normalized result of an action. */ diff --git a/src/server/registry.ts b/src/server/registry.ts index 8c058fe..3b38faa 100644 --- a/src/server/registry.ts +++ b/src/server/registry.ts @@ -9,8 +9,10 @@ import type { CapGroup } from "./caps.js"; import { registerActTools } from "./tools/act.js"; import { registerAutoScrollTool } from "./tools/autoscroll.js"; import { registerCollectTool } from "./tools/collect.js"; +import { registerClipboardTool } from "./tools/clipboard.js"; import { registerCollectBatchTool } from "./tools/collect-batch.js"; import { registerConnectTool } from "./tools/connect.js"; +import { registerCookiesTool } from "./tools/cookies.js"; import { registerCrawlTool } from "./tools/crawl.js"; import { registerDialogTools } from "./tools/dialogs.js"; import { registerExtractTool } from "./tools/extract.js"; @@ -23,8 +25,11 @@ import { registerLiveViewTool } from "./tools/live-view.js"; import { registerLogTools } from "./tools/logs.js"; import { registerMetricsTool } from "./tools/metrics.js"; import { registerNavigateTool } from "./tools/navigate.js"; +import { registerPdfTool } from "./tools/pdf.js"; +import { registerPermissionsTool } from "./tools/permissions.js"; import { registerProbeTools } from "./tools/probe.js"; import { registerProductsTool } from "./tools/products.js"; +import { registerRouteTool } from "./tools/route.js"; import { registerRunTool } from "./tools/run.js"; import { registerScreenshotTool } from "./tools/screenshot.js"; import { registerSerpBatchTool } from "./tools/serp-batch.js"; @@ -76,6 +81,11 @@ 
export function toolGroups( () => registerLogTools(server, sessions), () => registerVisualDiffTool(server, sessions), () => registerMetricsTool(server), + () => registerPdfTool(server, sessions), + () => registerCookiesTool(server, sessions), + () => registerRouteTool(server, sessions), + () => registerPermissionsTool(server, sessions), + () => registerClipboardTool(server, sessions), ], live: [ () => registerHandoffTool(server, sessions), diff --git a/src/server/tools/clipboard.ts b/src/server/tools/clipboard.ts new file mode 100644 index 0000000..34323f3 --- /dev/null +++ b/src/server/tools/clipboard.ts @@ -0,0 +1,82 @@ +/** + * `browser_clipboard`: read from or write to the page clipboard via the + * `navigator.clipboard` API. Requires the clipboard-read/clipboard-write + * permissions — granted best-effort here, but you can also call + * `browser_permissions` first to grant them explicitly. + * @module server/tools/clipboard + */ +import type { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js"; +import type { BrowserContext, Page } from "playwright"; +import { z } from "zod"; +import { evalScript, evalScriptArg } from "../../lib/evaluate.js"; +import type { SessionManager } from "../../session/manager.js"; +import { errorResult, jsonResult } from "../result.js"; +import { withSession } from "./with-session.js"; + +const WRITE_SCRIPT = "(t) => navigator.clipboard.writeText(t)"; +const READ_SCRIPT = "() => navigator.clipboard.readText()"; + +/** read returns clipboard text; write puts `text` on the clipboard. */ +export type ClipboardAction = "read" | "write"; + +const DESC = + "Read from or write to the page clipboard (navigator.clipboard). `read` returns the current " + + "text; `write` sets it to `text`. Needs clipboard permissions — granted best-effort, or call " + + "browser_permissions with clipboard-read/clipboard-write first if the browser denies it."; + +/** Best-effort grant of clipboard permissions (failures are ignored). 
*/ +async function ensureClipboard(context: BrowserContext): Promise { + await context.grantPermissions(["clipboard-read", "clipboard-write"]).catch(() => {}); +} + +/** + * Read or write the page clipboard via `navigator.clipboard`. + * @param page - The session page. + * @param action - read or write. + * @param text - Text to write (required for `write`). + * @returns The clipboard text on read, or `undefined` on write. + */ +export async function applyClipboard( + page: Page, + action: ClipboardAction, + text?: string, +): Promise { + if (action === "write") { + await evalScriptArg(page, WRITE_SCRIPT, text ?? ""); + return undefined; + } + return evalScript(page, READ_SCRIPT); +} + +/** Register `browser_clipboard`. */ +export function registerClipboardTool(server: McpServer, sessions: SessionManager): void { + server.registerTool( + "browser_clipboard", + { + title: "Read or write clipboard", + description: DESC, + inputSchema: { + sessionId: z.string(), + action: z.enum(["read", "write"]), + text: z.string().optional(), + }, + }, + async (args) => { + const a = args as Record; + return withSession(sessions, String(a.sessionId), async (s) => { + const action = a.action as ClipboardAction; + await ensureClipboard(s.context); + try { + const text = await applyClipboard(s.page, action, a.text as string | undefined); + return action === "write" ? jsonResult({ written: true }) : jsonResult({ text }); + } catch (err) { + const why = err instanceof Error ? err.message : String(err); + return errorResult( + `clipboard_denied: ${why} — grant clipboard-read/clipboard-write via browser_permissions`, + "clipboard_denied", + ); + } + }); + }, + ); +} diff --git a/src/server/tools/cookies.ts b/src/server/tools/cookies.ts new file mode 100644 index 0000000..b29e26b --- /dev/null +++ b/src/server/tools/cookies.ts @@ -0,0 +1,76 @@ +/** + * `browser_cookies`: read, set, or clear cookies on a live session's context. 
+ * Cookies use the Playwright shape ({name,value,domain?,url?,path?,…}); `get` + * may be filtered by `urls`. + * @module server/tools/cookies + */ +import type { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js"; +import type { BrowserContext } from "playwright"; +import { z } from "zod"; +import type { SessionManager } from "../../session/manager.js"; +import { errorResult, jsonResult } from "../result.js"; +import { withSession } from "./with-session.js"; + +/** A Playwright cookie object as accepted by `context.addCookies()`. */ +type CookieInput = Parameters[0][number]; + +const COOKIE_SCHEMA = z + .object({ + name: z.string(), + value: z.string(), + url: z.string().optional(), + domain: z.string().optional(), + path: z.string().optional(), + expires: z.number().optional(), + httpOnly: z.boolean().optional(), + secure: z.boolean().optional(), + sameSite: z.enum(["Strict", "Lax", "None"]).optional(), + }) + .passthrough(); + +/** Run the requested cookie action against `context`. */ +async function runAction( + context: BrowserContext, + a: Record, +): Promise> { + const action = a.action as "get" | "set" | "clear"; + if (action === "set") { + const cookies = (a.cookies ?? []) as CookieInput[]; + await context.addCookies(cookies); + return { added: cookies.length }; + } + if (action === "clear") { + await context.clearCookies(); + return { cleared: true }; + } + const urls = a.urls as string[] | undefined; + return { cookies: await context.cookies(urls) }; +} + +/** Register `browser_cookies`. */ +export function registerCookiesTool(server: McpServer, sessions: SessionManager): void { + server.registerTool( + "browser_cookies", + { + title: "Cookies", + description: + "Read, set, or clear cookies on this session's context. 
`get` returns {cookies} (optionally filtered by `urls`); `set` adds Playwright cookies and returns {added}; `clear` removes all and returns {cleared}.", + inputSchema: { + sessionId: z.string(), + action: z.enum(["get", "set", "clear"]), + cookies: z.array(COOKIE_SCHEMA).optional(), + urls: z.array(z.string()).optional(), + }, + }, + async (args) => { + const a = args as Record; + return withSession(sessions, String(a.sessionId), async (s) => { + try { + return jsonResult(await runAction(s.context, a)); + } catch (err) { + return errorResult(err instanceof Error ? err.message : String(err), "cookies_failed"); + } + }); + }, + ); +} diff --git a/src/server/tools/dialogs.ts b/src/server/tools/dialogs.ts index ad24265..a7621a3 100644 --- a/src/server/tools/dialogs.ts +++ b/src/server/tools/dialogs.ts @@ -6,9 +6,10 @@ import type { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js"; import { z } from "zod"; import { attachDialogs, recentDialogs, setDialogPolicy } from "../../session/dialogs.js"; +import { readDownload } from "../../session/download-read.js"; import { attachDownloads, listDownloads } from "../../session/downloads.js"; import type { SessionManager } from "../../session/manager.js"; -import { jsonResult } from "../result.js"; +import { errorResult, jsonResult } from "../result.js"; import { withSession } from "./with-session.js"; /** Register `browser_dialog` and `browser_downloads`. */ @@ -44,8 +45,13 @@ export function registerDialogTools(server: McpServer, sessions: SessionManager) "browser_downloads", { title: "List downloads", - description: "List the files downloaded by this session (saved under outputDir/downloads).", - inputSchema: { sessionId: z.string() }, + description: + "List the files downloaded by this session (saved under outputDir/downloads). 
Pass `read` (index or filename) to also return one file's content, decoded as `encoding` (utf8 or base64).", + inputSchema: { + sessionId: z.string(), + read: z.union([z.number(), z.string()]).optional(), + encoding: z.enum(["utf8", "base64"]).default("utf8"), + }, }, async (args) => { const a = args as Record; @@ -53,7 +59,12 @@ export function registerDialogTools(server: McpServer, sessions: SessionManager) // Idempotent: a no-op when the session wiring already attached it. attachDownloads(s); const downloads = listDownloads(s); - return jsonResult({ count: downloads.length, downloads }); + const base = { count: downloads.length, downloads }; + if (a.read === undefined) return jsonResult(base); + const encoding = a.encoding as "utf8" | "base64"; + const content = readDownload(downloads, a.read as number | string, encoding); + if ("error" in content) return errorResult(content.error, "DOWNLOAD_READ_FAILED"); + return jsonResult({ ...base, content }); }); }, ); diff --git a/src/server/tools/pdf.ts b/src/server/tools/pdf.ts new file mode 100644 index 0000000..8d78daf --- /dev/null +++ b/src/server/tools/pdf.ts @@ -0,0 +1,72 @@ +/** + * `browser_pdf`: render the live page to PDF. Chromium-headless only — Playwright + * `page.pdf()` throws in headed mode or on non-chromium engines; we surface a + * clear error in that case. With `path` the PDF is written to disk; otherwise it + * is returned base64-encoded. + * @module server/tools/pdf + */ +import type { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js"; +import type { Page } from "playwright"; +import { z } from "zod"; +import { writeFileBytes } from "../../lib/fs.js"; +import type { SessionManager } from "../../session/manager.js"; +import { errorResult, jsonResult } from "../result.js"; +import { withSession } from "./with-session.js"; + +const HEADLESS_ONLY = "browser_pdf requires headless chromium"; + +/** Options forwarded to Playwright `page.pdf()`. 
*/ +interface PdfOptions { + format?: string; + landscape?: boolean; + printBackground?: boolean; +} + +/** Render `page` to PDF; returns the bytes (no path, so Playwright yields a Buffer). */ +async function renderPdf(page: Page, opts: PdfOptions): Promise { + return page.pdf({ + format: opts.format, + landscape: opts.landscape, + printBackground: opts.printBackground, + }); +} + +/** Register `browser_pdf`. */ +export function registerPdfTool(server: McpServer, sessions: SessionManager): void { + server.registerTool( + "browser_pdf", + { + title: "Render PDF", + description: + "Render the live page to PDF (headless chromium only). With `path` the file is written and {path} returned; otherwise the PDF is returned base64 as {pdfBase64}.", + inputSchema: { + sessionId: z.string(), + path: z.string().optional(), + format: z.string().optional(), + landscape: z.boolean().optional(), + printBackground: z.boolean().optional(), + }, + }, + async (args) => { + const a = args as Record; + return withSession(sessions, String(a.sessionId), async (s) => { + const opts: PdfOptions = { + format: a.format as string | undefined, + landscape: a.landscape as boolean | undefined, + printBackground: a.printBackground as boolean | undefined, + }; + let bytes: Buffer; + try { + bytes = await renderPdf(s.page, opts); + } catch { + return errorResult(HEADLESS_ONLY, "pdf_unsupported"); + } + if (typeof a.path === "string" && a.path.length > 0) { + writeFileBytes(a.path, bytes); + return jsonResult({ path: a.path, bytes: bytes.length }); + } + return jsonResult({ pdfBase64: bytes.toString("base64"), bytes: bytes.length }); + }); + }, + ); +} diff --git a/src/server/tools/permissions.ts b/src/server/tools/permissions.ts new file mode 100644 index 0000000..af30d1b --- /dev/null +++ b/src/server/tools/permissions.ts @@ -0,0 +1,66 @@ +/** + * `browser_permissions`: grant or clear runtime browser permissions + * (geolocation, clipboard-read/write, notifications, camera, microphone…) + * on a 
live session, optionally scoped to a single origin. + * @module server/tools/permissions + */ +import type { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js"; +import type { BrowserContext } from "playwright"; +import { z } from "zod"; +import type { SessionManager } from "../../session/manager.js"; +import { jsonResult } from "../result.js"; +import { withSession } from "./with-session.js"; + +/** grant adds permissions; clear removes every granted permission. */ +export type PermissionAction = "grant" | "clear"; + +const DESC = + "Grant or clear runtime permissions on this session. `grant` (default) allows the listed " + + "permissions (e.g. geolocation, notifications, clipboard-read, clipboard-write, camera, " + + "microphone), optionally scoped to `origin`. `clear` revokes all previously granted permissions."; + +/** + * Apply a permission change to a context. + * @param context - The session browser context. + * @param action - grant or clear. + * @param permissions - Permission names to grant (ignored on clear). + * @param origin - Optional origin to scope a grant to. + */ +export async function applyPermissions( + context: BrowserContext, + action: PermissionAction, + permissions: string[], + origin?: string, +): Promise { + if (action === "clear") return context.clearPermissions(); + return context.grantPermissions(permissions, origin ? { origin } : undefined); +} + +/** Register `browser_permissions`. 
*/ +export function registerPermissionsTool(server: McpServer, sessions: SessionManager): void { + server.registerTool( + "browser_permissions", + { + title: "Grant or clear permissions", + description: DESC, + inputSchema: { + sessionId: z.string(), + permissions: z.array(z.string()).default([]), + origin: z.string().optional(), + action: z.enum(["grant", "clear"]).default("grant"), + }, + }, + async (args) => { + const a = args as Record; + return withSession(sessions, String(a.sessionId), async (s) => { + const action = (a.action as PermissionAction) ?? "grant"; + const permissions = (a.permissions as string[]) ?? []; + const origin = a.origin as string | undefined; + await applyPermissions(s.context, action, permissions, origin); + return action === "clear" + ? jsonResult({ cleared: true }) + : jsonResult({ granted: permissions, origin }); + }); + }, + ); +} diff --git a/src/server/tools/route.ts b/src/server/tools/route.ts new file mode 100644 index 0000000..a9fc8a7 --- /dev/null +++ b/src/server/tools/route.ts @@ -0,0 +1,85 @@ +/** + * `browser_route`: intercept network requests on a live session — mock a + * response, abort matching requests, or remove a previously installed route. + * @module server/tools/route + */ +import type { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js"; +import type { BrowserContext, Route } from "playwright"; +import { z } from "zod"; +import type { SessionManager } from "../../session/manager.js"; +import { jsonResult } from "../result.js"; +import { withSession } from "./with-session.js"; + +/** Route action: fulfill with a canned response, block, or remove the route. */ +export type RouteAction = "mock" | "abort" | "unroute"; + +/** Options for a `mock` fulfillment (all optional, Playwright defaults apply). */ +export interface RouteMock { + status?: number; + body?: string; + contentType?: string; +} + +const DESC = + "Intercept requests matching a glob/URL `pattern` on this session. 
`mock` fulfills them with a " + + "canned response (status/body/contentType); `abort` blocks them; `unroute` removes a route set " + + "earlier with the same pattern. Patterns are Playwright globs, e.g. `**/api/**` or `https://x/*`."; + +/** + * Install or remove a route on a context. + * @param context - The session browser context. + * @param pattern - Playwright URL glob to match. + * @param action - mock, abort, or unroute. + * @param mock - Response options when action is `mock`. + */ +export async function applyRoute( + context: BrowserContext, + pattern: string, + action: RouteAction, + mock: RouteMock = {}, +): Promise { + if (action === "unroute") { + await context.unroute(pattern); + return; + } + if (action === "abort") { + await context.route(pattern, (r: Route) => { + void r.abort(); + }); + return; + } + await context.route(pattern, (r: Route) => { + void r.fulfill({ status: mock.status, body: mock.body, contentType: mock.contentType }); + }); +} + +/** Register `browser_route`. 
*/ +export function registerRouteTool(server: McpServer, sessions: SessionManager): void { + server.registerTool( + "browser_route", + { + title: "Mock or block network", + description: DESC, + inputSchema: { + sessionId: z.string(), + pattern: z.string(), + action: z.enum(["mock", "abort", "unroute"]), + status: z.number().optional(), + body: z.string().optional(), + contentType: z.string().optional(), + }, + }, + async (args) => { + const a = args as Record; + return withSession(sessions, String(a.sessionId), async (s) => { + const action = a.action as RouteAction; + await applyRoute(s.context, String(a.pattern), action, { + status: a.status as number | undefined, + body: a.body as string | undefined, + contentType: a.contentType as string | undefined, + }); + return jsonResult({ routed: String(a.pattern), action }); + }); + }, + ); +} diff --git a/src/server/tools/run-act.ts b/src/server/tools/run-act.ts index 971410f..f8ef016 100644 --- a/src/server/tools/run-act.ts +++ b/src/server/tools/run-act.ts @@ -7,6 +7,7 @@ import type { Page } from "playwright"; import { z } from "zod"; import { actByRef, type RefActionKind } from "../../actions/act-by-ref.js"; import { pickAutocomplete } from "../../actions/autocomplete.js"; +import { drag, hover } from "../../actions/hover-drag.js"; import { smartClick } from "../../actions/smart-click.js"; import { smartFill } from "../../actions/smart-fill.js"; import { type FilesInput, uploadFiles } from "../../actions/upload.js"; @@ -14,7 +15,7 @@ import type { ActionResult } from "../../interfaces/types.js"; import { runWithMemory } from "../../state/action-memory.js"; /** Allowed `browser_act` kinds. */ -export const KIND = z.enum(["click", "fill", "select", "pick", "upload"]); +export const KIND = z.enum(["click", "fill", "select", "pick", "upload", "hover", "drag"]); /** * Run the chosen action (by `ref` or text fallback), with site-memory assist. 
@@ -34,12 +35,16 @@ export async function runAct( const kind = a.kind as RefActionKind; const value = a.value ? String(a.value) : ""; const option = a.option ? String(a.option) : ""; + const to = a.to ? String(a.to) : ""; const files = (a.files ?? a.value ?? "") as FilesInput; - if (typeof a.ref === "number" || typeof a.ref === "string") return actByRef(page, a.ref, kind, value, option, files); + if (typeof a.ref === "number" || typeof a.ref === "string") + return actByRef(page, a.ref, kind, value, option, files, to); if (typeof a.target !== "string") return null; const target = a.target; if (kind === "pick") return pickAutocomplete(page, page.locator(target).first(), value, option); if (kind === "upload") return uploadFiles(page, target, files); + if (kind === "hover") return hover(page, target); + if (kind === "drag") return drag(page, target, to); return runWithMemory(dir, page, { type: kind, target }, (act) => { const pref = String(act.preferredStrategy ?? ""); return kind === "fill" ? smartFill(page, target, value, pref, human) : smartClick(page, target, pref, human); diff --git a/src/server/tools/snapshot.ts b/src/server/tools/snapshot.ts index c7e93c4..02e61ba 100644 --- a/src/server/tools/snapshot.ts +++ b/src/server/tools/snapshot.ts @@ -42,7 +42,7 @@ export function registerSnapshotTools(server: McpServer, sessions: SessionManage { title: "Act on element", description: - "Execute click/fill/select/pick/upload on an element by `ref` (from browser_snapshot) or by `target` text. `pick` = type `value` into a combobox then click the matching suggestion (`option` text, defaults to `value`) — for airport/city autocompletes. `upload` = set local file path(s) on an `` via `files` (a single path, a comma-separated string, or an array). Returns a diff of what changed. 
Pass `annotate:true` to also get a Set-of-Marks screenshot of the NEW state (re-marked, anti-drift) for vision models.", + "Execute click/fill/select/pick/upload/hover/drag on an element by `ref` (from browser_snapshot) or by `target` text. `pick` = type `value` into a combobox then click the matching suggestion (`option` text, defaults to `value`) — for airport/city autocompletes. `upload` = set local file path(s) on an `` via `files` (a single path, a comma-separated string, or an array). `hover` = move the pointer over the element (reveals hover menus/tooltips). `drag` = drag the source element onto a destination given by `to` (a snapshot `ref` or a CSS selector). Returns a diff of what changed. Pass `annotate:true` to also get a Set-of-Marks screenshot of the NEW state (re-marked, anti-drift) for vision models.", inputSchema: { sessionId: z.string(), kind: KIND, @@ -51,6 +51,7 @@ export function registerSnapshotTools(server: McpServer, sessions: SessionManage value: z.string().optional(), option: z.string().optional(), files: z.union([z.string(), z.array(z.string())]).optional(), + to: z.string().optional(), annotate: z.boolean().optional(), }, }, diff --git a/src/session/download-read.ts b/src/session/download-read.ts new file mode 100644 index 0000000..3204ba5 --- /dev/null +++ b/src/session/download-read.ts @@ -0,0 +1,54 @@ +/** + * Read the on-disk content of a captured download, resolving it by index or + * filename. Bounds the read size so a huge file can't be slurped into a result. + * @module session/download-read + */ +import { existsSync, readFileSync, statSync } from "node:fs"; +import type { DownloadRecord } from "./downloads.js"; + +/** Max bytes returned by a single read. */ +export const MAX_READ_BYTES = 5 * 1024 * 1024; + +/** Successful read payload. */ +export interface DownloadContent { + filename: string; + encoding: "utf8" | "base64"; + data: string; +} + +/** Read failure (resolution, missing file, or size cap). 
*/ +export interface DownloadReadError { + error: string; +} + +/** Resolve a download by numeric index or by suggestedFilename match. */ +function resolve(records: DownloadRecord[], ref: number | string): DownloadRecord | undefined { + if (typeof ref === "number") return records[ref]; + return records.find((r) => r.suggestedFilename === ref); +} + +/** + * Read a captured download's file by index or filename. + * + * @param records - The session's captured downloads (oldest first). + * @param ref - Index into the list, or a `suggestedFilename` to match. + * @param encoding - `"utf8"` (text) or `"base64"` (bytes). Defaults to utf8. + * @returns The content, or an `{ error }` describing what went wrong. + */ +export function readDownload( + records: DownloadRecord[], + ref: number | string, + encoding: "utf8" | "base64" = "utf8", +): DownloadContent | DownloadReadError { + const record = resolve(records, ref); + if (!record) return { error: `No download matching ${JSON.stringify(ref)}.` }; + if (!record.path || !existsSync(record.path)) { + return { error: `Download "${record.suggestedFilename}" has no file on disk.` }; + } + const size = statSync(record.path).size; + if (size > MAX_READ_BYTES) { + return { error: `File is ${size} bytes, over the ${MAX_READ_BYTES} byte read cap.` }; + } + const data = readFileSync(record.path).toString(encoding); + return { filename: record.suggestedFilename, encoding, data }; +} diff --git a/tests/integration/mcp.test.ts b/tests/integration/mcp.test.ts index 5a285e0..6a23922 100644 --- a/tests/integration/mcp.test.ts +++ b/tests/integration/mcp.test.ts @@ -63,6 +63,11 @@ const EXPECTED = [ "browser_downloads", "browser_console", "browser_network", + "browser_pdf", + "browser_cookies", + "browser_route", + "browser_permissions", + "browser_clipboard", ]; test("MCP exposes the expected tool set with no duplicates", async () => { diff --git a/tests/live/live-browser-tools.ts b/tests/live/live-browser-tools.ts new file mode 100644 
index 0000000..802cb65 --- /dev/null +++ b/tests/live/live-browser-tools.ts @@ -0,0 +1,48 @@ +/** + * Live test of the new session tools: browser_cookies, browser_route (mock), + * browser_pdf, browser_permissions, browser_clipboard. + * Run: `node --import tsx tests/live/live-browser-tools.ts` + * @module tests/live/live-browser-tools + */ +import { check, connect, payload, state } from "./live-checks.js"; + +async function main(): Promise { + const client = await connect(); + const open = payload(await client.callTool({ name: "browser_open", arguments: { url: "https://example.com" } })); + const sid = String(open.sessionId); + await client.callTool({ name: "browser_navigate", arguments: { sessionId: sid, url: "https://example.com", waitMs: 800 } }); + + // cookies: set → get → clear + const setc = payload(await client.callTool({ name: "browser_cookies", arguments: { sessionId: sid, action: "set", cookies: [{ name: "fuse", value: "live", url: "https://example.com" }] } })); + check("browser_cookies set", Number(setc.added) === 1, JSON.stringify(setc)); + const getc = payload(await client.callTool({ name: "browser_cookies", arguments: { sessionId: sid, action: "get" } })); + const has = ((getc.cookies as Array<{ name: string; value: string }> | undefined) ?? []).some((c) => c.name === "fuse" && c.value === "live"); + check("browser_cookies get retrouve le cookie posé", has, `${(getc.cookies as unknown[] | undefined)?.length} cookies`); + + // route: mock a document response, then navigate to it + await client.callTool({ name: "browser_route", arguments: { sessionId: sid, pattern: "**/mocked", action: "mock", status: 200, body: "

FUSE-MOCK-OK

", contentType: "text/html" } }); + await client.callTool({ name: "browser_navigate", arguments: { sessionId: sid, url: "https://example.com/mocked", waitMs: 600 } }); + const mtxt = payload(await client.callTool({ name: "browser_extract", arguments: { sessionId: sid, kind: "text", format: "text" } })); + check("browser_route mock une réponse réseau (fulfill)", /FUSE-MOCK-OK/.test(String(mtxt.text ?? "")), String(mtxt.text ?? "").slice(0, 40)); + + // pdf: generate from the live page (headless) + const pdf = payload(await client.callTool({ name: "browser_pdf", arguments: { sessionId: sid } })); + check("browser_pdf génère un PDF (bytes > 0)", Number(pdf.bytes) > 1000, `bytes=${String(pdf.bytes)}, code=${String(pdf.code ?? "")}`); + + // permissions + clipboard: grant then write/read + await client.callTool({ name: "browser_permissions", arguments: { sessionId: sid, permissions: ["clipboard-read", "clipboard-write"], origin: "https://example.com" } }); + const w = payload(await client.callTool({ name: "browser_clipboard", arguments: { sessionId: sid, action: "write", text: "fuse-clip-42" } })); + check("browser_clipboard write", w.written === true || w.code === "clipboard_denied", JSON.stringify(w)); + const r = payload(await client.callTool({ name: "browser_clipboard", arguments: { sessionId: sid, action: "read" } })); + check("browser_clipboard read relit le texte écrit", r.text === "fuse-clip-42" || r.code === "clipboard_denied", JSON.stringify(r)); + + await client.callTool({ name: "browser_close", arguments: { sessionId: sid } }); + await client.close(); + console.log(state.failures === 0 ? "\nRESULT: new browser tools OK en réel" : `\nRESULT: ${state.failures} échec(s)`); + process.exit(state.failures === 0 ? 
0 : 1); +} + +main().catch((err) => { + console.error("FATAL:", err); + process.exit(1); +}); diff --git a/tests/live/live-gestures.ts b/tests/live/live-gestures.ts new file mode 100644 index 0000000..702ce02 --- /dev/null +++ b/tests/live/live-gestures.ts @@ -0,0 +1,40 @@ +/** + * Live test of the new `hover` and `drag` actions (browser_act) on real sites. + * Run: `node --import tsx tests/live/live-gestures.ts` + * @module tests/live/live-gestures + */ +import { check, connect, payload, state } from "./live-checks.js"; + +interface El { href?: string; visible?: boolean } + +async function main(): Promise { + const client = await connect(); + + // --- hover: the-internet/hovers reveals a "View profile" link on hover --- + const o1 = payload(await client.callTool({ name: "browser_open", arguments: { url: "https://the-internet.herokuapp.com/hovers" } })); + const s1 = String(o1.sessionId); + await client.callTool({ name: "browser_navigate", arguments: { sessionId: s1, url: "https://the-internet.herokuapp.com/hovers", waitMs: 1200 } }); + await client.callTool({ name: "browser_act", arguments: { sessionId: s1, kind: "hover", target: ".figure:nth-child(3)" } }); + const snap = payload(await client.callTool({ name: "browser_snapshot", arguments: { sessionId: s1 } })); + const profileVisible = ((snap.elements as El[] | undefined) ?? []).some((e) => (e.href ?? 
"").includes("/users/") && e.visible === true); + check("hover révèle le lien 'View profile' (caption au survol)", profileVisible, `lien users visible=${profileVisible}`); + await client.callTool({ name: "browser_close", arguments: { sessionId: s1 } }); + + // --- drag: the-internet/drag_and_drop --- + const o2 = payload(await client.callTool({ name: "browser_open", arguments: { url: "https://the-internet.herokuapp.com/drag_and_drop" } })); + const s2 = String(o2.sessionId); + await client.callTool({ name: "browser_navigate", arguments: { sessionId: s2, url: "https://the-internet.herokuapp.com/drag_and_drop", waitMs: 1200 } }); + const dr = payload(await client.callTool({ name: "browser_act", arguments: { sessionId: s2, kind: "drag", target: "#column-a", to: "#column-b" } })); + const dragOk = (dr.result as { ok?: boolean } | undefined)?.ok === true; + check("drag action exécutée sans erreur (dragTo source→destination)", dragOk, JSON.stringify(dr.result ?? dr)); + await client.callTool({ name: "browser_close", arguments: { sessionId: s2 } }); + + await client.close(); + console.log(state.failures === 0 ? "\nRESULT: gestures OK en réel" : `\nRESULT: ${state.failures} échec(s)`); + process.exit(state.failures === 0 ? 0 : 1); +} + +main().catch((err) => { + console.error("FATAL:", err); + process.exit(1); +}); diff --git a/tests/unit/clipboard-tool.test.ts b/tests/unit/clipboard-tool.test.ts new file mode 100644 index 0000000..2a4ffcb --- /dev/null +++ b/tests/unit/clipboard-tool.test.ts @@ -0,0 +1,47 @@ +import { describe, expect, test } from "bun:test"; +import type { Page } from "playwright"; +import { applyClipboard } from "../../src/server/tools/clipboard.js"; + +/** + * Fake page whose `evaluate` mirrors Playwright's string mode: it receives a + * JS expression string (an IIFE produced by `evalScript`/`evalScriptArg`) and + * runs it with a stub `navigator.clipboard` in scope, like the real browser. 
+ */ +function fakePage(initial = "") { + let stored = initial; + const navigator = { + clipboard: { + writeText: async (t: string): Promise => { + stored = t; + }, + readText: async (): Promise => stored, + }, + }; + const page = { + async evaluate(expression: string): Promise { + const run = new Function("navigator", `return (${expression});`); + return run(navigator); + }, + } as unknown as Page; + return { page, read: () => stored }; +} + +describe("applyClipboard", () => { + test("write stores the text and returns undefined", async () => { + const { page, read } = fakePage(); + const out = await applyClipboard(page, "write", "hello"); + expect(out).toBeUndefined(); + expect(read()).toBe("hello"); + }); + + test("write of missing text writes an empty string", async () => { + const { page, read } = fakePage("old"); + await applyClipboard(page, "write"); + expect(read()).toBe(""); + }); + + test("read returns the current clipboard text", async () => { + const { page } = fakePage("copied"); + expect(await applyClipboard(page, "read")).toBe("copied"); + }); +}); diff --git a/tests/unit/cookies-tool.test.ts b/tests/unit/cookies-tool.test.ts new file mode 100644 index 0000000..22226d4 --- /dev/null +++ b/tests/unit/cookies-tool.test.ts @@ -0,0 +1,97 @@ +import { describe, expect, test } from "bun:test"; +import type { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js"; +import type { CallToolResult } from "@modelcontextprotocol/sdk/types.js"; +import type { BrowserContext } from "playwright"; +import type { SessionManager } from "../../src/session/manager.js"; +import type { SessionData } from "../../src/session/session.js"; +import { registerCookiesTool } from "../../src/server/tools/cookies.js"; + +type Handler = (args: Record) => Promise; + +/** Capture the registered handler from a fake McpServer. 
*/ +function captureHandler(): { server: McpServer; handler: () => Handler } { + /* Stays undefined until registerTool is invoked on the fake server. */ let captured: Handler | undefined; + const server = { + registerTool: (_name: string, _cfg: unknown, fn: Handler) => { + captured = fn; + }, + } as unknown as McpServer; + /* handler() reads the captured callback lazily, at call time. */ return { server, handler: () => captured as Handler }; +} + +/** Spy BrowserContext stub. It records the urls passed to cookies(), the argument passed to addCookies() and the number of clearCookies() calls; cookies() always resolves to a single fixed cookie (name sid, value 1). */ +function fakeContext() { + const calls = { + cookiesUrls: undefined as unknown, + added: undefined as unknown, + cleared: 0, + }; + const context = { + cookies: async (urls?: unknown) => { + calls.cookiesUrls = urls; + return [{ name: "sid", value: "1" }]; + }, + addCookies: async (cookies: unknown) => { + calls.added = cookies; + }, + clearCookies: async () => { + calls.cleared += 1; + }, + } as unknown as BrowserContext; + return { context, calls }; +} + +/** SessionManager stub: get() always returns one session (id s, health ok) carrying the given context, and markBusy / markIdle are no-ops. */ +function fakeSessions(context: BrowserContext): SessionManager { + const session = { id: "s", health: "ok", context } as unknown as SessionData; + return { + get: () => session, + markBusy: () => {}, + markIdle: () => {}, + } as unknown as SessionManager; +} + +/** browser_cookies handler: the get, set and clear actions, then the error result when the context call throws. NOTE(review): the Record casts below have no type arguments in this copy; confirm against the repository. */ describe("browser_cookies", () => { + test("get -> calls context.cookies(urls) and returns them", async () => { + const { context, calls } = fakeContext(); + const { server, handler } = captureHandler(); + registerCookiesTool(server, fakeSessions(context)); + const res = await handler()({ sessionId: "s", action: "get", urls: ["https://a.com"] }); + expect(calls.cookiesUrls).toEqual(["https://a.com"]); + expect((res.structuredContent as Record).cookies).toEqual([ + { name: "sid", value: "1" }, + ]); + }); + + test("set -> calls addCookies and returns {added}", async () => { + const { context, calls } = fakeContext(); + const { server, handler } = captureHandler(); + registerCookiesTool(server, fakeSessions(context)); + const cookies = [{ name: "t", value: "v", url: "https://a.com" }]; + const res = await handler()({ sessionId: "s", action:
"set", cookies }); + expect(calls.added).toEqual(cookies); + expect((res.structuredContent as Record).added).toBe(1); + }); + + test("clear -> calls clearCookies and returns {cleared}", async () => { + const { context, calls } = fakeContext(); + const { server, handler } = captureHandler(); + registerCookiesTool(server, fakeSessions(context)); + const res = await handler()({ sessionId: "s", action: "clear" }); + expect(calls.cleared).toBe(1); + expect((res.structuredContent as Record).cleared).toBe(true); + }); + + test("a thrown error becomes a cookies_failed error result", async () => { + /* Only clearCookies exists on this stub, and it always rejects. */ const context = { + clearCookies: async () => { + throw new Error("boom"); + }, + } as unknown as BrowserContext; + const { server, handler } = captureHandler(); + registerCookiesTool(server, fakeSessions(context)); + const res = await handler()({ sessionId: "s", action: "clear" }); + expect(res.isError).toBe(true); + expect((res.structuredContent as Record).code).toBe("cookies_failed"); + }); +}); diff --git a/tests/unit/download-read.test.ts b/tests/unit/download-read.test.ts new file mode 100644 index 0000000..715a06a --- /dev/null +++ b/tests/unit/download-read.test.ts @@ -0,0 +1,76 @@ +import { afterAll, describe, expect, test } from "bun:test"; +import { mkdtempSync, rmSync, writeFileSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import { MAX_READ_BYTES, readDownload } from "../../src/session/download-read.js"; +import type { DownloadRecord } from "../../src/session/downloads.js"; + +/** Scratch directory for fixture files, created under the OS temp dir and removed recursively by the afterAll hook. */ const dir = mkdtempSync(join(tmpdir(), "dl-read-")); +afterAll(() => rmSync(dir, { recursive: true, force: true })); + +/** Write a file in the temp dir and return a matching DownloadRecord.
*/ +function record(name: string, bytes: Buffer | string): DownloadRecord { + const path = join(dir, name); + /* Write the bytes first so the returned record points at a file that exists. */ writeFileSync(path, bytes); + return { url: `http://x/${name}`, suggestedFilename: name, path, at: Date.now() }; +} + +/** readDownload: lookup by index and by filename, utf8 and base64 output, and the error results (no such download, no file on disk, empty path, size cap). */ describe("readDownload", () => { + /* Fixtures are created once, when the describe callback runs: index 0 is the text file, index 1 the binary one. */ const text = record("note.txt", "héllo world"); + const bin = record("blob.bin", Buffer.from([0x00, 0xff, 0x10])); + const records = [text, bin]; + + test("reads utf8 by index", () => { + const out = readDownload(records, 0, "utf8"); + expect(out).toEqual({ filename: "note.txt", encoding: "utf8", data: "héllo world" }); + }); + + test("reads base64 by filename", () => { + const out = readDownload(records, "blob.bin", "base64"); + expect(out).toEqual({ + filename: "blob.bin", + encoding: "base64", + data: Buffer.from([0x00, 0xff, 0x10]).toString("base64"), + }); + }); + + test("defaults to utf8", () => { + const out = readDownload(records, 0); + /* Evaluates to false when out has no data property, so an error result fails this assertion. */ expect("data" in out && out.encoding).toBe("utf8"); + }); + + test("invalid index → error", () => { + expect(readDownload(records, 9)).toEqual({ error: expect.stringContaining("No download") }); + }); + + test("unknown filename → error", () => { + expect(readDownload(records, "nope.txt")).toEqual({ + error: expect.stringContaining("No download"), + }); + }); + + test("missing file on disk → error", () => { + /* The record points inside the scratch dir but nothing is written at that path. */ const ghost: DownloadRecord = { + url: "http://x/gone", + suggestedFilename: "gone.txt", + path: join(dir, "gone.txt"), + at: Date.now(), + }; + expect(readDownload([ghost], 0)).toEqual({ error: expect.stringContaining("no file on disk") }); + }); + + test("record with empty path → error", () => { + const pending: DownloadRecord = { + url: "http://x/p", + suggestedFilename: "p.txt", + path: "", + at: Date.now(), + }; + expect(readDownload([pending], 0)).toEqual({ error: expect.stringContaining("no file") }); + }); + + test("file over the size cap → error", () => { + /* One byte more than MAX_READ_BYTES. */ const big = record("big.bin", Buffer.alloc(MAX_READ_BYTES + 1)); + expect(readDownload([big],
0)).toEqual({ error: expect.stringContaining("read cap") }); + }); +}); diff --git a/tests/unit/hover-drag.test.ts b/tests/unit/hover-drag.test.ts new file mode 100644 index 0000000..5db9c2c --- /dev/null +++ b/tests/unit/hover-drag.test.ts @@ -0,0 +1,94 @@ +import { describe, expect, test } from "bun:test"; +import type { Locator, Page } from "playwright"; +import { drag, dragLocator, hover, hoverLocator } from "../../src/actions/hover-drag.js"; + +/** Locator stub that logs its calls: first() returns the stub itself, hover() pushes hover, and dragTo(dest) pushes dragTo: followed by dest.name. Returns the stub cast to Locator together with the calls array. */ +function makeLocator(name = "loc"): { locator: Locator; calls: string[] } { + const calls: string[] = []; + const self = { + name, + first: () => self, + hover: async () => void calls.push("hover"), + dragTo: async (dest: { name: string }) => void calls.push(`dragTo:${dest.name}`), + }; + return { locator: self as unknown as Locator, calls }; +} + +/** Locator stub whose hover() and dragTo(), called directly or through first(), always reject with an Error whose message has two lines (Element is not visible, then stack). */ +function makeFailingLocator(): Locator { + const fail = async () => { + throw new Error("Element is not visible\nstack"); + }; + return { first: () => ({ hover: fail, dragTo: fail }), hover: fail, dragTo: fail } as unknown as Locator; +} + +/** hoverLocator: success result, and failure reduced to a single-line error string. */ describe("hoverLocator", () => { + test("calls locator.hover() and reports ok", async () => { + const { locator, calls } = makeLocator(); + expect(await hoverLocator(locator)).toEqual({ type: "hover", ok: true }); + expect(calls).toEqual(["hover"]); + }); + + test("captures a single-line error on failure", async () => { + const r = await hoverLocator(makeFailingLocator()); + expect(r).toEqual({ type: "hover", ok: false, error: "Error: Element is not visible" }); + }); +}); + +/** hover: resolves the selector via page.locator, and rejects an empty target without calling page.locator. */ describe("hover", () => { + test("resolves target and tags the result", async () => { + const { locator, calls } = makeLocator(); + const page = { locator: () => locator } as unknown as Page; + expect(await hover(page, ".menu")).toEqual({ type: "hover", ok: true, target: ".menu" }); + expect(calls).toEqual(["hover"]); + }); + + test("fails without
touching the page when target is empty", async () => { + let touched = false; + const page = { + locator: () => { + touched = true; + return null as unknown as Locator; + }, + } as unknown as Page; + expect(await hover(page, "")).toEqual({ type: "hover", ok: false, error: "no_target" }); + expect(touched).toBe(false); + }); +}); + +describe("dragLocator", () => { + test("calls source.dragTo(destination)", async () => { + const src = makeLocator("src"); + const dst = makeLocator("dst"); + expect(await dragLocator(src.locator, dst.locator)).toEqual({ type: "drag", ok: true }); + expect(src.calls).toEqual(["dragTo:dst"]); + }); + + test("captures a single-line error on failure", async () => { + const r = await dragLocator(makeFailingLocator(), makeLocator().locator); + expect(r).toEqual({ type: "drag", ok: false, error: "Error: Element is not visible" }); + }); +}); + +describe("drag", () => { + test("resolves source + destination and tags the result", async () => { + const src = makeLocator("src"); + const dst = makeLocator("dst"); + const queried: string[] = []; + const page = { + locator: (sel: string) => { + queried.push(sel); + return sel === "#src" ?
src.locator : dst.locator; + }, + } as unknown as Page; + expect(await drag(page, "#src", "#dst")).toEqual({ type: "drag", ok: true, target: "#src", to: "#dst" }); + expect(queried).toEqual(["#src", "#dst"]); + expect(src.calls).toEqual(["dragTo:dst"]); + }); + + test("fails without source / destination", async () => { + /* The page is an empty object, so neither call can rely on page.locator. */ const p = {} as unknown as Page; + expect(await drag(p, "", "#dst")).toEqual({ type: "drag", ok: false, error: "no_target" }); + expect(await drag(p, "#src", "")).toEqual({ type: "drag", ok: false, error: "no_destination" }); + }); +}); diff --git a/tests/unit/pdf-tool.test.ts b/tests/unit/pdf-tool.test.ts new file mode 100644 index 0000000..769cd92 --- /dev/null +++ b/tests/unit/pdf-tool.test.ts @@ -0,0 +1,80 @@ +import { describe, expect, test } from "bun:test"; +import { readFileSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import type { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js"; +import type { CallToolResult } from "@modelcontextprotocol/sdk/types.js"; +import type { SessionManager } from "../../src/session/manager.js"; +import type { SessionData } from "../../src/session/session.js"; +import { registerPdfTool } from "../../src/server/tools/pdf.js"; + +/** Shape of the tool callback handed to registerTool. NOTE(review): Record and Promise appear here without type arguments, which looks like an extraction artifact; confirm against the repository. */ type Handler = (args: Record) => Promise; + +/** Build a fake McpServer whose registerTool stores the callback it is given; handler() returns that stored callback, cast to Handler, when called. */ +function captureHandler(): { server: McpServer; handler: () => Handler } { + let captured: Handler | undefined; + const server = { + registerTool: (_name: string, _cfg: unknown, fn: Handler) => { + captured = fn; + }, + } as unknown as McpServer; + return { server, handler: () => captured as Handler }; +} + +/** Minimal session manager backed by one session whose page.pdf is `pdf`.
*/ +function fakeSessions(pdf: SessionData["page"]["pdf"]): SessionManager { + const session = { id: "s", health: "ok", page: { pdf } } as unknown as SessionData; + /* get() ignores its arguments and always returns the one session; markBusy and markIdle do nothing. */ return { + get: () => session, + markBusy: () => {}, + markIdle: () => {}, + } as unknown as SessionManager; +} + +/** browser_pdf handler: base64 output with the options forwarded to page.pdf, writing to a path, and the pdf_unsupported error when page.pdf throws. NOTE(review): the Record casts below have no type arguments in this copy; confirm against the repository. */ describe("browser_pdf", () => { + test("no path -> returns base64 with the right options forwarded", async () => { + /* Receives whatever options object the handler passes to page.pdf. */ let seen: unknown; + const { server, handler } = captureHandler(); + registerPdfTool( + server, + fakeSessions((async (o: unknown) => { + seen = o; + return Buffer.from("%PDF-1.4 fake"); + }) as SessionData["page"]["pdf"]), + ); + const res = await handler()({ + sessionId: "s", + format: "A4", + landscape: true, + printBackground: true, + }); + expect(seen).toEqual({ format: "A4", landscape: true, printBackground: true }); + const payload = res.structuredContent as Record; + expect(Buffer.from(payload.pdfBase64 as string, "base64").toString()).toBe("%PDF-1.4 fake"); + }); + + test("with path -> writes the file and returns {path}", async () => { + /* NOTE(review): the file is written into the OS temp dir and nothing in this test removes it afterwards. */ const out = join(tmpdir(), `fuse-pdf-${Date.now()}.pdf`); + const { server, handler } = captureHandler(); + registerPdfTool( + server, + fakeSessions((async () => Buffer.from("ONDISK")) as SessionData["page"]["pdf"]), + ); + const res = await handler()({ sessionId: "s", path: out }); + expect((res.structuredContent as Record).path).toBe(out); + expect(readFileSync(out, "utf-8")).toBe("ONDISK"); + }); + + test("page.pdf throwing -> clear headless-only error", async () => { + const { server, handler } = captureHandler(); + registerPdfTool( + server, + fakeSessions((async () => { + throw new Error("PDF generation is only supported in headless mode"); + }) as SessionData["page"]["pdf"]), + ); + const res = await handler()({ sessionId: "s" }); + expect(res.isError).toBe(true); + expect((res.structuredContent as Record).code).toBe("pdf_unsupported"); + }); +}); diff --git a/tests/unit/permissions-tool.test.ts
b/tests/unit/permissions-tool.test.ts new file mode 100644 index 0000000..8418c17 --- /dev/null +++ b/tests/unit/permissions-tool.test.ts @@ -0,0 +1,41 @@ +import { describe, expect, test } from "bun:test"; +import type { BrowserContext } from "playwright"; +import { applyPermissions } from "../../src/server/tools/permissions.js"; + +/** BrowserContext stub: grantPermissions pushes an entry (perms plus opts) onto grants, and clearPermissions bumps a counter that cleared() reads back. NOTE(review): the bare Promise return annotations look like they lost their type arguments in this copy; confirm against the repository. */ +function fakeContext() { + const grants: Array<{ perms: string[]; opts?: { origin: string } }> = []; + let cleared = 0; + const context = { + async grantPermissions(perms: string[], opts?: { origin: string }): Promise { + grants.push({ perms, opts }); + }, + async clearPermissions(): Promise { + cleared += 1; + }, + } as unknown as BrowserContext; + return { context, grants, cleared: () => cleared }; +} + +/** applyPermissions: grant without an origin, grant scoped to an origin, and clear. */ describe("applyPermissions", () => { + test("grant forwards permissions without origin when none given", async () => { + const { context, grants } = fakeContext(); + await applyPermissions(context, "grant", ["geolocation", "notifications"]); + expect(grants).toEqual([{ perms: ["geolocation", "notifications"], opts: undefined }]); + }); + + test("grant scopes to an origin when provided", async () => { + const { context, grants } = fakeContext(); + await applyPermissions(context, "grant", ["clipboard-read"], "https://x.com"); + expect(grants).toEqual([ + { perms: ["clipboard-read"], opts: { origin: "https://x.com" } }, + ]); + }); + + test("clear revokes all and never grants", async () => { + const { context, grants, cleared } = fakeContext(); + await applyPermissions(context, "clear", []); + expect(cleared()).toBe(1); + expect(grants).toHaveLength(0); + }); +}); diff --git a/tests/unit/route-tool.test.ts b/tests/unit/route-tool.test.ts new file mode 100644 index 0000000..a72fcc8 --- /dev/null +++ b/tests/unit/route-tool.test.ts @@ -0,0 +1,64 @@ +import { describe, expect, test } from "bun:test"; +import type { BrowserContext, Route } from "playwright"; +import { applyRoute } from
"../../src/server/tools/route.js"; + +/** BrowserContext stub: route() stores each pattern with its handler in routes and unroute() stores the pattern in unrouted, so a test can invoke a stored handler itself. */ +function fakeContext() { + const routes: Array<{ pattern: string; handler: (r: Route) => unknown }> = []; + const unrouted: string[] = []; + const context = { + async route(pattern: string, handler: (r: Route) => unknown): Promise { + routes.push({ pattern, handler }); + }, + async unroute(pattern: string): Promise { + unrouted.push(pattern); + }, + } as unknown as BrowserContext; + return { context, routes, unrouted }; +} + +/** Route stub: fulfill() keeps its options in calls.fulfill and abort() increments calls.abort. */ +function fakeRoute() { + const calls: { fulfill?: unknown; abort: number } = { abort: 0 }; + const route = { + async fulfill(opts: unknown): Promise { + calls.fulfill = opts; + }, + async abort(): Promise { + calls.abort += 1; + }, + } as unknown as Route; + return { route, calls }; +} + +/** applyRoute: mock (handler fulfills with the given options), abort (handler aborts the request) and unroute. */ describe("applyRoute", () => { + test("mock installs a route that fulfills with the given options", async () => { + const { context, routes } = fakeContext(); + await applyRoute(context, "**/api/**", "mock", { + status: 201, + body: "{}", + contentType: "application/json", + }); + expect(routes).toHaveLength(1); + expect(routes[0]?.pattern).toBe("**/api/**"); + const { route, calls } = fakeRoute(); + await routes[0]?.handler(route); + expect(calls.fulfill).toEqual({ status: 201, body: "{}", contentType: "application/json" }); + }); + + test("abort installs a route that aborts the request", async () => { + const { context, routes } = fakeContext(); + await applyRoute(context, "**/track", "abort"); + const { route, calls } = fakeRoute(); + await routes[0]?.handler(route); + expect(calls.abort).toBe(1); + expect(calls.fulfill).toBeUndefined(); + }); + + test("unroute removes the route and installs nothing", async () => { + const { context, routes, unrouted } = fakeContext(); + await applyRoute(context, "**/api/**", "unroute"); + expect(unrouted).toEqual(["**/api/**"]); + expect(routes).toHaveLength(0); +
}); +});