diff --git a/examples/integrations/langchain/README.md b/examples/integrations/langchain/README.md index eb1c084..3e9d894 100644 --- a/examples/integrations/langchain/README.md +++ b/examples/integrations/langchain/README.md @@ -2,12 +2,21 @@ This directory contains examples of integrating Langchain with our web automation tools: -1. **Browserbase Integration**: A lightweight solution for web scraping and data extraction using our managed browser infrastructure. +1. **Browserbase Integration** (`browserbase/`): A lightweight solution for web scraping and data extraction using our managed browser infrastructure. -2. **Stagehand Integration**: Full web automation capabilities using our open-source AI-powered browser automation SDK. +2. **Stagehand Integration** (`stagehand/`): Full web automation capabilities using our open-source AI-powered browser automation SDK. + +3. **Deep Agents + Browserbase** (`deepagents-browserbase/`): A Python example combining LangChain Deep Agents with Browserbase Search, Fetch, and Stagehand browser sessions for research workflows. + +4. **KYC Onboarding Agent** (`kyc-onboarding/`): A Python example that runs automated KYC due diligence on a company using a five-track fan-out (corporate registry, beneficial ownership, sanctions, litigation, adverse media) and per-owner individual KYC checks. Uses Deep Agents for orchestration and Stagehand for portal navigation. + +5. **Patent Landscape Agent** (`patent-landscape-agent/`): A Python example that researches a patent landscape across USPTO, EPO, and WIPO using a three-phase workflow — parallel five-track research (granted patents, prosecution history, ownership, PTAB litigation, inventor network), per-family deep dives, and final memo synthesis. Uses OpenAI as the single model provider for both the Deep Agents orchestrator and Stagehand browser sessions. Choose the example that best fits your needs: - Use Browserbase for simple web scraping and data collection - Use Stagehand for complex automation workflows with AI-driven interactions +- Use Deep Agents + Browserbase for multi-step research agents with human approval gates +- Use KYC Onboarding for compliance workflows that navigate gated portals and forms +- Use Patent Landscape Agent for multi-source research with persistent workpapers and structured memo output See the respective directories for detailed implementation guides. \ No newline at end of file diff --git a/examples/integrations/langchain/kyc-onboarding/.env.example b/examples/integrations/langchain/kyc-onboarding/.env.example new file mode 100644 index 0000000..5db156f --- /dev/null +++ b/examples/integrations/langchain/kyc-onboarding/.env.example @@ -0,0 +1,12 @@ +BROWSERBASE_API_KEY=bb_... + +# Use direct OpenAI +OPENAI_API_KEY=sk-... 
+ +# Or use the Browserbase Model Gateway instead (no separate OpenAI key needed) +# DEEPAGENT_BASE_URL=https:// + +# Optional model overrides +# DEEPAGENT_MODEL=gpt-5.4 +# STAGEHAND_MODEL=google/gemini-3-flash-preview +# STAGEHAND_AGENT_MODEL=anthropic/claude-sonnet-4-6 diff --git a/examples/integrations/langchain/kyc-onboarding/.gitignore b/examples/integrations/langchain/kyc-onboarding/.gitignore new file mode 100644 index 0000000..69cac2e --- /dev/null +++ b/examples/integrations/langchain/kyc-onboarding/.gitignore @@ -0,0 +1,4 @@ +.env +.venv/ +__pycache__/ +*.pyc diff --git a/examples/integrations/langchain/kyc-onboarding/README.md b/examples/integrations/langchain/kyc-onboarding/README.md new file mode 100644 index 0000000..4ee0fb9 --- /dev/null +++ b/examples/integrations/langchain/kyc-onboarding/README.md @@ -0,0 +1,142 @@ +# LangChain Deep Agents + KYC Onboarding (Python) + +This example builds a KYC onboarding agent that runs automated due diligence on a company +using LangChain Deep Agents for orchestration, Stagehand for browser interactions, and +Browserbase for headless browser sessions. + +The agent navigates real portals — Secretary of State sites, FinCEN BOI, OFAC, PACER, SEC EDGAR, +and news publishers — rather than relying on search or fetch APIs alone. Browserbase Agent Identity +handles anti-bot measures and CAPTCHAs automatically. + +## Architecture + +**Phase 1 — five-track fan-out:** + +| Subagent | Portals | +|---|---| +| `corporate-registry` | Secretary of State portals (Delaware, California, New York, UK Companies House) | +| `beneficial-ownership` | FinCEN BOI, UK PSC register, EU member-state registries | +| `sanctions-pep` | OFAC SDN, EU consolidated list, UK HMT financial sanctions | +| `litigation-regulatory` | PACER, state courts, SEC EDGAR | +| `adverse-media` | Reuters, Bloomberg, FT, WSJ, SEC newsroom, DOJ press releases | + +**Phase 2 — individual KYC fan-out:** + +After the `beneficial-ownership` subagent returns a list of ultimate beneficial owners (UBOs), +the orchestrator delegates one `individual-kyc` subagent call per person. Each individual check +covers sanctions screening, PEP status, and adverse media. + +**Tools (defined in `kyc_tools.py`):** + +- `kyc_search`: Browserbase Search for initial discovery before opening portals +- `kyc_fetch`: Browserbase Fetch for static pages (EDGAR filings, public registries) +- `kyc_extract_from_portal`: opens a Browserbase session and extracts structured data from a + rendered portal page using Stagehand `extract` +- `kyc_search_portal`: opens a Browserbase session and runs a multi-step browser task (form fills, + paginated search navigation) using a Stagehand agent — gated behind human approval + +Every browser session produces a session replay URL included in the tool response for audit purposes. + +## Requirements + +- Python 3.11+ +- `BROWSERBASE_API_KEY` for Browserbase Search, Fetch, and browser sessions (including Stagehand) +- An OpenAI-compatible model for the Deep Agent orchestrator + +The sample accepts `OPENAI_API_KEY` for direct OpenAI access, or `BROWSERBASE_API_KEY` paired with +`DEEPAGENT_BASE_URL` for a Browserbase-backed OpenAI-compatible gateway. 
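+
+Internally, `build_model()` in `main.py` resolves this with a simple either/or. A condensed sketch
+of that selection logic (the model name here is just a placeholder; `DEEPAGENT_MODEL` or `--model`
+overrides it, and `main.py` additionally raises if no usable key is configured):
+
+```python
+import os
+
+from langchain_openai import ChatOpenAI
+
+# Prefer a direct OpenAI key; otherwise fall back to the Browserbase key
+# plus the OpenAI-compatible gateway URL from DEEPAGENT_BASE_URL.
+base_url = os.getenv("DEEPAGENT_BASE_URL") or os.getenv("OPENAI_BASE_URL")
+api_key = os.getenv("OPENAI_API_KEY") or os.getenv("BROWSERBASE_API_KEY")
+
+model = ChatOpenAI(
+    model="gpt-4o",  # placeholder; main.py reads DEEPAGENT_MODEL / --model
+    api_key=api_key,
+    **({"base_url": base_url} if base_url else {}),
+)
+```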
+ +Default models: +- Deep Agent orchestrator: `gpt-4o` +- Stagehand extraction: `google/gemini-3-flash-preview` +- Stagehand interactive agent: `anthropic/claude-sonnet-4-6` + +## Install + +```bash +cd examples/integrations/langchain/kyc-onboarding +python3 -m venv .venv +source .venv/bin/activate +pip install -r requirements.txt +``` + +## Environment + +Copy `.env.example` to `.env` and fill in your keys: + +```bash +cp .env.example .env +``` + +```ini +BROWSERBASE_API_KEY=bb_... + +# Use direct OpenAI +OPENAI_API_KEY=sk-... + +# Or use the Browserbase Model Gateway instead (no separate OpenAI key needed) +# DEEPAGENT_BASE_URL=https:// + +# Optional model overrides +# DEEPAGENT_MODEL=gpt-5.4 +# STAGEHAND_MODEL=google/gemini-3-flash-preview +# STAGEHAND_AGENT_MODEL=anthropic/claude-sonnet-4-6 +``` + +## Run + +Use the default demo company: + +```bash +python main.py +``` + +Run on a specific company and jurisdiction: + +```bash +python main.py "Stripe, Inc." --jurisdiction Delaware +python main.py "Revolut Ltd" --jurisdiction "United Kingdom" +``` + +Override the model: + +```bash +python main.py "Acme Corp Inc." --jurisdiction Delaware --model gpt-4o +``` + +## Approval flow + +`kyc_search_portal` is gated behind `interrupt_on`. Whenever the agent wants to submit a search +form, navigate paginated results, or interact with a portal, the script pauses and prompts: + +``` +Pending tool call +Tool: kyc_search_portal +Arguments: +{ + "start_url": "https://icis.corp.delaware.gov/ecorp/entitysearch/namesearch.aspx", + "task": "Search for 'Acme Corp Inc.' and extract the entity status, formation date, and registered agent." +} +Allowed decisions: approve, edit, reject +Decision [approve/edit/reject]: +``` + +- `approve` — run the task as proposed +- `edit` — provide replacement JSON args before running +- `reject` — skip this action + +This puts human review at the tool boundary rather than inside ad hoc shell calls. + +## Notes + +- `kyc_extract_from_portal` maps to Stagehand `extract` — structured, read-only extraction from a rendered page. +- `kyc_search_portal` maps to Stagehand `agent().execute()` — multi-step agentic browsing for portals that require interaction. +- Browserbase Agent Identity manages browser fingerprinting, residential proxy routing, and CAPTCHA solving. No additional configuration is needed beyond `BROWSERBASE_API_KEY`. +- Each `kyc_search_portal` and `kyc_extract_from_portal` response includes a `session_url` field linking to the Browserbase session replay. These replays provide visual evidence for audit workpapers. +- The individual-kyc Phase 2 fan-out is driven by the orchestrator parsing the `beneficial-ownership` subagent report. If the registry is not publicly searchable, the subagent notes this explicitly and Phase 2 proceeds with whatever names were found. + +## Suggested prompts + +- `python main.py "Anthropic, PBC" --jurisdiction Delaware` +- `python main.py "Klarna Bank AB" --jurisdiction Sweden` +- `python main.py "Acme Corp Inc." 
--jurisdiction Delaware` (demo company, expect sparse results) diff --git a/examples/integrations/langchain/kyc-onboarding/kyc_tools.py b/examples/integrations/langchain/kyc-onboarding/kyc_tools.py new file mode 100644 index 0000000..b2787cc --- /dev/null +++ b/examples/integrations/langchain/kyc-onboarding/kyc_tools.py @@ -0,0 +1,234 @@ +from __future__ import annotations + +import asyncio +import json +import os +import re +from typing import Any + +from browserbase import Browserbase +from bs4 import BeautifulSoup +from langchain.tools import tool +from stagehand import AsyncStagehand + +DEFAULT_STAGEHAND_MODEL = os.getenv( + "STAGEHAND_MODEL", + "google/gemini-3-flash-preview", +) +DEFAULT_STAGEHAND_AGENT_MODEL = os.getenv( + "STAGEHAND_AGENT_MODEL", + "anthropic/claude-sonnet-4-6", +) + + +def _require_env(name: str) -> str: + value = os.getenv(name, "").strip() + if not value: + raise ValueError(f"Missing required environment variable: {name}") + return value + + +def _browserbase_client() -> Browserbase: + return Browserbase(api_key=_require_env("BROWSERBASE_API_KEY")) + + +def _normalize(value: Any) -> Any: + if value is None or isinstance(value, (str, int, float, bool)): + return value + if isinstance(value, dict): + return {str(key): _normalize(val) for key, val in value.items()} + if isinstance(value, (list, tuple, set)): + return [_normalize(item) for item in value] + if hasattr(value, "model_dump"): + return _normalize(value.model_dump()) + if hasattr(value, "dict"): + return _normalize(value.dict()) + if hasattr(value, "__dict__"): + public = { + key: val + for key, val in vars(value).items() + if not key.startswith("_") and not callable(val) + } + if public: + return _normalize(public) + return str(value) + + +def _json(value: Any) -> str: + return json.dumps(_normalize(value), indent=2, default=str) + + +def _html_to_text(html: str, max_chars: int) -> tuple[str, str]: + soup = BeautifulSoup(html, "html.parser") + title = soup.title.get_text(" ", strip=True) if soup.title else "" + for tag in soup(["script", "style", "noscript"]): + tag.decompose() + body = soup.body or soup + text = body.get_text("\n", strip=True) + text = re.sub(r"\n{3,}", "\n\n", text) + return title, text[:max_chars] + + +def _stagehand_client() -> AsyncStagehand: + return AsyncStagehand( + browserbase_api_key=_require_env("BROWSERBASE_API_KEY"), + ) + + +def _run_async(coro: Any) -> Any: + return asyncio.run(coro) + + +@tool +def kyc_search(query: str, num_results: int = 5) -> str: + """Search the web with Browserbase. Use first for discovery before opening portals or pages.""" + bb = _browserbase_client() + response = bb.search.web(query=query, num_results=max(1, min(num_results, 10))) + results = [] + for result in getattr(response, "results", []): + results.append( + { + "title": getattr(result, "title", ""), + "url": getattr(result, "url", ""), + "author": getattr(result, "author", None), + "published_date": ( + getattr(result, "published_date", None) + or getattr(result, "publishedDate", None) + ), + } + ) + return _json( + { + "query": query, + "request_id": getattr(response, "request_id", None) + or getattr(response, "requestId", None), + "results": results, + } + ) + + +@tool +def kyc_fetch(url: str, use_proxy: bool = False, max_chars: int = 12000) -> str: + """Fetch page content without a browser session. Best for static pages such as SEC EDGAR filings and public registries. 
+ + Returns an error with a fallback instruction if the page exceeds 1MB — use kyc_extract_from_portal for those URLs instead. + """ + bb = _browserbase_client() + try: + response = bb.fetch_api.create(url=url, proxies=use_proxy) + except Exception as e: + return _json({"error": str(e), "fallback": "Page too large for Fetch. Call kyc_extract_from_portal with this URL instead."}) + content = getattr(response, "content", "") + content_type = ( + getattr(response, "content_type", None) + or getattr(response, "contentType", "") + or "" + ).lower() + + title = "" + text = str(content)[:max_chars] + if "html" in content_type: + title, text = _html_to_text(str(content), max_chars=max_chars) + + return _json( + { + "url": url, + "status_code": getattr(response, "status_code", None) + or getattr(response, "statusCode", None), + "content_type": getattr(response, "content_type", None) + or getattr(response, "contentType", None), + "encoding": getattr(response, "encoding", None), + "title": title, + "text": text, + } + ) + + +@tool +def kyc_extract_from_portal(start_url: str, instruction: str) -> str: + """Open a Browserbase browser session and extract structured data from a rendered portal page using Stagehand. + + Use for read-only lookups on JavaScript-heavy registry portals, sanctions lists, and court record sites. + The session_url in the response links to the Browserbase session replay. + """ + return _run_async(_kyc_extract_from_portal_async(start_url=start_url, instruction=instruction)) + + +async def _kyc_extract_from_portal_async(start_url: str, instruction: str) -> str: + client = _stagehand_client() + start_resp = await client.sessions.start( + model_name=DEFAULT_STAGEHAND_MODEL, + ) + session_id = start_resp.data.session_id + + try: + await client.sessions.navigate( + id=session_id, + url=start_url, + frame_id="", + ) + result = await client.sessions.extract( + id=session_id, + instruction=instruction, + ) + extracted = getattr(getattr(result, "data", None), "result", None) + return _json( + { + "start_url": start_url, + "session_id": session_id, + "session_url": f"https://browserbase.com/sessions/{session_id}", + "instruction": instruction, + "result": _normalize(extracted), + } + ) + finally: + await client.sessions.end(id=session_id) + + +@tool +def kyc_search_portal(start_url: str, task: str) -> str: + """Open a Browserbase browser session and run a multi-step browser task on a portal using a Stagehand agent. + + Use for interactive lookups that require filling search forms, navigating paginated results, or + submitting queries to gated portals (Secretary of State, FinCEN BOI, OFAC, PACER, EDGAR). + Agent Identity handles anti-bot measures and CAPTCHAs automatically. + The session_url in the response links to the Browserbase session replay. + """ + return _run_async(_kyc_search_portal_async(start_url=start_url, task=task)) + + +async def _kyc_search_portal_async(start_url: str, task: str) -> str: + client = _stagehand_client() + start_resp = await client.sessions.start( + model_name=DEFAULT_STAGEHAND_AGENT_MODEL, + ) + session_id = start_resp.data.session_id + + try: + await client.sessions.navigate( + id=session_id, + url=start_url, + frame_id="", + ) + result = await client.sessions.execute( + id=session_id, + execute_options={ + "instruction": task, + "max_steps": 25, + }, + agent_config={ + "model": DEFAULT_STAGEHAND_AGENT_MODEL, + "instructions": ( + "You are executing a KYC lookup on behalf of a compliance agent. 
" + "Be precise, extract all relevant fields, and stop once the lookup is complete. " + "If a CAPTCHA or challenge appears, pause and report it." + ), + }, + timeout=300.0, + ) + data = _normalize(result) + if isinstance(data, dict): + data["session_url"] = f"https://browserbase.com/sessions/{session_id}" + return _json(data) + finally: + await client.sessions.end(id=session_id) diff --git a/examples/integrations/langchain/kyc-onboarding/main.py b/examples/integrations/langchain/kyc-onboarding/main.py new file mode 100644 index 0000000..3316569 --- /dev/null +++ b/examples/integrations/langchain/kyc-onboarding/main.py @@ -0,0 +1,415 @@ +from __future__ import annotations + +import argparse +import json +import os +import uuid +from typing import Any + +from deepagents import create_deep_agent +from dotenv import load_dotenv +from langgraph.checkpoint.memory import MemorySaver +from langgraph.types import Command +from langchain_openai import ChatOpenAI + +from kyc_tools import ( + kyc_extract_from_portal, + kyc_fetch, + kyc_search, + kyc_search_portal, +) + + +SYSTEM_PROMPT = """You are a KYC onboarding orchestrator. Given a company name and jurisdiction, +run a two-phase due diligence workflow and return a consolidated KYC report. + +Phase 1 — fan out to all five specialist subagents in parallel: +- corporate-registry: verify legal existence, registered agent, filing status, and good standing. +- beneficial-ownership: identify ultimate beneficial owners (UBOs) and controlling persons. +- sanctions-pep: screen the entity against OFAC, EU, and UK sanctions lists and PEP registries. +- litigation-regulatory: search PACER, relevant state courts, and SEC EDGAR for material actions. +- adverse-media: search publisher news sites for negative coverage, fraud, or enforcement actions. + +Phase 2 — individual KYC fan-out: +- Parse the beneficial-ownership report for each named individual owner. +- For each person, delegate to the individual-kyc subagent to run sanctions, PEP, and adverse-media checks. + +Final output: +- Consolidated report with findings per track. +- Flag any hits, discrepancies, or items requiring manual review. +- Include session replay URLs from each browser session for audit purposes. +""" + +_ALL_TOOLS = [kyc_search, kyc_fetch, kyc_extract_from_portal, kyc_search_portal] + +CORPORATE_REGISTRY = { + "name": "corporate-registry", + "description": ( + "Verifies corporate legal existence, registered agent, filing status, and good standing " + "via state Secretary of State portals and equivalent registries." + ), + "system_prompt": """You are a corporate registry specialist. + +Your task: confirm the company's legal existence and standing in the given jurisdiction. + +Target portals (select based on jurisdiction): +- Delaware: https://icis.corp.delaware.gov/ecorp/entitysearch/namesearch.aspx +- California: https://bizfileonline.sos.ca.gov/search/business +- New York: https://apps.dos.ny.gov/publicInquiry/ +- Federal (UK): https://find-and-update.company-information.service.gov.uk/ + +Steps: +1. Use kyc_search to locate the correct portal for the jurisdiction. +2. Use kyc_search_portal to search by company name and retrieve: entity type, status, formation date, + registered agent name and address, and any recent filings. +3. Use kyc_extract_from_portal for read-only extraction on rendered results pages. + +Return a structured report with all retrieved fields and the session replay URL for each session. 
+""", + "tools": _ALL_TOOLS, +} + +BENEFICIAL_OWNERSHIP = { + "name": "beneficial-ownership", + "description": ( + "Identifies ultimate beneficial owners (UBOs) and controlling persons via FinCEN BOI " + "and jurisdiction-specific registries." + ), + "system_prompt": """You are a beneficial ownership specialist. + +Your task: identify all ultimate beneficial owners (UBOs) and controlling persons for the entity. + +Target portals: +- FinCEN BOI search (if available): https://boiefiling.fincen.gov/ +- UK PSC register: https://find-and-update.company-information.service.gov.uk/ +- EU registries vary by member state; use kyc_search to locate the correct portal. + +Steps: +1. Search the FinCEN BOI system and any jurisdiction-specific beneficial ownership registry. +2. Extract each named individual: full name, ownership percentage, nature of control, nationality. +3. If the registry is not publicly searchable, note it explicitly in your report. + +Return a structured list of beneficial owners with all available fields, plus session replay URLs. +This list will be used by the Phase 2 individual KYC fan-out. +""", + "tools": _ALL_TOOLS, +} + +SANCTIONS_PEP = { + "name": "sanctions-pep", + "description": ( + "Screens the entity against OFAC SDN, EU consolidated sanctions list, UK HMT financial " + "sanctions list, and PEP registries." + ), + "system_prompt": """You are a sanctions and PEP screening specialist. + +Your task: screen the company against all major sanctions and PEP lists. + +Target portals: +- OFAC SDN: https://sanctionssearch.ofac.treas.gov/ +- EU consolidated list: https://eeas.europa.eu/topics/sanctions-policy/8442/consolidated-list-sanctions_en +- UK HMT: https://www.gov.uk/government/publications/financial-sanctions-consolidated-list-of-targets + +Steps: +1. Search each portal by the exact company name and any known aliases or trading names. +2. Record: whether a match was found, match score or confidence, relevant list entry details, + and the date of the check. +3. If no match is found, state that explicitly with the search terms used. + +Return a per-list screening result with session replay URLs for audit evidence. +""", + "tools": _ALL_TOOLS, +} + +LITIGATION_REGULATORY = { + "name": "litigation-regulatory", + "description": ( + "Searches PACER, relevant state courts, and SEC EDGAR for material litigation, " + "enforcement actions, and regulatory filings." + ), + "system_prompt": """You are a litigation and regulatory specialist. + +Your task: identify material litigation, enforcement actions, and relevant regulatory filings. + +Steps: + +1. SEC EDGAR full-text search (JSON API — use kyc_fetch, not kyc_search_portal): + Construct the URL by substituting the company name into the query parameter, e.g.: + https://efts.sec.gov/LATEST/search-index?q=%22Stripe%2C+Inc.%22&dateRange=custom&startdt=2015-01-01 + Parse the JSON response for hits referencing legal proceedings, enforcement, or Wells notices. + +2. SEC EDGAR company search (rendered page — use kyc_extract_from_portal): + Construct the URL by substituting the company name, e.g.: + https://www.sec.gov/cgi-bin/browse-edgar?company=Stripe&action=getcompany + Extract any filings that reference legal proceedings (8-K Item 8.01, litigation releases). + +3. PACER Case Locator (interactive portal — use kyc_search_portal): + https://pcl.uscourts.gov/pcl/pages/search/findCase.jsf + Search for federal civil and criminal cases naming the company as a party. + +4. 
Use kyc_search to surface any publicly reported regulatory actions not captured above. + +Summarize: case name, docket number, court, filing date, current status, and resolution if any. +Return findings with session replay URLs for each browser session. +""", + "tools": _ALL_TOOLS, +} + +ADVERSE_MEDIA = { + "name": "adverse-media", + "description": ( + "Searches publisher news sites for negative coverage, fraud allegations, enforcement actions, " + "or reputational risk signals." + ), + "system_prompt": """You are an adverse media specialist. + +Your task: identify negative news coverage that represents reputational or compliance risk. + +Search targets (use kyc_search, then kyc_fetch or kyc_extract_from_portal for full articles): +- Reuters, Bloomberg, Financial Times, Wall Street Journal +- Regulatory and enforcement news sites (SEC.gov newsroom, FinCEN advisories, DOJ press releases) +- Industry-specific press + +Search terms to use: +- "{company} fraud" +- "{company} enforcement" +- "{company} investigation" +- "{company} money laundering" +- "{company} sanctions" + +Steps: +1. Run searches with each term combination. +2. For each hit, fetch the article and extract: headline, publication, date, summary of allegation, + and outcome if known. +3. Distinguish confirmed findings from allegations. + +Return a ranked list of adverse media findings by severity, with source URLs. +""", + "tools": _ALL_TOOLS, +} + +INDIVIDUAL_KYC = { + "name": "individual-kyc", + "description": ( + "Runs KYC checks on an individual beneficial owner: sanctions screening, PEP status, " + "and adverse media search." + ), + "system_prompt": """You are an individual KYC specialist. + +You will receive a person's name, nationality, and role. Run the following checks: + +1. Sanctions screening: + - OFAC SDN: https://sanctionssearch.ofac.treas.gov/ + - EU consolidated list: https://eeas.europa.eu/topics/sanctions-policy/8442/consolidated-list-sanctions_en + - UK HMT: https://www.gov.uk/government/publications/financial-sanctions-consolidated-list-of-targets + +2. PEP screening: + - Use kyc_search for "{name} politically exposed person" and "{name} government official". + +3. Adverse media: + - Search for "{name} fraud", "{name} investigation", "{name} enforcement". + +Return a structured report with: name, role, sanctions result (match/no-match per list), +PEP status, adverse media hits, and session replay URLs for each browser session. +""", + "tools": _ALL_TOOLS, +} + + +def _normalize_chat_model_name(model: str) -> str: + if ":" in model: + provider, raw_model = model.split(":", 1) + if provider == "openai": + return raw_model + return model + + +def build_model(model: str) -> ChatOpenAI: + base_url = os.getenv("DEEPAGENT_BASE_URL") or os.getenv("OPENAI_BASE_URL") + openai_api_key = os.getenv("OPENAI_API_KEY") + browserbase_api_key = os.getenv("BROWSERBASE_API_KEY") + + if openai_api_key: + api_key = openai_api_key + elif browserbase_api_key and base_url: + api_key = browserbase_api_key + else: + raise ValueError( + "Missing Deep Agent model configuration. Set OPENAI_API_KEY for direct OpenAI access, " + "or set BROWSERBASE_API_KEY together with DEEPAGENT_BASE_URL/OPENAI_BASE_URL for a " + "Browserbase-backed OpenAI-compatible gateway." 
+ ) + + kwargs: dict[str, Any] = { + "model": _normalize_chat_model_name(model), + "api_key": api_key, + } + if base_url: + kwargs["base_url"] = base_url + return ChatOpenAI(**kwargs) + + +def build_agent(model: str): + return create_deep_agent( + model=build_model(model), + tools=[kyc_search, kyc_fetch], + subagents=[ + CORPORATE_REGISTRY, + BENEFICIAL_OWNERSHIP, + SANCTIONS_PEP, + LITIGATION_REGULATORY, + ADVERSE_MEDIA, + INDIVIDUAL_KYC, + ], + system_prompt=SYSTEM_PROMPT, + interrupt_on={ + "kyc_search_portal": { + "allowed_decisions": ["approve", "edit", "reject"] + } + }, + checkpointer=MemorySaver(), + ) + + +def _stringify_content(content: Any) -> str: + if isinstance(content, str): + return content + if isinstance(content, list): + parts = [] + for item in content: + if isinstance(item, str): + parts.append(item) + elif isinstance(item, dict): + if item.get("type") == "text": + parts.append(str(item.get("text", ""))) + else: + parts.append(json.dumps(item, default=str)) + else: + parts.append(str(item)) + return "\n".join(part for part in parts if part) + return json.dumps(content, indent=2, default=str) + + +def _final_text(result: Any) -> str: + state = getattr(result, "value", result) + if isinstance(state, dict): + messages = state.get("messages", []) + for message in reversed(messages): + msg_type = getattr(message, "type", None) + if msg_type is None and isinstance(message, dict): + msg_type = message.get("type") or message.get("role") + if msg_type in {"ai", "assistant"}: + content = getattr(message, "content", None) + if content is None and isinstance(message, dict): + content = message.get("content") + return _stringify_content(content) + return json.dumps(state, indent=2, default=str) + return str(state) + + +def _collect_decisions(interrupt_value: Any) -> list[dict[str, Any]]: + action_requests = interrupt_value["action_requests"] + review_configs = interrupt_value["review_configs"] + config_by_name = {config["action_name"]: config for config in review_configs} + decisions: list[dict[str, Any]] = [] + + for action in action_requests: + review = config_by_name[action["name"]] + allowed = review["allowed_decisions"] + + print("\nPending tool call") + print(f"Tool: {action['name']}") + print("Arguments:") + print(json.dumps(action["args"], indent=2, default=str)) + print(f"Allowed decisions: {', '.join(allowed)}") + + while True: + raw = input("Decision [approve/edit/reject]: ").strip().lower() + if raw in allowed: + if raw == "approve": + decisions.append({"type": "approve"}) + break + if raw == "reject": + decisions.append({"type": "reject"}) + break + + edited = input("Enter replacement JSON args: ").strip() + try: + edited_args = json.loads(edited) + except json.JSONDecodeError: + print("Invalid JSON. Try again.") + continue + decisions.append( + { + "type": "edit", + "edited_action": { + "name": action["name"], + "args": edited_args, + }, + } + ) + break + + print("Invalid decision for this tool call.") + + return decisions + + +def _review_actions(result: Any) -> Any: + interrupts = result.interrupts + if len(interrupts) == 1: + return {"decisions": _collect_decisions(interrupts[0].value)} + # Multiple concurrent interrupts — LangGraph requires keying resume by interrupt id. 
+ return { + interrupt.id: {"decisions": _collect_decisions(interrupt.value)} + for interrupt in interrupts + } + + +def run(company: str, jurisdiction: str, model: str) -> str: + agent = build_agent(model=model) + config = {"configurable": {"thread_id": str(uuid.uuid4())}} + query = f"Run a full KYC check on {company!r} incorporated in {jurisdiction}." + result = agent.invoke( + {"messages": [{"role": "user", "content": query}]}, + config=config, + version="v2", + ) + + while result.interrupts: + result = agent.invoke( + Command(resume=_review_actions(result)), + config=config, + version="v2", + ) + + return _final_text(result) + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser( + description="KYC onboarding agent powered by LangChain Deep Agents, Stagehand, and Browserbase." + ) + parser.add_argument( + "company", + nargs="?", + default="Acme Corp Inc.", + help="Full legal name of the company to screen (default: 'Acme Corp Inc.')", + ) + parser.add_argument( + "--jurisdiction", + default="Delaware", + help="Incorporation jurisdiction (default: Delaware)", + ) + parser.add_argument( + "--model", + default=os.getenv("DEEPAGENT_MODEL", "gpt-5.4"), + help="Deep Agents model name.", + ) + return parser.parse_args() + + +if __name__ == "__main__": + load_dotenv() + args = parse_args() + print(run(company=args.company, jurisdiction=args.jurisdiction, model=args.model)) diff --git a/examples/integrations/langchain/kyc-onboarding/requirements.txt b/examples/integrations/langchain/kyc-onboarding/requirements.txt new file mode 100644 index 0000000..0a9bb1a --- /dev/null +++ b/examples/integrations/langchain/kyc-onboarding/requirements.txt @@ -0,0 +1,6 @@ +beautifulsoup4>=4.13.0 +browserbase>=1.8.0 +deepagents>=0.0.5 +langchain-openai>=0.3.0 +python-dotenv>=1.0.0 +stagehand>=3.19.5 diff --git a/examples/integrations/langchain/patent-landscape-agent/.env.example b/examples/integrations/langchain/patent-landscape-agent/.env.example new file mode 100644 index 0000000..c5d269c --- /dev/null +++ b/examples/integrations/langchain/patent-landscape-agent/.env.example @@ -0,0 +1,14 @@ +OPENAI_API_KEY=sk-... +BROWSERBASE_API_KEY=bb_... +# BROWSERBASE_PROJECT_ID=prj_... # optional; scopes sessions to a specific project + +# Optional: override the orchestrator model (default: gpt-4.1) +# PATENT_AGENT_MODEL=gpt-4.1 + +# Optional: override the Stagehand browser agent model (default: openai/gpt-4.1) +# STAGEHAND_MODEL=openai/gpt-4o + +# Optional: LangSmith traces +# LANGSMITH_TRACING=true +# LANGSMITH_API_KEY= +# LANGSMITH_PROJECT= diff --git a/examples/integrations/langchain/patent-landscape-agent/.gitignore b/examples/integrations/langchain/patent-landscape-agent/.gitignore new file mode 100644 index 0000000..6fd5cc0 --- /dev/null +++ b/examples/integrations/langchain/patent-landscape-agent/.gitignore @@ -0,0 +1,5 @@ +.env +.venv/ +__pycache__/ +*.pyc +reports/ diff --git a/examples/integrations/langchain/patent-landscape-agent/BLOG_POST.md b/examples/integrations/langchain/patent-landscape-agent/BLOG_POST.md new file mode 100644 index 0000000..d118ad7 --- /dev/null +++ b/examples/integrations/langchain/patent-landscape-agent/BLOG_POST.md @@ -0,0 +1,302 @@ +# Building a Patent Landscape Agent with LangChain Deep Agents and Browserbase + +Patent research is hard — even for trained professionals. 
The portals that hold the most authoritative data (USPTO Patent Public Search, PTAB, EPO Espacenet, WIPO Patentscope) were designed for expert users navigating dense forms with institutional knowledge. Sessions time out mid-search. Search forms require specific field combinations that aren't documented. Results arrive paginated across dozens of screens. CAPTCHAs appear mid-session. Some portals simply go offline for maintenance. + +Now imagine writing a script to automate that. + +This is the problem browser agents were made for. In this post, we'll walk through how we built a patent landscape research agent using LangChain Deep Agents for orchestration, Stagehand as the SDK for browser agents, and Browserbase for headless browser infrastructure. The agent navigates real patent portals — the same ones a patent attorney would use — fans out across five research tracks in parallel, and synthesizes findings into a structured memo with an audit trail of session replays. + +## Why patent portals can't be handled by an API + +Most research agents are built on search and fetch APIs. These work well for the parts of the web designed to be indexed: news articles, product pages, public APIs, documentation. But APIs see roughly 15% of the web. The other 85% — authenticated portals, JavaScript-rendered applications, gated government databases — requires a browser. + +Patent portals sit firmly in that 85%. Consider what it takes to pull prosecution history from USPTO Patent Center: you navigate to the portal, enter a patent number, wait for the application to render, click through to the Image File Wrapper, select the document type, navigate pagination, and download each document. The portal uses JavaScript rendering throughout, enforces session state, and returns nothing meaningful to a raw HTTP fetch. Try to automate it naively and you get blocked immediately. + +The same is true for PTAB proceedings, EPO Espacenet family searches, WIPO Patentscope PCT data, and USPTO Assignment Search. Each one has its own interaction model. Each one has anti-bot measures. Each one occasionally goes offline. + +This is precisely where [Browserbase](https://browserbase.com) comes in. Its Agent Identity — built on strategic partnerships with Cloudflare and a dedicated research team — gets browser agents past the detection systems that block traditional automation. Stagehand, Browserbase's SDK for browser agents, replaces brittle CSS selectors with natural language instructions that adapt when pages change. Together, they give agents the ability to use the web the way a human expert would. + +## What the agent does + +The patent landscape agent takes a technology area, assignee name, or seed patent number and runs a three-phase research workflow: + +**Phase 1 — parallel five-track research.** Five specialist subagents run concurrently, each responsible for one research track: + +| Subagent | Portals | +|---|---| +| `granted-patents` | USPTO Patent Public Search, EPO Espacenet, WIPO Patentscope | +| `prosecution-history` | USPTO Patent Center (+ Google Patents fallback) | +| `assignment-ownership` | USPTO Patent Assignment Search, EPO Register | +| `litigation-history` | USPTO PTAB (+ Google Patents fallback) | +| `inventor-network` | USPTO search, EPO Espacenet | + +**Phase 2 — per-family deep dives.** After Phase 1 completes, the orchestrator identifies the highest-priority patent families and dispatches a `patent-family-analysis` subagent for each one. 
This subagent traces the full family tree: parent applications, continuations, divisionals, and foreign equivalents across every jurisdiction. + +**Phase 3 — synthesis.** The orchestrator reads all research workpapers, cross-references ownership with litigation risk, surfaces freedom-to-operate flags, and writes a final memo to `./reports/final-memo.md`. + +Every browser session — every portal visit by every subagent — produces a Browserbase session replay URL that appears in the memo's sources appendix, providing a visual audit trail of exactly what the agent saw. + +## Architecture + +``` +agent.py orchestrator (gpt-4.1, OpenAI) +├── tools/patent_research.py single tool: Stagehand agent over Browserbase +└── subagents/ + ├── granted_patents.py USPTO search, EPO Espacenet, WIPO + ├── prosecution_history.py USPTO Patent Center, Google Patents fallback + ├── assignment_ownership.py USPTO Assignments, EPO Register + ├── litigation_history.py USPTO PTAB, Google Patents fallback + ├── inventor_network.py inventor portfolio mapping + └── patent_family_analysis.py continuations, divisionals, foreign equivalents +``` + +The entire demo uses **OpenAI as the sole model provider** — one API key backs both the Deep Agents orchestrator (`gpt-4.1`) and all Stagehand browser sessions (`openai/gpt-4o`). No additional LLM provider keys are needed. + +## The `patent_research` tool + +All browser interactions go through a single LangChain tool: `patent_research`. It accepts the target portal by name, a natural-language task description, an output schema, and an optional session ID for follow-up queries on the same portal page. + +```python +PORTAL_URLS: dict[str, str] = { + "uspto_search": "https://ppubs.uspto.gov/pubwebapp/", + "uspto_patent_center": "https://patentcenter.uspto.gov/", + "uspto_assignments": "https://assignment.uspto.gov/patent/index.html#/patent/search", + "uspto_ptab": "https://ptab.uspto.gov/", + "epo_espacenet": "https://worldwide.espacenet.com/", + "epo_register": "https://register.epo.org/", + "wipo_patentscope": "https://patentscope.wipo.int/search/en/search.jsf", + "google_patents": "https://patents.google.com/", # fallback +} + +@tool +def patent_research( + portal: str, + query: str, + output_schema: str, + previous_session_id: str = "", +) -> str: + """Research patents on a specific public portal using a Stagehand browser agent.""" + return _run_async( + _patent_research_async(portal, query, output_schema, previous_session_id or None) + ) +``` + +Under the hood, each call creates a Browserbase session, navigates to the portal, and hands control to a Stagehand agent configured with OpenAI: + +```python +async def _patent_research_async(portal, query, output_schema, previous_session_id): + client = AsyncStagehand( + browserbase_api_key=_require_env("BROWSERBASE_API_KEY"), + model_api_key=_require_env("OPENAI_API_KEY"), # single provider + ) + session_id = previous_session_id or (await client.sessions.start( + model_name="openai/gpt-4o" + )).data.session_id + + if not previous_session_id: + await client.sessions.navigate(id=session_id, url=PORTAL_URLS[portal], frame_id="") + + result = await client.sessions.execute( + id=session_id, + execute_options={"instruction": f"{query}\n\nExtract: {output_schema}", "max_steps": 30}, + agent_config={ + "model": "openai/gpt-4o", + "instructions": ( + "You are a patent research assistant performing read-only lookups " + "on public patent databases. Extract all requested fields precisely." 
+ ), + }, + timeout=300.0, + ) + + return _json({ + "portal": portal, + "session_id": session_id, + "session_url": f"https://browserbase.com/sessions/{session_id}", + "result": _normalize(result), + }) +``` + +The `session_url` in every response links to a Browserbase session replay — a full visual recording of everything the agent did on that portal. These URLs flow into the final memo's sources appendix, giving you an unambiguous audit trail for every claim. + +### Why not just use a search API? + +It's worth being direct about this. For many research tasks, a search or fetch API is the right tool — faster and cheaper than spinning up a full browser session. But patent portals specifically block those approaches: + +- **USPTO Patent Public Search** requires JavaScript rendering and form interaction to return search results +- **PTAB** uses session state that a stateless fetch can't establish +- **EPO Espacenet** family searches require navigating through multiple click sequences +- **WIPO Patentscope** uses JavaScript-rendered results that aren't in the page source + +These portals weren't designed with machine access in mind. They were designed for human experts. That's exactly why a browser agent — something that interacts with the web the way a human does — is the right abstraction. + +## Subagents: focused system prompts, shared tool + +Each subagent is a dict following the Deep Agents convention: a name, description, system prompt, and tool list. All six share the same `patent_research` tool; what differs is the system prompt, which directs the subagent to specific portals and specifies what to extract. + +Here's the litigation history subagent as an example: + +```python +LITIGATION_HISTORY = { + "name": "litigation-history", + "description": ( + "Searches USPTO PTAB for IPR and PGR proceedings against the top patents. " + "Identifies which patents have been challenged, the petitioner, and the outcome." + ), + "system_prompt": """You are a patent litigation specialist focused on PTAB proceedings. + +Primary portal: uspto_ptab +Fallback portal: google_patents (use if PTAB portal is unavailable or returns no results) + +For each patent, extract all IPR, PGR, CBM, and ex parte reexamination proceedings: +- Proceeding number, type, filing date, petitioner +- Institution decision: date and outcome (instituted / denied) +- Final written decision: date and outcome (claims cancelled / confirmed / mixed) +- Whether an appeal was filed + +If the PTAB portal is inaccessible, switch to google_patents and search for the patent +number. Google Patents displays PTAB proceedings under the "Events" tab. +""", + "tools": [patent_research], +} +``` + +The fallback to Google Patents matters in practice. Government portals go offline for maintenance, return blank pages under load, or block automated sessions intermittently. Building the fallback into the system prompt — rather than into the tool — keeps the tool simple and lets each subagent handle its own failure modes. 
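+
+You can also exercise a single track outside the agent loop by calling the shared tool directly,
+which is a quick way to check that a portal (and its fallback) still behaves before kicking off a
+long run. A minimal sketch; the import path follows the layout shown above, and the query and
+schema strings are purely illustrative:
+
+```python
+import json
+
+from tools.patent_research import patent_research
+
+# Smoke-test the PTAB track with one direct tool call, no orchestrator involved.
+raw = patent_research.invoke({
+    "portal": "uspto_ptab",
+    "query": "Find any IPR or PGR proceedings against US6285999.",
+    "output_schema": "proceeding number, type, petitioner, institution outcome",
+})
+print(json.loads(raw)["session_url"])  # replay of what the agent did on the portal
+```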
+ +## The orchestrator + +The orchestrator is created with `create_deep_agent`, wiring together all six subagents, the `patent_research` tool, and filesystem access for persisting workpapers: + +```python +def build_agent(model: str): + os.makedirs("./reports", exist_ok=True) + return create_deep_agent( + model=ChatOpenAI(model=model, api_key=_require_env("OPENAI_API_KEY")), + tools=[patent_research], + subagents=[ + GRANTED_PATENTS, PROSECUTION_HISTORY, ASSIGNMENT_OWNERSHIP, + LITIGATION_HISTORY, INVENTOR_NETWORK, PATENT_FAMILY_ANALYSIS, + ], + system_prompt=SYSTEM_PROMPT, + permissions=[FilesystemPermission( + operations=["read", "write"], + paths=[os.path.abspath("./reports")], + )], + checkpointer=MemorySaver(), + ) +``` + +`FilesystemPermission` restricts the agent's filesystem access to `./reports/`, where it writes workpapers and reads them during synthesis. The `permissions` parameter is propagated automatically to subagents by the Deep Agents framework — subagents can use the `write_file` tool that FilesystemMiddleware injects. + +The system prompt lays out the three-phase plan: + +```python +SYSTEM_PROMPT = """You are a patent landscape research orchestrator. + +Start by writing ./reports/research-plan.md with your todo list for this run. + +Phase 1 — fan out all five specialist subagents in parallel: +- granted-patents, prosecution-history, assignment-ownership, + litigation-history, inventor-network + +Each subagent writes its workpaper to ./reports/.md. + +Phase 2 — per-family deep dives: +- Read the granted-patents workpaper to identify top 3–5 priority families. +- For each family, delegate to patent-family-analysis. +- Each analysis is written to ./reports/family-.md. + +Phase 3 — synthesis: +- Read all workpapers from ./reports/ using read_file. +- Cross-reference ownership with litigation risk. +- Surface FTO flags based on lapsed or un-entered foreign equivalents. +- Write the final memo to ./reports/final-memo.md. +""" +``` + +## Handling portal failures gracefully + +One thing became immediately clear running this against real portals: USPTO Patent Center and PTAB go down more often than you'd expect. This isn't a bug in the agent — it's a property of the infrastructure it's navigating. + +The agent handles this through subagent-level fallback instructions. When PTAB returns a blank page, the litigation subagent switches to Google Patents, which aggregates PTAB proceedings under each patent's "Events" tab. When Patent Center is unavailable, the prosecution subagent falls back to Google Patents' prosecution timeline. + +This design keeps the `patent_research` tool stateless and general-purpose. Failure handling lives in the system prompt, where it's easy to read, adjust, and extend without touching the tool layer. 
+ +## Running the agent + +Install, configure, and run: + +```bash +cd examples/integrations/langchain/patent-landscape-agent +python3 -m venv .venv && source .venv/bin/activate +pip install -r requirements.txt +cp .env.example .env # add OPENAI_API_KEY and BROWSERBASE_API_KEY +``` + +Default run (PageRank benchmark): + +```bash +python agent.py +``` + +Custom queries: + +```bash +python agent.py "MapReduce distributed computing — Google, US7650331" +python agent.py "Transformer attention mechanism — Google Brain, US10452978" +python agent.py "US6285999" # single seed patent +``` + +## Benchmark: PageRank (Stanford / Google, US6285999) + +We validated the agent end-to-end on the PageRank patent family — a good benchmark because it's well-documented, has a traceable ownership chain (Stanford licensed exclusively to Google in 1998), includes EPO and PCT equivalents, and went through ex parte reexamination in 2011. + +The agent's key findings: + +```markdown +## Executive Summary + +- US6285999B1 is the anchor; Stanford filed associated continuations assigned to + Google via exclusive license executed in 1998 (Reel 009410, Frame 0460). +- EPO EP1062579 granted; CA, JP, AU national phase entries confirmed via WIPO Patentscope. +- Ex parte reexamination closed 2011; reexamination certificate issued with amended claims. +- No active PTAB proceedings found (IPR/PGR filing window closed; patent expired Jan 2018). +- Core PageRank claims expired January 9, 2018 (20-year term from US priority date). + No remaining FTO risk on the core ranking algorithm claims. +- Inventor network: Lawrence Page (primary), Sergey Brin listed on related applications. + Both inventors' subsequent filing activity visible at Google. + +## FTO Flags + +- Core US claims: EXPIRED — no FTO risk. +- EPO EP1062579: confirm lapse status manually via EPO Register (term-based expiry expected). +- PCT national phase countries where patent was NOT entered: potential open FTO jurisdictions. +``` + +Typical run: ~14 `patent_research` tool calls, 6 subagent invocations, ~30 minutes end-to-end. + +## Session replays as audit evidence + +Every `patent_research` call returns a `session_url` field pointing to the Browserbase session replay for that browser session. These URLs are aggregated in the final memo's sources appendix. + +For patent due diligence specifically, this matters: if a human reviewer later questions whether a PTAB proceeding was found or a portal was actually searched, the session replay shows exactly what the agent saw — the same visual evidence a human researcher would provide in a work product note. + +Replays are accessible in the Browserbase dashboard for 7 days after the run and can be shared directly with a URL. + +## What this demonstrates + +Patent portals are a useful stress test for browser agents because they concentrate every challenge at once: complex multi-step navigation, JavaScript rendering, bot detection, session state, CAPTCHA gates, and portals that go offline. If a browser agent framework can handle these portals reliably, it can handle most of the difficult parts of the web. + +The key architectural decisions that make this work in practice: + +1. **One tool, many portals.** A single `patent_research` tool handles all six patent databases. The portal name and query describe what to do; Stagehand figures out how to do it. +2. **Subagent-level fallbacks.** Portal failures are handled in system prompts, not in tool code. This keeps the tool general and the failure logic visible and editable. +3. 
**Session replay for every call.** Every browser session is recorded. The audit trail is automatic, not bolted on. +4. **One model provider.** A single `OPENAI_API_KEY` backs both orchestration and browser reasoning. No additional accounts or keys needed. + +## Resources + +- [Source code](https://github.com/browserbase/integrations/tree/main/examples/integrations/langchain/patent-landscape-agent) +- [Browserbase documentation](https://docs.browserbase.com) +- [Stagehand documentation](https://docs.stagehand.dev) +- [LangChain Deep Agents documentation](https://docs.browserbase.com/integrations/langchain/deepagents) diff --git a/examples/integrations/langchain/patent-landscape-agent/Browserbase_Platform_1-Pager.md b/examples/integrations/langchain/patent-landscape-agent/Browserbase_Platform_1-Pager.md new file mode 100644 index 0000000..1b6f124 --- /dev/null +++ b/examples/integrations/langchain/patent-landscape-agent/Browserbase_Platform_1-Pager.md @@ -0,0 +1,97 @@ +# Browserbase Platform 1 Pager (shareable version) + + + +- Table of contents + +## What is the Browserbase Platform? + +Browserbase is the complete platform to build and deploy agents that browse and interact with the web like humans: + +- **Browsers:** give agents complete control over fleets of headless browsers to interact with websites +- [**Fetch and Search APIs**](https://www.notion.so/Fetch-API-Search-API-Sales-Enablement-7e92ce90bb9349c5b03f70f9ca9c4d07?pvs=21): agents can quickly search and fetch LLM context from the web for quick and token-efficient decisions +- [**Agent Identity](https://www.notion.so/Showing-up-on-the-web-with-agent-identity-32f3c11b6614808ba0a4e0923b492613?pvs=21):** enable agents to access any website with a combination of strategic partnerships (*ex: Cloudflare*) and a dedicated stealth research team +- [**Functions**](https://www.notion.so/Browser-Functions-Sales-Enablement-2fe3c11b6614803f93c1eb6cc4da7146?pvs=21): deploy and run agents on Browserbase for faster & more secure execution +- [**Model Gateway**](https://www.notion.so/Model-Gateway-1-Pager-2ea3c11b66148024b26df7099ebedae3?pvs=21): your Browserbase API key gives access to major models via Stagehand and unified billing. + +![Graphic.png](Browserbase%20Platform%201%20Pager%20(shareable%20version)/Graphic.png) + +All the above, powered by Browserbase’s best-in-class observability with rich logs, live view and replay and its scalable and secure infrastructure layer running 35m+ monthly browser sessions across 10,000 customers, including Ramp, Shopify and Lovable. + +## Why this matters + +### **Market context** + +AI agents are everywhere. Coding agents, assistants, support bots, deep research tools. All of them need web access, and the market has responded by creating higher-level primitives beyond headless browsers: search APIs (Parallel, Exa), fetch and crawl APIs (Firecrawl, Cloudflare). These are faster and cheaper than a full browser session for simple read-only use cases. +This is an expansion of the market, not a shrinking of it. More agents accessing the web means more demand across every layer: search, fetch, and browsers. But it also means customers now expect a complete toolkit, not just one primitive. The teams building browser agents today are stitching together 5+ vendors (search, fetch, browsers, models, deployment) before writing a single prompt. +Browserbase has already won the browser layer. 
The platform move **extends that position** into the adjacent primitives our customers are already buying from other vendors, **under one API key, with the browser at the center**. + +### The problems + +#### **Problems that we already solve with headless browsers** + +- **Agents fail because the web wasn't built for AI**: agents accessing the web face blocked requests, CAPTCHA walls, and anti-bot detection at every turn. Most websites return huge blobs of HTML that are expensive to parse and hard to reason about. +- **Legacy automation frameworks are brittle and require time and headcount to build and maintain**: traditional tools like Selenium and Puppeteer scripts break constantly when websites change. Teams end up maintaining a whole stack of app servers, queues, retries, schedulers, and browser infrastructure. +- **Most agents are slow and expensive**: browser agents repeat the same work over and over, extracting data from similar websites and burning through tokens. Scripts on similar pages don't share cached results, and every new script starts from scratch with no shared knowledge across runs. +- **Self-hosting browser infrastructure is a trap**: high round-trip latency kills performance, scaling means paying separately for compute and browser infra, and teams end up managing 10-15 providers across observability, networking, storage, orchestration, and deployment. Running browsers one at a time creates bottlenecks at scale, and poor visibility makes issues hard to debug. + +#### **Problems that our Platform offering addresses** + +- **Your AI Agent stack is 5+ vendors before you write a single prompt**: the AI Agent stack market is fragmented and customers need to bundle multiple vendors. + + ![If you have to zoom in on the market map, it’s a fragmented market. ](Browserbase%20Platform%201%20Pager%20(shareable%20version)/image.png) + + If you have to zoom in on the market map, it’s a fragmented market. + +- **APIs see 15% of the web. Agents need the other 85%**: As more agents get access to the web through web search, gaining access to private or complex websites becomes a strong moat than ever (*ex: Ramp's agent*) + +### The (unified) solution + +One API key, everything your agent needs to browse the web: + +- **Browsers that work where APIs can't**: programmatic access to fleets of headless browsers at scale. Spin up as many concurrent sessions as your agents need, with globally distributed infrastructure, 2 vCPUs per browser, isolated sessions, and SOC-2 Type II compliance. Browsers are the core of the platform. +- **One platform, one vendor**: Browsers, Search, Fetch, Functions, Model Gateway (one API key, every model, zero friction), and Agent Identity under a single account. One bill, one place to debug (*rich logs, live view, and session replay across every step*), fewer integrations to maintain. +- **Unrestricted access to the web**: Agent Identity is a global passport for your agents. Strategic partnerships (*Cloudflare, Stytch, Fingerprint, Vercel*) and secure credential management (*1Password*) get agents past anti-bot systems, CAPTCHAs, and authentication walls. +- **Deploy instantly, zero infrastructure**: Functions run your code next to the browser with <5ms latency. No Temporal, no job schedulers, no headaches. Just `bb function deploy` and you're live, with built-in observability (session recordings, logs, metrics in one place). 
+- **Stagehand, the SDK for browser agents**: Stagehand replaces rigid selector with natural language browser interactions that self-heal when pages change. Its automatic action caching eliminates redundant LLM calls across runs (*up to 2x faster, ~30% cost reduction on repeated actions*). + +### The opportunity + +- **Be the first mover at solving the ["impossible triangles of AI Web Infra"](https://youtu.be/XwNQvOxJ0IU?t=164):** define and lead the browser agent platform category. + + +- **Double down into the maturing market of (browser) agents:** more use cases and more winning arguments for our sales team (*ex: vendor bundling*). +- **APIs are high-margin products**: enabling us to lower the price of our browsers offering over time and compete aggressively on browser pricing where needed. + +## How to write/talk about Browserbase + +**Dos** + +- **Use "Browser Agent"** or "Agent … the web." instead of "Web Agent" +[**“Browser Agent”**: A browser agent is an AI system (agent) that can autonomously navigate and interact with web browsers much like a human user would, but directed by natural language instructions. Essentially, it bridges the gap between an AI's reasoning capabilities and the visual, interactive world of the web.](https://www.notion.so/Browser-Agent-A-browser-agent-is-an-AI-system-agent-that-can-autonomously-navigate-and-interact--35e3c11b66148013983ffc34f5ce1c5a?pvs=21) +- **Replace "AI" with "Agents"** in most external copy. +Works for high-level positioning and vision. Keep "AI" or "automation" wording in deeper-dive sales materials and case studies where the customer uses that language. + - Same for **"Agentic workflows" → "Agents"** (*simpler, more direct*) +- When listing our platform primitives, **always put browsers first**. + +**Don’ts** + +- "**Serverless** browsers" → "Headless browsers" +- "Browser **automation** framework" (for Stagehand) → "SDK for browser agents" +- **“Stealth”** → "Agent Identity" (we're moving from sneaky/undetected to upfront/credential-first) +- **No direct reference to automation or scraping** in public positioning. Paul is hesitant about "scraping" anywhere in positioning. Don't lead with "best search or fetch API" either, that's easy to disprove. +- Don't lead with Search or Fetch as standalone products. They're side dishes, the browser is the draw. +- Don't position around benchmarks we can't defend ("best search API," "fastest fetch") + +## Appendixes + +### Glossary + +- **“Browser Agent”**: A browser agent is an AI system (agent) that can autonomously navigate and interact with web browsers much like a human user would, but directed by natural language instructions. Essentially, it bridges the gap between an AI's reasoning capabilities and the visual, interactive world of the web. + + *Why no “Web Agent”? → [“Browser Agent” vs. “Web Agent”](https://www.notion.so/Browser-Agent-vs-Web-Agent-3293c11b66148062bfbacfa56b0e0503?pvs=21)* \ No newline at end of file diff --git a/examples/integrations/langchain/patent-landscape-agent/README.md b/examples/integrations/langchain/patent-landscape-agent/README.md new file mode 100644 index 0000000..5d3e181 --- /dev/null +++ b/examples/integrations/langchain/patent-landscape-agent/README.md @@ -0,0 +1,161 @@ +# Patent Landscape Agent (LangChain Deep Agents + Stagehand + Browserbase) + +Runs a three-phase patent landscape research workflow given a technology area or assignee name. 
+Phase 1 fans out five specialist subagents in parallel (granted patents, prosecution history,
+assignment/ownership, PTAB litigation, inventor network) across USPTO, EPO, and WIPO public portals.
+Phase 2 walks the full family tree for each high-priority patent family. Phase 3 synthesizes
+all findings into a structured memo at `./reports/final-memo.md` with a freedom-to-operate
+assessment and a sources appendix of Browserbase session replay URLs.
+
+Uses **OpenAI as the single model provider** for both the Deep Agents orchestrator and all
+Stagehand browser-agent sessions — no other provider keys are required.
+
+## Prerequisites
+
+- Python 3.11+
+- [Browserbase account](https://browserbase.com) — for headless browser sessions and Agent Identity
+- OpenAI API key — for orchestration and Stagehand browser reasoning
+- `BROWSERBASE_PROJECT_ID` is optional; set it to scope sessions to a specific Browserbase project
+
+## Install
+
+```bash
+cd examples/integrations/langchain/patent-landscape-agent
+python3 -m venv .venv
+source .venv/bin/activate
+pip install -r requirements.txt
+```
+
+Or with `uv`:
+
+```bash
+uv venv && uv pip install -r requirements.txt
+```
+
+## Environment
+
+```bash
+cp .env.example .env
+```
+
+Edit `.env`:
+
+```ini
+OPENAI_API_KEY=sk-...
+BROWSERBASE_API_KEY=bb_...
+# BROWSERBASE_PROJECT_ID=prj_...  # optional; scopes sessions to a specific project
+```
+
+## Run
+
+Default (PageRank benchmark):
+
+```bash
+python agent.py
+```
+
+Custom query:
+
+```bash
+python agent.py "MapReduce distributed computing — Google (Jeffrey Dean, Sanjay Ghemawat) US7650331"
+python agent.py "Transformer attention mechanism — Google Brain, US10452978"
+python agent.py "US6285999"   # single seed patent, walks the full family
+```
+
+Override the orchestrator model:

+```bash
+python agent.py --model gpt-4.1 "US6285999"
+```
+
+The final memo is always written to `./reports/final-memo.md` when the run completes.
+Intermediate workpapers may also appear there if the orchestrator uses the filesystem tools
+during the run.
+
+## Streaming subagent activity
+
+`create_deep_agent` returns a LangGraph compiled graph. To stream per-step updates from the
+orchestrator and subagents as the graph runs:
+
+```python
+from agent import build_agent
+
+agent = build_agent(model="gpt-4.1")
+config = {"configurable": {"thread_id": "stream-demo"}}
+
+for chunk in agent.stream(
+    {"messages": [{"role": "user", "content": "PageRank web search ranking — Stanford/Google US6285999"}]},
+    config=config,
+    stream_mode="updates",
+):
+    print(chunk)
+```
+
+## Session replays
+
+Every `patent_research` call opens a Browserbase browser session. The tool response includes a
+`session_url` field of the form `https://browserbase.com/sessions/<session_id>`. These URLs are collected
+in the final memo's Sources Appendix and are accessible in the Browserbase dashboard for up to
+7 days after the run.
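+
+For reference, a single `patent_research` response is a JSON string shaped roughly like the example
+below. The field names follow the tool's docstring in `tools/patent_research.py`; the values shown
+here are illustrative, not captured output:
+
+```json
+{
+  "portal": "uspto_search",
+  "session_id": "2f1c-example",
+  "session_url": "https://browserbase.com/sessions/2f1c-example",
+  "query": "Find granted US patents assigned to Stanford University mentioning PageRank",
+  "result": {
+    "patent_number": "US6285999B1",
+    "title": "Method for node ranking in a linked database",
+    "status": "granted"
+  }
+}
+```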
+ +## Architecture + +``` +agent.py orchestrator (gpt-4.1, OpenAI) +├── tools/patent_research.py single tool: Stagehand agent over a Browserbase session +└── subagents/ + ├── granted_patents.py USPTO search, EPO Espacenet, WIPO Patentscope + ├── prosecution_history.py USPTO Patent Center file wrappers + ├── assignment_ownership.py USPTO Assignment Search, EPO Register + ├── litigation_history.py USPTO PTAB (IPR/PGR proceedings) + ├── inventor_network.py inventor portfolio mapping + └── patent_family_analysis.py continuations, divisionals, foreign equivalents +``` + +All browser interactions go through the `patent_research` tool, which creates a Browserbase +session per call, drives the portal with a Stagehand agent configured to use `openai/gpt-4o`, +and returns structured JSON with the extracted data and a session replay URL. + +## Benchmark: PageRank (Stanford / Google) + +**Query**: `"PageRank web search ranking algorithm — Stanford University / Google, seed patent US6285999"` + +- Seed patent: US6285999B1 (filed Jan 9, 1998 — Lawrence Page, Stanford University assignee) +- Licensed exclusively to Google; multiple US continuations filed through the 2000s +- EPO equivalent: EP1062579; also filed in Canada, Japan, and Australia +- Went through USPTO ex parte reexamination (reexam cert issued 2011) + +Well-known to software engineers, publicly well-documented, and a clean example of a +university-originated patent licensed to a commercial entity with a traceable family tree. + +Expected output: + +```markdown +# Patent Landscape Memo: PageRank (Stanford / Google) + +## Executive Summary +- US6285999 is the anchor; Stanford filed ~6 US continuations, all assigned to Google + via exclusive license executed in 1998. +- EPO EP1062579 granted; CA, JP, AU national phase entries confirmed. +- No active PTAB proceedings; ex parte reexamination closed 2011 with amended claims. +- Patent expired Jan 9, 2018 (20-year term from priority date) — no remaining FTO risk + on the core PageRank claims. Continuation US7058628 expired 2019. +- Inventor network: Lawrence Page and Sergey Brin; subsequent Stanford/Google filing + activity visible via inventor search. + +## Landscape Overview +... +``` + +Typical run stats: ~14 `patent_research` calls, 6 subagent invocations, ~30 min total runtime, +~150k input tokens. + +## Notes + +- All portals accessed are public (no login required). USPTO PTAB, Patent Center, and Assignment + Search, EPO Espacenet and Register, and WIPO Patentscope are fully public. +- Browserbase Agent Identity handles browser fingerprinting and CAPTCHAs automatically. +- `previous_session_id` in `patent_research` allows a subagent to issue follow-up queries on + the same portal page without starting a new session (e.g., paginating through search results). +- Workpapers in `./reports/` are plain markdown. The directory is gitignored; commit selectively + if you want to version specific memos. 
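+
+The `previous_session_id` follow-up pattern described in the notes above looks roughly like this
+when calling the tool directly. This is a minimal sketch: the query strings and output schema are
+illustrative, and it assumes `BROWSERBASE_API_KEY` and `OPENAI_API_KEY` are set in the environment.
+
+```python
+import json
+
+from tools import patent_research
+
+# First call: opens a fresh Browserbase session on the PTAB portal.
+first = json.loads(patent_research.invoke({
+    "portal": "uspto_ptab",
+    "query": "Search PTAB for proceedings challenging US6285999",
+    "output_schema": '{"proceedings": [{"number": "string", "status": "string"}]}',
+}))
+
+# Follow-up call: reuses the same session and page instead of starting a new one.
+follow_up = json.loads(patent_research.invoke({
+    "portal": "uspto_ptab",
+    "query": "Open the next page of results and extract the remaining proceedings",
+    "output_schema": '{"proceedings": [{"number": "string", "status": "string"}]}',
+    "previous_session_id": first["session_id"],
+}))
+
+print(follow_up["session_url"])  # replay link for the reused session
+```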
diff --git a/examples/integrations/langchain/patent-landscape-agent/agent.py b/examples/integrations/langchain/patent-landscape-agent/agent.py
new file mode 100644
index 0000000..5c947ee
--- /dev/null
+++ b/examples/integrations/langchain/patent-landscape-agent/agent.py
@@ -0,0 +1,177 @@
+from __future__ import annotations
+
+import argparse
+import json
+import os
+import sys
+import uuid
+from typing import Any
+
+from deepagents import FilesystemPermission, create_deep_agent
+from dotenv import load_dotenv
+from langchain_openai import ChatOpenAI
+from langgraph.checkpoint.memory import MemorySaver
+
+from subagents import (
+    ASSIGNMENT_OWNERSHIP,
+    GRANTED_PATENTS,
+    INVENTOR_NETWORK,
+    LITIGATION_HISTORY,
+    PATENT_FAMILY_ANALYSIS,
+    PROSECUTION_HISTORY,
+)
+from tools import patent_research
+
+
+SYSTEM_PROMPT = """You are a patent landscape research orchestrator.
+
+Given a technology area or assignee name (and optionally a seed patent number),
+produce a structured patent landscape memo in three phases.
+
+Start by writing ./reports/research-plan.md with your todo list for this run.
+
+Phase 1 — fan out all five specialist subagents in parallel:
+- granted-patents: search USPTO, EPO Espacenet, and WIPO Patentscope for relevant patents.
+- prosecution-history: pull file wrapper data for top patents from USPTO Patent Center.
+- assignment-ownership: trace assignment chains via USPTO Patent Assignment and EPO Register.
+- litigation-history: search PTAB for IPR/PGR proceedings and outcomes.
+- inventor-network: map inventors across their full portfolios and current affiliations.
+
+Each subagent writes its workpaper to ./reports/<subagent-name>.md.
+
+Phase 2 — per-family deep dives:
+- Read the granted-patents workpaper to identify the top 3–5 high-priority patent families.
+- For each family, delegate to the patent-family-analysis subagent to walk the full family tree
+  (continuations, divisionals, foreign equivalents, PCT national phase entries).
+- Each analysis is written to ./reports/family-<patent-number>.md.
+
+Phase 3 — synthesis:
+- Read all workpapers from ./reports/ using read_file.
+- Cross-reference ownership (assignment-ownership) with litigation risk (litigation-history).
+- Surface freedom-to-operate flags based on lapsed or un-entered foreign equivalents.
+- Write the final memo to ./reports/final-memo.md with the structure below.
+
+Final memo structure (./reports/final-memo.md):
+1. Executive Summary (3–5 bullet points: landscape overview, top risks, FTO flags)
+2. Landscape Overview — one section per Phase 1 track with key findings
+3. Per-Family Deep Dives — one subsection per family with the family tree
+4. Ownership & Litigation Cross-Reference — table of patents × PTAB proceedings × current owner
+5. Freedom-to-Operate Flags — jurisdictions with lapsed or unprotected coverage
+6. Sources Appendix — all Browserbase session replay URLs from patent_research calls
+"""
+
+
+def _require_env(name: str) -> str:
+    value = os.getenv(name, "").strip()
+    if not value:
+        raise ValueError(f"Missing required environment variable: {name}")
+    return value
+
+
+def build_model(model: str) -> ChatOpenAI:
+    # OpenAI is the single model provider — no gateway, no fallback.
+    return ChatOpenAI(
+        model=model,
+        api_key=_require_env("OPENAI_API_KEY"),
+    )
+
+
+def build_agent(model: str):
+    os.makedirs("./reports", exist_ok=True)
+    return create_deep_agent(
+        model=build_model(model),
+        tools=[patent_research],
+        subagents=[
+            GRANTED_PATENTS,
+            PROSECUTION_HISTORY,
+            ASSIGNMENT_OWNERSHIP,
+            LITIGATION_HISTORY,
+            INVENTOR_NETWORK,
+            PATENT_FAMILY_ANALYSIS,
+        ],
+        system_prompt=SYSTEM_PROMPT,
+        permissions=[FilesystemPermission(operations=["read", "write"], paths=[os.path.abspath("./reports")])],
+        checkpointer=MemorySaver(),
+    )
+
+
+def _stringify_content(content: Any) -> str:
+    if isinstance(content, str):
+        return content
+    if isinstance(content, list):
+        parts = []
+        for item in content:
+            if isinstance(item, str):
+                parts.append(item)
+            elif isinstance(item, dict):
+                if item.get("type") == "text":
+                    parts.append(str(item.get("text", "")))
+                else:
+                    parts.append(json.dumps(item, default=str))
+            else:
+                parts.append(str(item))
+        return "\n".join(part for part in parts if part)
+    return json.dumps(content, indent=2, default=str)
+
+
+def _final_text(result: Any) -> str:
+    state = getattr(result, "value", result)
+    if isinstance(state, dict):
+        messages = state.get("messages", [])
+        for message in reversed(messages):
+            msg_type = getattr(message, "type", None)
+            if msg_type is None and isinstance(message, dict):
+                msg_type = message.get("type") or message.get("role")
+            if msg_type in {"ai", "assistant"}:
+                content = getattr(message, "content", None)
+                if content is None and isinstance(message, dict):
+                    content = message.get("content")
+                return _stringify_content(content)
+        return json.dumps(state, indent=2, default=str)
+    return str(state)
+
+
+def run(query: str, model: str) -> str:
+    agent = build_agent(model=model)
+    config = {"configurable": {"thread_id": str(uuid.uuid4())}}
+    result = agent.invoke(
+        {"messages": [{"role": "user", "content": query}]},
+        config=config,
+    )
+    memo = _final_text(result)
+    memo_path = os.path.join(os.path.abspath("./reports"), "final-memo.md")
+    with open(memo_path, "w", encoding="utf-8") as f:
+        f.write(memo)
+    print(f"Memo saved to {memo_path}", file=sys.stderr)
+    return memo
+
+
+def parse_args() -> argparse.Namespace:
+    parser = argparse.ArgumentParser(
+        description=(
+            "Patent landscape research agent powered by LangChain Deep Agents, "
+            "Stagehand, and Browserbase. Writes a structured memo to ./reports/final-memo.md."
+ ) + ) + parser.add_argument( + "query", + nargs="?", + default=( + "PageRank web search ranking algorithm — Stanford University / Google, " + "seed patent US6285999" + ), + help="Technology area, assignee name, or seed patent number to research.", + ) + parser.add_argument( + "--model", + default=os.getenv("PATENT_AGENT_MODEL", "gpt-4.1"), + help="OpenAI model for the orchestrator (default: gpt-4.1).", + ) + return parser.parse_args() + + +if __name__ == "__main__": + load_dotenv() + args = parse_args() + print(run(query=args.query, model=args.model)) diff --git a/examples/integrations/langchain/patent-landscape-agent/requirements.txt b/examples/integrations/langchain/patent-landscape-agent/requirements.txt new file mode 100644 index 0000000..0a9bb1a --- /dev/null +++ b/examples/integrations/langchain/patent-landscape-agent/requirements.txt @@ -0,0 +1,6 @@ +beautifulsoup4>=4.13.0 +browserbase>=1.8.0 +deepagents>=0.0.5 +langchain-openai>=0.3.0 +python-dotenv>=1.0.0 +stagehand>=3.19.5 diff --git a/examples/integrations/langchain/patent-landscape-agent/subagents/__init__.py b/examples/integrations/langchain/patent-landscape-agent/subagents/__init__.py new file mode 100644 index 0000000..10a9900 --- /dev/null +++ b/examples/integrations/langchain/patent-landscape-agent/subagents/__init__.py @@ -0,0 +1,15 @@ +from subagents.granted_patents import GRANTED_PATENTS +from subagents.prosecution_history import PROSECUTION_HISTORY +from subagents.assignment_ownership import ASSIGNMENT_OWNERSHIP +from subagents.litigation_history import LITIGATION_HISTORY +from subagents.inventor_network import INVENTOR_NETWORK +from subagents.patent_family_analysis import PATENT_FAMILY_ANALYSIS + +__all__ = [ + "GRANTED_PATENTS", + "PROSECUTION_HISTORY", + "ASSIGNMENT_OWNERSHIP", + "LITIGATION_HISTORY", + "INVENTOR_NETWORK", + "PATENT_FAMILY_ANALYSIS", +] diff --git a/examples/integrations/langchain/patent-landscape-agent/subagents/assignment_ownership.py b/examples/integrations/langchain/patent-landscape-agent/subagents/assignment_ownership.py new file mode 100644 index 0000000..5753a6e --- /dev/null +++ b/examples/integrations/langchain/patent-landscape-agent/subagents/assignment_ownership.py @@ -0,0 +1,38 @@ +from tools import patent_research + +ASSIGNMENT_OWNERSHIP = { + "name": "assignment-ownership", + "description": ( + "Traces assignment chains and ownership changes for the top patents via " + "USPTO Patent Assignment Search and EPO Register. Identifies current owner " + "and any recorded security interests or licenses." + ), + "system_prompt": """You are a patent ownership specialist. + +You will receive a list of patent numbers from the granted-patents track. +Trace the full assignment chain for each patent. + +Steps: + +1. USPTO Patent Assignment Search (portal: uspto_assignments): + For each US patent number, extract: + - All recorded assignments in chronological order + - Assignor name and date + - Assignee name and date + - Reel/frame number + - Nature of conveyance (assignment, security interest, merger, license, etc.) + - Current owner of record + +2. EPO Register (portal: epo_register): + For each EP patent number, extract: + - Current proprietor + - Assignment history if recorded + - Any recorded licences or security interests + +Write your structured findings to ./reports/assignment-ownership.md. +Flag any security interests (pledges, mortgages) that encumber the patents. +Flag any assignments to shell companies or holding entities. +Include session_url for each research session. 
+""", + "tools": [patent_research], +} diff --git a/examples/integrations/langchain/patent-landscape-agent/subagents/granted_patents.py b/examples/integrations/langchain/patent-landscape-agent/subagents/granted_patents.py new file mode 100644 index 0000000..1b53be4 --- /dev/null +++ b/examples/integrations/langchain/patent-landscape-agent/subagents/granted_patents.py @@ -0,0 +1,50 @@ +from tools import patent_research + +GRANTED_PATENTS = { + "name": "granted-patents", + "description": ( + "Searches USPTO Patent Public Search, EPO Espacenet, and WIPO Patentscope for " + "granted patents and published applications matching the technology area or assignee. " + "Returns patent numbers, titles, filing/grant dates, current status, and claim 1 text." + ), + "system_prompt": """You are a patent search specialist covering USPTO, EPO, and WIPO. + +Your task: find all granted patents and published applications relevant to the given technology area or assignee name. + +Steps: + +1. USPTO Patent Public Search (portal: uspto_search): + Search by assignee name or keyword. Extract for each result: + - Patent number (e.g., US8697359B2) + - Title + - Filing date + - Grant date + - Current status (granted, abandoned, pending) + - Assignee + - Claim 1 full text + - Application number + +2. EPO Espacenet (portal: epo_espacenet): + Search by assignee or keyword. Extract: + - Patent number (EP or WO) + - Title + - Filing date + - Grant date + - Designated states + - Claim 1 text + +3. WIPO Patentscope (portal: wipo_patentscope): + Search PCT applications. Extract: + - International application number + - Filing date + - International publication number + - Title + - Applicant + - Entering national phase countries + +Write your structured findings to ./reports/granted-patents.md. +Include the session_url from each patent_research call for audit purposes. +Identify the top 3-5 highest-priority patent families based on claim breadth and filing dates. +""", + "tools": [patent_research], +} diff --git a/examples/integrations/langchain/patent-landscape-agent/subagents/inventor_network.py b/examples/integrations/langchain/patent-landscape-agent/subagents/inventor_network.py new file mode 100644 index 0000000..2b6a1cc --- /dev/null +++ b/examples/integrations/langchain/patent-landscape-agent/subagents/inventor_network.py @@ -0,0 +1,40 @@ +from tools import patent_research + +INVENTOR_NETWORK = { + "name": "inventor-network", + "description": ( + "Extracts the inventor list from the top patents, surfaces their other patents " + "across USPTO and EPO, and identifies their current institutional affiliations." + ), + "system_prompt": """You are an inventor network analyst. + +You will receive a list of patent numbers from the granted-patents track. +For each patent, extract the inventor list, then map each inventor's broader portfolio. + +Steps: + +1. Extract inventors (portal: uspto_search or epo_espacenet): + For each patent, record: + - Full inventor name + - City and country of residence at time of filing + +2. Per inventor — search their full patent portfolio (portal: uspto_search): + Search by inventor name. Extract: + - All US patent numbers where they are listed as inventor + - Assignee for each patent (shows employer history) + - Filing dates (shows career timeline) + +3. Per inventor — cross-check EPO (portal: epo_espacenet): + Search by inventor name. Extract any EP patents not found in USPTO search. + +4. 
Current affiliation inference: + Based on the assignee of their most recent patents (last 2 years), + infer current institutional affiliation. Note if they have moved to a competitor. + +Write your structured findings to ./reports/inventor-network.md. +Flag inventors who have recently moved to direct competitors. +Flag inventors with pending patent applications at a new employer in the same technology area. +Include session_url for each research session. +""", + "tools": [patent_research], +} diff --git a/examples/integrations/langchain/patent-landscape-agent/subagents/litigation_history.py b/examples/integrations/langchain/patent-landscape-agent/subagents/litigation_history.py new file mode 100644 index 0000000..29a558b --- /dev/null +++ b/examples/integrations/langchain/patent-landscape-agent/subagents/litigation_history.py @@ -0,0 +1,42 @@ +from tools import patent_research + +LITIGATION_HISTORY = { + "name": "litigation-history", + "description": ( + "Searches USPTO PTAB for IPR and PGR proceedings against the top patents. " + "Identifies which patents have been challenged, the petitioner, and the outcome." + ), + "system_prompt": """You are a patent litigation specialist focused on PTAB proceedings. + +You will receive a list of patent numbers from the granted-patents track. +For each patent, search the USPTO Patent Trial and Appeal Board (PTAB) database. + +Primary portal: uspto_ptab +Fallback portal: google_patents (use if PTAB portal is unavailable or returns no results) + +For each patent, extract all IPR, PGR, CBM, and ex parte reexamination proceedings: +- Proceeding number (e.g., IPR2023-00123) +- Proceeding type (IPR, PGR, CBM, ex parte reexamination) +- Filing date +- Petitioner name +- Institution decision: date and outcome (instituted / denied) +- Final written decision: date and outcome (claims cancelled / confirmed / mixed) +- Whether an appeal was filed (Federal Circuit or Supreme Court) +- Current status + +Also note: +- Whether any claims were cancelled as a result +- Which claims survived if the proceeding went to final written decision +- Settlement agreements if recorded + +If the PTAB portal is inaccessible or returns a blank page, switch to google_patents and +search for the patent number. Google Patents displays PTAB proceedings under the "Events" +tab — extract the same fields from there. + +Write your structured findings to ./reports/litigation-history.md. +Flag patents with cancelled claims or pending PTAB challenges as high-risk. +If both sources were unavailable, state that explicitly with the portals attempted. +Include session_url for each research session. +""", + "tools": [patent_research], +} diff --git a/examples/integrations/langchain/patent-landscape-agent/subagents/patent_family_analysis.py b/examples/integrations/langchain/patent-landscape-agent/subagents/patent_family_analysis.py new file mode 100644 index 0000000..78a4cbc --- /dev/null +++ b/examples/integrations/langchain/patent-landscape-agent/subagents/patent_family_analysis.py @@ -0,0 +1,46 @@ +from tools import patent_research + +PATENT_FAMILY_ANALYSIS = { + "name": "patent-family-analysis", + "description": ( + "Walks the full patent family tree for a single high-priority patent: parent, " + "continuations, divisionals, and foreign equivalents. Called once per high-priority " + "family identified in Phase 1." + ), + "system_prompt": """You are a patent family analyst. + +You will receive a single patent number representing a high-priority family anchor. 
+Walk the full family tree for this patent across all jurisdictions.
+
+Steps:
+
+1. USPTO continuations and divisionals (portal: uspto_patent_center):
+   Starting from the given patent number:
+   - Find the priority chain: parent application, grandparent, etc.
+   - Find all child applications: continuations (CON), continuations-in-part (CIP), divisionals (DIV)
+   - For each, record: application number, patent number (if granted), filing date, status, title
+
+2. Foreign equivalents via EPO Espacenet (portal: epo_espacenet):
+   Search the INPADOC family for the anchor patent. Extract:
+   - All EP, WO, GB, DE, FR, JP, CN, KR, AU, CA equivalents
+   - Filing date and grant date in each jurisdiction
+   - Current status in each jurisdiction
+   - Which foreign equivalents have lapsed
+
+3. WIPO PCT application (portal: wipo_patentscope):
+   If the family has a PCT application, extract:
+   - International application number (PCT/...)
+   - International search report findings
+   - Countries that entered national phase
+   - Countries that did not enter national phase (possible freedom-to-operate gaps)
+
+Write your structured findings to ./reports/family-<patent-number>.md, substituting
+the actual patent number into the filename.
+
+Produce a family tree diagram in ASCII or nested markdown list format.
+Flag jurisdictions where the patent has lapsed or never entered national phase —
+these represent potential freedom-to-operate opportunities.
+Include session_url for each research session.
+""",
+    "tools": [patent_research],
+}
diff --git a/examples/integrations/langchain/patent-landscape-agent/subagents/prosecution_history.py b/examples/integrations/langchain/patent-landscape-agent/subagents/prosecution_history.py
new file mode 100644
index 0000000..bf1fe28
--- /dev/null
+++ b/examples/integrations/langchain/patent-landscape-agent/subagents/prosecution_history.py
@@ -0,0 +1,36 @@
+from tools import patent_research
+
+PROSECUTION_HISTORY = {
+    "name": "prosecution-history",
+    "description": (
+        "Pulls file wrapper data from USPTO Patent Center for the top patents identified by "
+        "the granted-patents track. Extracts office actions, examiner responses, RCEs, "
+        "and continuation status."
+    ),
+    "system_prompt": """You are a patent prosecution specialist.
+
+You will receive a list of patent numbers from the granted-patents track.
+For each patent, retrieve the prosecution history from USPTO Patent Center.
+
+Primary portal: uspto_patent_center
+Fallback portal: google_patents (use if Patent Center is unavailable or returns no results)
+
+For each patent number, extract:
+- All office actions: date, type (restriction, non-final rejection, final rejection, allowance), grounds cited
+- Applicant responses: date, type (RCE, amendment, appeal, interview)
+- Examiner: name and art unit
+- Total pendency (filing to grant)
+- Whether a continuation, continuation-in-part, or divisional was filed
+- Whether any claims were cancelled or narrowed during prosecution
+
+If Patent Center is inaccessible, switch to google_patents and search for the patent number.
+Google Patents displays the prosecution timeline under the "Events" tab — extract dates and
+event types from there as a best-effort substitute for the full file wrapper.
+
+Write your structured findings to ./reports/prosecution-history.md.
+Flag patents where prosecution history suggests claim narrowing that limits scope.
+If both sources were unavailable, state that explicitly with the portals attempted.
+Include session_url for each research session.
+""", + "tools": [patent_research], +} diff --git a/examples/integrations/langchain/patent-landscape-agent/tools/__init__.py b/examples/integrations/langchain/patent-landscape-agent/tools/__init__.py new file mode 100644 index 0000000..97f08ad --- /dev/null +++ b/examples/integrations/langchain/patent-landscape-agent/tools/__init__.py @@ -0,0 +1,3 @@ +from tools.patent_research import patent_research + +__all__ = ["patent_research"] diff --git a/examples/integrations/langchain/patent-landscape-agent/tools/patent_research.py b/examples/integrations/langchain/patent-landscape-agent/tools/patent_research.py new file mode 100644 index 0000000..795bb91 --- /dev/null +++ b/examples/integrations/langchain/patent-landscape-agent/tools/patent_research.py @@ -0,0 +1,172 @@ +from __future__ import annotations + +import asyncio +import json +import os +from typing import Any + +from browserbase import Browserbase +from langchain.tools import tool +from stagehand import AsyncStagehand + +# OpenAI is the single model provider for both Stagehand act/extract and agent execution. +# "openai/gpt-4.1" follows the Stagehand provider/model naming convention. +STAGEHAND_MODEL = os.getenv("STAGEHAND_MODEL", "openai/gpt-4o") + +PORTAL_URLS: dict[str, str] = { + "uspto_search": "https://ppubs.uspto.gov/pubwebapp/", + "uspto_patent_center": "https://patentcenter.uspto.gov/", + "uspto_assignments": "https://assignment.uspto.gov/patent/index.html#/patent/search", + "uspto_ptab": "https://ptab.uspto.gov/", + "epo_espacenet": "https://worldwide.espacenet.com/", + "epo_register": "https://register.epo.org/", + "wipo_patentscope": "https://patentscope.wipo.int/search/en/search.jsf", + # Fallback for PTAB proceedings and prosecution history when USPTO portals are unavailable. + "google_patents": "https://patents.google.com/", +} + + +def _require_env(name: str) -> str: + value = os.getenv(name, "").strip() + if not value: + raise ValueError(f"Missing required environment variable: {name}") + return value + + +def _normalize(value: Any) -> Any: + if value is None or isinstance(value, (str, int, float, bool)): + return value + if isinstance(value, dict): + return {str(key): _normalize(val) for key, val in value.items()} + if isinstance(value, (list, tuple, set)): + return [_normalize(item) for item in value] + if hasattr(value, "model_dump"): + return _normalize(value.model_dump()) + if hasattr(value, "dict"): + return _normalize(value.dict()) + if hasattr(value, "__dict__"): + public = { + key: val + for key, val in vars(value).items() + if not key.startswith("_") and not callable(val) + } + if public: + return _normalize(public) + return str(value) + + +def _json(value: Any) -> str: + return json.dumps(_normalize(value), indent=2, default=str) + + +def _stagehand_client() -> AsyncStagehand: + # model_api_key must be passed explicitly — Stagehand does not read it from env. + kwargs: dict = { + "browserbase_api_key": _require_env("BROWSERBASE_API_KEY"), + "model_api_key": _require_env("OPENAI_API_KEY"), + } + project_id = os.getenv("BROWSERBASE_PROJECT_ID", "").strip() + if project_id: + kwargs["browserbase_project_id"] = project_id + return AsyncStagehand(**kwargs) + + +def _run_async(coro: Any) -> Any: + return asyncio.run(coro) + + +@tool +def patent_research( + portal: str, + query: str, + output_schema: str, + previous_session_id: str = "", +) -> str: + """Research patents on a specific public portal using a Stagehand browser agent. + + Args: + portal: Which portal to query. 
One of: uspto_search, uspto_patent_center, + uspto_assignments, uspto_ptab, epo_espacenet, epo_register, wipo_patentscope. + query: Natural-language description of what to find and extract. + output_schema: JSON object describing the fields to extract (e.g. + '{"patent_number": "string", "title": "string", "status": "string"}'). + previous_session_id: If set, attach to this existing Browserbase session instead + of starting a new one (useful for follow-up queries on the same portal page). + Leave empty to start a fresh session. + + Returns: + JSON with keys: portal, session_id, session_url (Browserbase replay link), + query, result (extracted structured data). + """ + return _run_async( + _patent_research_async( + portal=portal, + query=query, + output_schema=output_schema, + previous_session_id=previous_session_id or None, + ) + ) + + +async def _patent_research_async( + portal: str, + query: str, + output_schema: str, + previous_session_id: str | None, +) -> str: + if portal not in PORTAL_URLS: + return _json({ + "error": f"Unknown portal '{portal}'. Valid values: {list(PORTAL_URLS)}", + }) + + client = _stagehand_client() + new_session = previous_session_id is None + + if new_session: + start_resp = await client.sessions.start(model_name=STAGEHAND_MODEL) + session_id = start_resp.data.session_id + else: + session_id = previous_session_id + + instruction = ( + f"{query}\n\n" + f"Extract a JSON object matching this schema: {output_schema}" + ) + + try: + if new_session: + await client.sessions.navigate( + id=session_id, + url=PORTAL_URLS[portal], + frame_id="", + ) + + result = await client.sessions.execute( + id=session_id, + execute_options={ + "instruction": instruction, + "max_steps": 30, + }, + agent_config={ + # OpenAI is the single model provider — same key, same model throughout. + "model": STAGEHAND_MODEL, + "instructions": ( + "You are a patent research assistant performing read-only lookups " + "on public patent databases. Extract all requested fields precisely. " + "Do not submit forms that could alter data. Stop once the lookup is complete." + ), + }, + timeout=300.0, + ) + data = _normalize(result) + return _json({ + "portal": portal, + "session_id": session_id, + "session_url": f"https://browserbase.com/sessions/{session_id}", + "query": query, + "result": data, + }) + finally: + # Only end sessions we started; reused sessions are the caller's responsibility. + if new_session: + await client.sessions.end(id=session_id)
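+
+
+if __name__ == "__main__":
+    # Minimal manual smoke test (not part of the agent run). It assumes BROWSERBASE_API_KEY and
+    # OPENAI_API_KEY are already set in the environment, opens one real browser session against a
+    # public portal, and prints the raw JSON response. The query and schema below are illustrative.
+    print(
+        patent_research.invoke({
+            "portal": "epo_espacenet",
+            "query": "Look up EP1062579 and extract its title and current status",
+            "output_schema": '{"patent_number": "string", "title": "string", "status": "string"}',
+        })
+    )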