Skip to content

Commit b5cf933

Browse files
Validate crawl tool page object types
Co-authored-by: Shri Sukhani <shrisukhani@users.noreply.github.com>
1 parent: 5dd1f55 · commit: b5cf933

2 files changed

Lines changed: 33 additions & 0 deletions

File tree

hyperbrowser/tools/__init__.py

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -26,6 +26,15 @@
2626

2727
_MAX_KEY_DISPLAY_LENGTH = 120
2828
_TRUNCATED_KEY_DISPLAY_SUFFIX = "... (truncated)"
29+
_NON_OBJECT_CRAWL_PAGE_TYPES = (
30+
str,
31+
bytes,
32+
bytearray,
33+
memoryview,
34+
int,
35+
float,
36+
bool,
37+
)
2938

3039

3140
def _format_tool_param_key_for_error(key: str) -> str:
@@ -232,6 +241,10 @@ def _render_crawl_markdown_output(response_data: Any) -> str:
232241
) from exc
233242
markdown_sections: list[str] = []
234243
for index, page in enumerate(crawl_pages):
244+
if page is None or isinstance(page, _NON_OBJECT_CRAWL_PAGE_TYPES):
245+
raise HyperbrowserError(
246+
f"crawl tool page must be an object at index {index}"
247+
)
235248
page_markdown = _read_crawl_page_field(
236249
page, field_name="markdown", page_index=index
237250
)

tests/test_tools_response_handling.py

Lines changed: 20 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -349,6 +349,15 @@ def markdown(self) -> str:
349349
assert exc_info.value.original_error is not None
350350

351351

352+
def test_crawl_tool_rejects_non_object_page_items():
353+
client = _SyncCrawlClient(_Response(data=[123]))
354+
355+
with pytest.raises(
356+
HyperbrowserError, match="crawl tool page must be an object at index 0"
357+
):
358+
WebsiteCrawlTool.runnable(client, {"url": "https://example.com"})
359+
360+
352361
def test_crawl_tool_supports_mapping_page_items():
353362
client = _SyncCrawlClient(
354363
_Response(data=[{"url": "https://example.com", "markdown": "mapping body"}])
@@ -491,6 +500,17 @@ async def run() -> None:
491500
asyncio.run(run())
492501

493502

503+
def test_async_crawl_tool_rejects_non_object_page_items():
504+
async def run() -> None:
505+
client = _AsyncCrawlClient(_Response(data=[123]))
506+
with pytest.raises(
507+
HyperbrowserError, match="crawl tool page must be an object at index 0"
508+
):
509+
await WebsiteCrawlTool.async_runnable(client, {"url": "https://example.com"})
510+
511+
asyncio.run(run())
512+
513+
494514
def test_async_browser_use_tool_rejects_non_string_final_result():
495515
async def run() -> None:
496516
client = _AsyncBrowserUseClient(

0 commit comments

Comments
 (0)