Skip to content

Commit c2c2244

Browse files
Differentiate mapping field inspection and read failures
Co-authored-by: Shri Sukhani <shrisukhani@users.noreply.github.com>
1 parent b74ffc1 commit c2c2244

2 files changed

Lines changed: 154 additions & 9 deletions

File tree

hyperbrowser/tools/__init__.py

Lines changed: 37 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -198,10 +198,13 @@ def _read_tool_response_data(response: Any, *, tool_name: str) -> Any:
198198
raise HyperbrowserError(f"{tool_name} response must include 'data'")
199199
try:
200200
return response["data"]
201-
except KeyError:
202-
raise HyperbrowserError(f"{tool_name} response must include 'data'")
203201
except HyperbrowserError:
204202
raise
203+
except KeyError as exc:
204+
raise HyperbrowserError(
205+
f"Failed to read {tool_name} response data",
206+
original_error=exc,
207+
) from exc
205208
except Exception as exc:
206209
raise HyperbrowserError(
207210
f"Failed to read {tool_name} response data",
@@ -230,11 +233,25 @@ def _read_optional_tool_response_field(
230233
return ""
231234
if isinstance(response_data, MappingABC):
232235
try:
233-
field_value = response_data[field_name]
234-
except KeyError:
236+
has_field = field_name in response_data
237+
except HyperbrowserError:
238+
raise
239+
except Exception as exc:
240+
raise HyperbrowserError(
241+
f"Failed to inspect {tool_name} response field '{field_name}'",
242+
original_error=exc,
243+
) from exc
244+
if not has_field:
235245
return ""
246+
try:
247+
field_value = response_data[field_name]
236248
except HyperbrowserError:
237249
raise
250+
except KeyError as exc:
251+
raise HyperbrowserError(
252+
f"Failed to read {tool_name} response field '{field_name}'",
253+
original_error=exc,
254+
) from exc
238255
except Exception as exc:
239256
raise HyperbrowserError(
240257
f"Failed to read {tool_name} response field '{field_name}'",
@@ -265,11 +282,25 @@ def _read_optional_tool_response_field(
265282
def _read_crawl_page_field(page: Any, *, field_name: str, page_index: int) -> Any:
266283
if isinstance(page, MappingABC):
267284
try:
268-
return page[field_name]
269-
except KeyError:
285+
has_field = field_name in page
286+
except HyperbrowserError:
287+
raise
288+
except Exception as exc:
289+
raise HyperbrowserError(
290+
f"Failed to inspect crawl tool page field '{field_name}' at index {page_index}",
291+
original_error=exc,
292+
) from exc
293+
if not has_field:
270294
return None
295+
try:
296+
return page[field_name]
271297
except HyperbrowserError:
272298
raise
299+
except KeyError as exc:
300+
raise HyperbrowserError(
301+
f"Failed to read crawl tool page field '{field_name}' at index {page_index}",
302+
original_error=exc,
303+
) from exc
273304
except Exception as exc:
274305
raise HyperbrowserError(
275306
f"Failed to read crawl tool page field '{field_name}' at index {page_index}",

tests/test_tools_response_handling.py

Lines changed: 117 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -169,7 +169,7 @@ def __getitem__(self, key: str) -> object:
169169
assert exc_info.value.original_error is not None
170170

171171

172-
def test_scrape_tool_rejects_mapping_responses_missing_data_on_lookup():
172+
def test_scrape_tool_wraps_mapping_responses_keyerrors_on_data_lookup():
173173
class _InconsistentResponse(Mapping[str, object]):
174174
def __iter__(self):
175175
yield "data"
@@ -187,10 +187,12 @@ def __getitem__(self, key: str) -> object:
187187
client = _SyncScrapeClient(_InconsistentResponse()) # type: ignore[arg-type]
188188

189189
with pytest.raises(
190-
HyperbrowserError, match="scrape tool response must include 'data'"
191-
):
190+
HyperbrowserError, match="Failed to read scrape tool response data"
191+
) as exc_info:
192192
WebsiteScrapeTool.runnable(client, {"url": "https://example.com"})
193193

194+
assert exc_info.value.original_error is not None
195+
194196

195197
def test_scrape_tool_wraps_mapping_response_data_inspection_failures():
196198
class _BrokenContainsResponse(Mapping[str, object]):
@@ -343,6 +345,9 @@ def __iter__(self):
343345
def __len__(self) -> int:
344346
return 1
345347

348+
def __contains__(self, key: object) -> bool:
349+
return key == "markdown"
350+
346351
def __getitem__(self, key: str) -> object:
347352
_ = key
348353
raise RuntimeError("cannot read mapping field")
@@ -358,6 +363,59 @@ def __getitem__(self, key: str) -> object:
358363
assert exc_info.value.original_error is not None
359364

360365

366+
def test_scrape_tool_wraps_mapping_field_keyerrors_after_membership_check():
367+
class _InconsistentMapping(Mapping[str, object]):
368+
def __iter__(self):
369+
yield "markdown"
370+
371+
def __len__(self) -> int:
372+
return 1
373+
374+
def __contains__(self, key: object) -> bool:
375+
return key == "markdown"
376+
377+
def __getitem__(self, key: str) -> object:
378+
_ = key
379+
raise KeyError("markdown")
380+
381+
client = _SyncScrapeClient(_Response(data=_InconsistentMapping()))
382+
383+
with pytest.raises(
384+
HyperbrowserError,
385+
match="Failed to read scrape tool response field 'markdown'",
386+
) as exc_info:
387+
WebsiteScrapeTool.runnable(client, {"url": "https://example.com"})
388+
389+
assert exc_info.value.original_error is not None
390+
391+
392+
def test_scrape_tool_wraps_mapping_field_inspection_failures():
393+
class _BrokenContainsMapping(Mapping[str, object]):
394+
def __iter__(self):
395+
yield "markdown"
396+
397+
def __len__(self) -> int:
398+
return 1
399+
400+
def __contains__(self, key: object) -> bool:
401+
_ = key
402+
raise RuntimeError("cannot inspect markdown key")
403+
404+
def __getitem__(self, key: str) -> object:
405+
_ = key
406+
return "ignored"
407+
408+
client = _SyncScrapeClient(_Response(data=_BrokenContainsMapping()))
409+
410+
with pytest.raises(
411+
HyperbrowserError,
412+
match="Failed to inspect scrape tool response field 'markdown'",
413+
) as exc_info:
414+
WebsiteScrapeTool.runnable(client, {"url": "https://example.com"})
415+
416+
assert exc_info.value.original_error is not None
417+
418+
361419
def test_screenshot_tool_rejects_non_string_screenshot_field():
362420
client = _SyncScrapeClient(_Response(data=SimpleNamespace(screenshot=123)))
363421

@@ -454,6 +512,9 @@ def __iter__(self):
454512
def __len__(self) -> int:
455513
return 1
456514

515+
def __contains__(self, key: object) -> bool:
516+
return key == "markdown"
517+
457518
def __getitem__(self, key: str) -> object:
458519
_ = key
459520
raise RuntimeError("cannot read page field")
@@ -469,6 +530,59 @@ def __getitem__(self, key: str) -> object:
469530
assert exc_info.value.original_error is not None
470531

471532

533+
def test_crawl_tool_wraps_mapping_page_keyerrors_after_membership_check():
534+
class _InconsistentPage(Mapping[str, object]):
535+
def __iter__(self):
536+
yield "markdown"
537+
538+
def __len__(self) -> int:
539+
return 1
540+
541+
def __contains__(self, key: object) -> bool:
542+
return key == "markdown"
543+
544+
def __getitem__(self, key: str) -> object:
545+
_ = key
546+
raise KeyError("markdown")
547+
548+
client = _SyncCrawlClient(_Response(data=[_InconsistentPage()]))
549+
550+
with pytest.raises(
551+
HyperbrowserError,
552+
match="Failed to read crawl tool page field 'markdown' at index 0",
553+
) as exc_info:
554+
WebsiteCrawlTool.runnable(client, {"url": "https://example.com"})
555+
556+
assert exc_info.value.original_error is not None
557+
558+
559+
def test_crawl_tool_wraps_mapping_page_inspection_failures():
560+
class _BrokenContainsPage(Mapping[str, object]):
561+
def __iter__(self):
562+
yield "markdown"
563+
564+
def __len__(self) -> int:
565+
return 1
566+
567+
def __contains__(self, key: object) -> bool:
568+
_ = key
569+
raise RuntimeError("cannot inspect markdown key")
570+
571+
def __getitem__(self, key: str) -> object:
572+
_ = key
573+
return "ignored"
574+
575+
client = _SyncCrawlClient(_Response(data=[_BrokenContainsPage()]))
576+
577+
with pytest.raises(
578+
HyperbrowserError,
579+
match="Failed to inspect crawl tool page field 'markdown' at index 0",
580+
) as exc_info:
581+
WebsiteCrawlTool.runnable(client, {"url": "https://example.com"})
582+
583+
assert exc_info.value.original_error is not None
584+
585+
472586
def test_crawl_tool_rejects_non_string_page_urls():
473587
client = _SyncCrawlClient(
474588
_Response(data=[SimpleNamespace(url=42, markdown="body")])

0 commit comments

Comments
 (0)