Skip to content

Commit b36ea3e

Browse files
Share extract start payload construction across managers
Co-authored-by: Shri Sukhani <shrisukhani@users.noreply.github.com>
1 parent e954551 commit b36ea3e

7 files changed

Lines changed: 161 additions & 24 deletions

File tree

CONTRIBUTING.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@ This runs lint, format checks, compile checks, tests, and package build.
9090
- `tests/test_examples_naming_convention.py` (example sync/async prefix naming enforcement),
9191
- `tests/test_examples_syntax.py` (example script syntax guardrail),
9292
- `tests/test_extension_create_helper_usage.py` (extension create-input normalization helper usage enforcement),
93+
- `tests/test_extract_payload_helper_usage.py` (extract start-payload helper usage enforcement),
9394
- `tests/test_guardrail_ast_utils.py` (shared AST guard utility contract),
9495
- `tests/test_job_pagination_helper_usage.py` (shared scrape/crawl pagination helper usage enforcement),
9596
- `tests/test_makefile_quality_targets.py` (Makefile quality-gate target enforcement),

hyperbrowser/client/managers/async_manager/extract.py

Lines changed: 2 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
from typing import Optional
22

3-
from hyperbrowser.exceptions import HyperbrowserError
43
from hyperbrowser.models.consts import POLLING_ATTEMPTS
54
from hyperbrowser.models.extract import (
65
ExtractJobResponse,
@@ -13,8 +12,7 @@
1312
ensure_started_job_id,
1413
wait_for_job_result_async,
1514
)
16-
from ..serialization_utils import serialize_model_dump_to_dict
17-
from ...schema_utils import resolve_schema_input
15+
from ..extract_payload_utils import build_extract_start_payload
1816
from ..response_utils import parse_response_model
1917

2018

@@ -23,15 +21,7 @@ def __init__(self, client):
2321
self._client = client
2422

2523
async def start(self, params: StartExtractJobParams) -> StartExtractJobResponse:
26-
if not params.schema_ and not params.prompt:
27-
raise HyperbrowserError("Either schema or prompt must be provided")
28-
29-
payload = serialize_model_dump_to_dict(
30-
params,
31-
error_message="Failed to serialize extract start params",
32-
)
33-
if params.schema_:
34-
payload["schema"] = resolve_schema_input(params.schema_)
24+
payload = build_extract_start_payload(params)
3525

3626
response = await self._client.transport.post(
3727
self._client._build_url("/extract"),
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
from typing import Any, Dict
2+
3+
from hyperbrowser.exceptions import HyperbrowserError
4+
from hyperbrowser.models.extract import StartExtractJobParams
5+
6+
from ..schema_utils import resolve_schema_input
7+
from .serialization_utils import serialize_model_dump_to_dict
8+
9+
10+
def build_extract_start_payload(params: StartExtractJobParams) -> Dict[str, Any]:
    """Build the request payload used to start an extract job.

    Args:
        params: Extract start parameters; at least one of ``schema_`` or
            ``prompt`` must be truthy.

    Returns:
        The serialized params as a dict. When a schema is supplied, the
        ``"schema"`` key is overwritten with the result of
        ``resolve_schema_input``.

    Raises:
        HyperbrowserError: If neither a schema nor a prompt is provided.
            Serialization is delegated to ``serialize_model_dump_to_dict``
            with the message "Failed to serialize extract start params".
    """
    if not (params.schema_ or params.prompt):
        raise HyperbrowserError("Either schema or prompt must be provided")

    start_payload = serialize_model_dump_to_dict(
        params, error_message="Failed to serialize extract start params"
    )

    # Read the schema after serialization, exactly as the request is finalized.
    schema_input = params.schema_
    if not schema_input:
        return start_payload

    start_payload["schema"] = resolve_schema_input(schema_input)
    return start_payload

hyperbrowser/client/managers/sync_manager/extract.py

Lines changed: 2 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
from typing import Optional
22

3-
from hyperbrowser.exceptions import HyperbrowserError
43
from hyperbrowser.models.consts import POLLING_ATTEMPTS
54
from hyperbrowser.models.extract import (
65
ExtractJobResponse,
@@ -9,8 +8,7 @@
98
StartExtractJobResponse,
109
)
1110
from ...polling import build_operation_name, ensure_started_job_id, wait_for_job_result
12-
from ..serialization_utils import serialize_model_dump_to_dict
13-
from ...schema_utils import resolve_schema_input
11+
from ..extract_payload_utils import build_extract_start_payload
1412
from ..response_utils import parse_response_model
1513

1614

@@ -19,15 +17,7 @@ def __init__(self, client):
1917
self._client = client
2018

2119
def start(self, params: StartExtractJobParams) -> StartExtractJobResponse:
22-
if not params.schema_ and not params.prompt:
23-
raise HyperbrowserError("Either schema or prompt must be provided")
24-
25-
payload = serialize_model_dump_to_dict(
26-
params,
27-
error_message="Failed to serialize extract start params",
28-
)
29-
if params.schema_:
30-
payload["schema"] = resolve_schema_input(params.schema_)
20+
payload = build_extract_start_payload(params)
3121

3222
response = self._client.transport.post(
3323
self._client._build_url("/extract"),

tests/test_architecture_marker_usage.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
"tests/test_examples_syntax.py",
3232
"tests/test_docs_python3_commands.py",
3333
"tests/test_extension_create_helper_usage.py",
34+
"tests/test_extract_payload_helper_usage.py",
3435
"tests/test_examples_naming_convention.py",
3536
"tests/test_job_pagination_helper_usage.py",
3637
"tests/test_example_sync_async_parity.py",
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
from pathlib import Path
2+
3+
import pytest
4+
5+
pytestmark = pytest.mark.architecture
6+
7+
8+
# Manager modules that must delegate payload construction to the shared helper.
MODULES = (
    "hyperbrowser/client/managers/sync_manager/extract.py",
    "hyperbrowser/client/managers/async_manager/extract.py",
)


def test_extract_managers_use_shared_extract_payload_helper():
    """Each extract manager calls the shared helper and has no inlined copy."""
    # Text fragments that would indicate the payload logic was re-inlined.
    inlined_logic_markers = (
        "Either schema or prompt must be provided",
        "serialize_model_dump_to_dict(",
        "resolve_schema_input(",
    )

    for module_path in MODULES:
        source_text = Path(module_path).read_text(encoding="utf-8")
        assert "build_extract_start_payload(" in source_text
        for marker in inlined_logic_markers:
            assert marker not in source_text
Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
from types import MappingProxyType
2+
3+
import pytest
4+
from pydantic import BaseModel
5+
6+
import hyperbrowser.client.managers.extract_payload_utils as extract_payload_utils
7+
from hyperbrowser.exceptions import HyperbrowserError
8+
from hyperbrowser.models.extract import StartExtractJobParams
9+
10+
11+
def test_build_extract_start_payload_requires_schema_or_prompt():
    """Params carrying neither a schema nor a prompt are rejected."""
    expected_message = "Either schema or prompt must be provided"
    params_without_inputs = StartExtractJobParams(urls=["https://example.com"])

    with pytest.raises(HyperbrowserError, match=expected_message):
        extract_payload_utils.build_extract_start_payload(params_without_inputs)
16+
17+
18+
def test_build_extract_start_payload_serializes_prompt_payload():
    """A prompt-only request keeps its urls and prompt in the payload."""
    prompt_params = StartExtractJobParams(
        urls=["https://example.com"], prompt="extract content"
    )

    built = extract_payload_utils.build_extract_start_payload(prompt_params)

    assert built["urls"] == ["https://example.com"]
    assert built["prompt"] == "extract content"
28+
29+
30+
def test_build_extract_start_payload_resolves_schema_values(
    monkeypatch: pytest.MonkeyPatch,
):
    """The payload's "schema" entry is whatever resolve_schema_input returns."""

    class _SchemaModel(BaseModel):
        title: str

    def _stub_resolve_schema_input(schema_input):
        # Echo the model's class name so the assertion can verify pass-through.
        return {"resolvedSchema": schema_input.__name__}

    schema_params = StartExtractJobParams(
        urls=["https://example.com"], schema=_SchemaModel
    )
    monkeypatch.setattr(
        extract_payload_utils, "resolve_schema_input", _stub_resolve_schema_input
    )

    built = extract_payload_utils.build_extract_start_payload(schema_params)

    assert built["schema"] == {"resolvedSchema": "_SchemaModel"}
48+
49+
50+
def test_build_extract_start_payload_wraps_serialization_errors(
    monkeypatch: pytest.MonkeyPatch,
):
    """A non-Hyperbrowser model_dump failure is wrapped in HyperbrowserError."""
    prompt_params = StartExtractJobParams(
        urls=["https://example.com"], prompt="extract content"
    )

    def _failing_model_dump(*_args, **_kwargs):
        raise RuntimeError("broken model_dump")

    monkeypatch.setattr(StartExtractJobParams, "model_dump", _failing_model_dump)

    expected_message = "Failed to serialize extract start params"
    with pytest.raises(HyperbrowserError, match=expected_message) as caught:
        extract_payload_utils.build_extract_start_payload(prompt_params)

    # The underlying failure must stay reachable from the wrapper.
    assert isinstance(caught.value.original_error, RuntimeError)
71+
72+
73+
def test_build_extract_start_payload_preserves_hyperbrowser_serialization_errors(
    monkeypatch: pytest.MonkeyPatch,
):
    """A HyperbrowserError raised by model_dump surfaces unchanged."""
    prompt_params = StartExtractJobParams(
        urls=["https://example.com"], prompt="extract content"
    )

    def _failing_model_dump(*_args, **_kwargs):
        raise HyperbrowserError("custom model_dump failure")

    monkeypatch.setattr(StartExtractJobParams, "model_dump", _failing_model_dump)

    expected_message = "custom model_dump failure"
    with pytest.raises(HyperbrowserError, match=expected_message) as caught:
        extract_payload_utils.build_extract_start_payload(prompt_params)

    # Not re-wrapped: the error carries no nested original error.
    assert caught.value.original_error is None
94+
95+
96+
def test_build_extract_start_payload_rejects_non_dict_serialized_payload(
    monkeypatch: pytest.MonkeyPatch,
):
    """A model_dump result that is a mapping but not a dict is rejected."""
    prompt_params = StartExtractJobParams(
        urls=["https://example.com"], prompt="extract content"
    )

    def _mapping_proxy_model_dump(*_args, **_kwargs):
        # Read-only mapping view: mapping-like, but not a dict instance.
        return MappingProxyType({"urls": ["https://example.com"]})

    monkeypatch.setattr(
        StartExtractJobParams, "model_dump", _mapping_proxy_model_dump
    )

    expected_message = "Failed to serialize extract start params"
    with pytest.raises(HyperbrowserError, match=expected_message) as caught:
        extract_payload_utils.build_extract_start_payload(prompt_params)

    assert caught.value.original_error is None

0 commit comments

Comments
 (0)