Skip to content

Commit be2ff25

Browse files
Bound and sanitize manager job operation names end-to-end
Co-authored-by: Shri Sukhani <shrisukhani@users.noreply.github.com>
1 parent b274836 commit be2ff25

2 files changed

Lines changed: 169 additions & 1 deletion

File tree

hyperbrowser/client/polling.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -83,7 +83,7 @@ def build_operation_name(prefix: str, identifier: str) -> str:
8383
_MAX_OPERATION_NAME_LENGTH - len(prefix) - len(_TRUNCATED_OPERATION_NAME_SUFFIX)
8484
)
8585
if available_identifier_length > 0:
86-
truncated_identifier = identifier[:available_identifier_length]
86+
truncated_identifier = normalized_identifier[:available_identifier_length]
8787
return f"{prefix}{truncated_identifier}{_TRUNCATED_OPERATION_NAME_SUFFIX}"
8888
return prefix[:_MAX_OPERATION_NAME_LENGTH]
8989

Lines changed: 168 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,168 @@
1+
import asyncio
2+
from types import SimpleNamespace
3+
4+
import hyperbrowser.client.managers.async_manager.crawl as async_crawl_module
5+
import hyperbrowser.client.managers.async_manager.extract as async_extract_module
6+
import hyperbrowser.client.managers.sync_manager.crawl as sync_crawl_module
7+
import hyperbrowser.client.managers.sync_manager.extract as sync_extract_module
8+
9+
10+
class _DummyClient:
11+
transport = None
12+
13+
14+
def _assert_valid_operation_name(value: str) -> None:
15+
assert isinstance(value, str)
16+
assert value
17+
assert len(value) <= 200
18+
assert value == value.strip()
19+
assert not any(ord(character) < 32 or ord(character) == 127 for character in value)
20+
21+
22+
def test_sync_extract_manager_bounds_operation_name(monkeypatch):
    """Check the sync extract manager passes a bounded, clean operation name.

    ``manager.start`` is replaced so that it returns a job id padded with
    whitespace/control characters around a 500-character body, and the
    module-level ``wait_for_job_result`` is replaced with a stub that
    validates the ``operation_name`` keyword argument it receives.
    """
    manager = sync_extract_module.ExtractManager(_DummyClient())
    # Job id with leading " \n", a 500-character body and a trailing tab.
    long_job_id = " \n" + ("x" * 500) + "\t"
    captured = {}

    monkeypatch.setattr(
        manager,
        "start",
        lambda params: SimpleNamespace(job_id=long_job_id),
    )

    def fake_wait_for_job_result(**kwargs):
        # Validate the name at the point where the manager hands it over.
        operation_name = kwargs["operation_name"]
        _assert_valid_operation_name(operation_name)
        assert operation_name.startswith("extract job ")
        captured["operation_name"] = operation_name
        return {"ok": True}

    monkeypatch.setattr(
        sync_extract_module, "wait_for_job_result", fake_wait_for_job_result
    )

    result = manager.start_and_wait(params=object())  # type: ignore[arg-type]

    assert result == {"ok": True}
    # The oversized identifier is expected to be cut and marked with "...".
    assert captured["operation_name"].endswith("...")
48+
49+
50+
def test_sync_crawl_manager_bounds_operation_name_for_polling_and_pagination(
    monkeypatch,
):
    """Check the sync crawl manager uses one bounded name for poll and collect.

    ``manager.start`` is replaced so that it returns a job id padded with
    whitespace/control characters around a 500-character body. The
    module-level ``poll_until_terminal_status`` and
    ``collect_paginated_results`` are replaced with stubs that validate the
    ``operation_name`` keyword argument; the collect stub also requires the
    name to equal the one seen by the poll stub.
    """
    manager = sync_crawl_module.CrawlManager(_DummyClient())
    # Job id with leading " \n", a 500-character body and a trailing tab.
    long_job_id = " \n" + ("x" * 500) + "\t"
    captured = {}

    monkeypatch.setattr(
        manager,
        "start",
        lambda params: SimpleNamespace(job_id=long_job_id),
    )

    def fake_poll_until_terminal_status(**kwargs):
        operation_name = kwargs["operation_name"]
        _assert_valid_operation_name(operation_name)
        captured["poll_operation_name"] = operation_name
        return "completed"

    def fake_collect_paginated_results(**kwargs):
        operation_name = kwargs["operation_name"]
        _assert_valid_operation_name(operation_name)
        captured["collect_operation_name"] = operation_name
        # Pagination must reuse exactly the name that polling was given.
        assert operation_name == captured["poll_operation_name"]

    monkeypatch.setattr(
        sync_crawl_module,
        "poll_until_terminal_status",
        fake_poll_until_terminal_status,
    )
    monkeypatch.setattr(
        sync_crawl_module,
        "collect_paginated_results",
        fake_collect_paginated_results,
    )

    result = manager.start_and_wait(
        params=object(),  # type: ignore[arg-type]
        return_all_pages=True,
    )

    assert result.status == "completed"
    assert captured["poll_operation_name"].startswith("crawl job ")
    assert captured["poll_operation_name"].endswith("...")
    # NOTE(review): nothing here verifies that the collect stub was actually
    # invoked; consider asserting that "collect_operation_name" is in
    # `captured` once the manager's behavior is confirmed.
91+
92+
93+
def test_async_extract_manager_bounds_operation_name(monkeypatch):
    """Check the async extract manager passes a bounded, clean operation name.

    ``manager.start`` is replaced with a coroutine returning a job id padded
    with whitespace/control characters around a 500-character body, and the
    module-level ``wait_for_job_result_async`` is replaced with a stub that
    validates the ``operation_name`` keyword argument it receives. The
    scenario is driven synchronously through ``asyncio.run``.
    """

    async def run() -> None:
        manager = async_extract_module.ExtractManager(_DummyClient())
        # Job id with leading " \n", a 500-character body and a trailing tab.
        long_job_id = " \n" + ("x" * 500) + "\t"
        captured = {}

        async def fake_start(params):
            return SimpleNamespace(job_id=long_job_id)

        async def fake_wait_for_job_result_async(**kwargs):
            # Validate the name at the point where the manager hands it over.
            operation_name = kwargs["operation_name"]
            _assert_valid_operation_name(operation_name)
            assert operation_name.startswith("extract job ")
            captured["operation_name"] = operation_name
            return {"ok": True}

        monkeypatch.setattr(manager, "start", fake_start)
        monkeypatch.setattr(
            async_extract_module,
            "wait_for_job_result_async",
            fake_wait_for_job_result_async,
        )

        result = await manager.start_and_wait(params=object())  # type: ignore[arg-type]

        assert result == {"ok": True}
        # The oversized identifier is expected to be cut and marked with "...".
        assert captured["operation_name"].endswith("...")

    asyncio.run(run())
122+
123+
124+
def test_async_crawl_manager_bounds_operation_name_for_polling_and_pagination(
    monkeypatch,
):
    """Check the async crawl manager uses one bounded name for poll and collect.

    ``manager.start`` is replaced with a coroutine returning a job id padded
    with whitespace/control characters around a 500-character body. The
    module-level ``poll_until_terminal_status_async`` and
    ``collect_paginated_results_async`` are replaced with stubs that validate
    the ``operation_name`` keyword argument; the collect stub also requires
    the name to equal the one seen by the poll stub. The scenario is driven
    synchronously through ``asyncio.run``.
    """

    async def run() -> None:
        manager = async_crawl_module.CrawlManager(_DummyClient())
        # Job id with leading " \n", a 500-character body and a trailing tab.
        long_job_id = " \n" + ("x" * 500) + "\t"
        captured = {}

        async def fake_start(params):
            return SimpleNamespace(job_id=long_job_id)

        async def fake_poll_until_terminal_status_async(**kwargs):
            operation_name = kwargs["operation_name"]
            _assert_valid_operation_name(operation_name)
            captured["poll_operation_name"] = operation_name
            return "completed"

        async def fake_collect_paginated_results_async(**kwargs):
            operation_name = kwargs["operation_name"]
            _assert_valid_operation_name(operation_name)
            captured["collect_operation_name"] = operation_name
            # Pagination must reuse exactly the name that polling was given.
            assert operation_name == captured["poll_operation_name"]

        monkeypatch.setattr(manager, "start", fake_start)
        monkeypatch.setattr(
            async_crawl_module,
            "poll_until_terminal_status_async",
            fake_poll_until_terminal_status_async,
        )
        monkeypatch.setattr(
            async_crawl_module,
            "collect_paginated_results_async",
            fake_collect_paginated_results_async,
        )

        result = await manager.start_and_wait(
            params=object(),  # type: ignore[arg-type]
            return_all_pages=True,
        )

        assert result.status == "completed"
        assert captured["poll_operation_name"].startswith("crawl job ")
        assert captured["poll_operation_name"].endswith("...")
        # NOTE(review): nothing here verifies that the collect stub was
        # actually invoked; consider asserting that "collect_operation_name"
        # is in `captured` once the manager's behavior is confirmed.

    asyncio.run(run())

0 commit comments

Comments
 (0)