Skip to content

Commit 925cbaa

Browse files
Improve polling resilience on transient status errors
Co-authored-by: Shri Sukhani <shrisukhani@users.noreply.github.com>
1 parent 4abb2c3 commit 925cbaa

2 files changed

Lines changed: 82 additions & 2 deletions

File tree

hyperbrowser/client/polling.py

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,16 +20,29 @@ def poll_until_terminal_status(
2020
is_terminal_status: Callable[[str], bool],
2121
poll_interval_seconds: float,
2222
max_wait_seconds: Optional[float],
23+
max_status_failures: int = 5,
2324
) -> str:
2425
start_time = time.monotonic()
26+
failures = 0
2527

2628
while True:
2729
if has_exceeded_max_wait(start_time, max_wait_seconds):
2830
raise HyperbrowserError(
2931
f"Timed out waiting for {operation_name} after {max_wait_seconds} seconds"
3032
)
3133

32-
status = get_status()
34+
try:
35+
status = get_status()
36+
failures = 0
37+
except Exception as exc:
38+
failures += 1
39+
if failures >= max_status_failures:
40+
raise HyperbrowserError(
41+
f"Failed to poll {operation_name} after {max_status_failures} attempts: {exc}"
42+
) from exc
43+
time.sleep(poll_interval_seconds)
44+
continue
45+
3346
if is_terminal_status(status):
3447
return status
3548
time.sleep(poll_interval_seconds)
@@ -62,16 +75,29 @@ async def poll_until_terminal_status_async(
6275
is_terminal_status: Callable[[str], bool],
6376
poll_interval_seconds: float,
6477
max_wait_seconds: Optional[float],
78+
max_status_failures: int = 5,
6579
) -> str:
6680
start_time = time.monotonic()
81+
failures = 0
6782

6883
while True:
6984
if has_exceeded_max_wait(start_time, max_wait_seconds):
7085
raise HyperbrowserError(
7186
f"Timed out waiting for {operation_name} after {max_wait_seconds} seconds"
7287
)
7388

74-
status = await get_status()
89+
try:
90+
status = await get_status()
91+
failures = 0
92+
except Exception as exc:
93+
failures += 1
94+
if failures >= max_status_failures:
95+
raise HyperbrowserError(
96+
f"Failed to poll {operation_name} after {max_status_failures} attempts: {exc}"
97+
) from exc
98+
await asyncio.sleep(poll_interval_seconds)
99+
continue
100+
75101
if is_terminal_status(status):
76102
return status
77103
await asyncio.sleep(poll_interval_seconds)

tests/test_polling.py

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,38 @@ def test_poll_until_terminal_status_times_out():
3636
)
3737

3838

39+
def test_poll_until_terminal_status_retries_transient_status_errors():
    """Status-fetch errors on the first two polls are retried; the third poll's status is returned."""
    call_count = 0

    def get_status() -> str:
        # Raise on calls one and two, then report a terminal status.
        nonlocal call_count
        call_count += 1
        if call_count >= 3:
            return "completed"
        raise ValueError("temporary")

    def is_terminal(value: str) -> bool:
        return value in {"completed", "failed"}

    final_status = poll_until_terminal_status(
        operation_name="sync poll retries",
        get_status=get_status,
        is_terminal_status=is_terminal,
        poll_interval_seconds=0.0001,
        max_wait_seconds=1.0,
    )

    assert final_status == "completed"
57+
58+
59+
def test_poll_until_terminal_status_raises_after_status_failures():
    """A status callback that fails on every call ends in HyperbrowserError at the failure limit."""

    def always_failing_status() -> str:
        # Every poll attempt fails, so the failure counter is never reset.
        raise ValueError("always")

    def is_terminal(value: str) -> bool:
        return value in {"completed", "failed"}

    expected_message = "Failed to poll sync poll failure"
    with pytest.raises(HyperbrowserError, match=expected_message):
        poll_until_terminal_status(
            operation_name="sync poll failure",
            get_status=always_failing_status,
            is_terminal_status=is_terminal,
            poll_interval_seconds=0.0001,
            max_wait_seconds=1.0,
            max_status_failures=2,
        )
69+
70+
3971
def test_retry_operation_retries_and_returns_value():
4072
attempts = {"count": 0}
4173

@@ -95,3 +127,25 @@ async def operation() -> str:
95127
assert result == "ok"
96128

97129
asyncio.run(run())
130+
131+
132+
def test_async_poll_until_terminal_status_retries_transient_status_errors():
    """Async variant: errors on the first two polls are retried; the third poll's status is returned."""

    async def run() -> None:
        call_count = 0

        async def get_status() -> str:
            # Raise on calls one and two, then report a terminal status.
            nonlocal call_count
            call_count += 1
            if call_count >= 3:
                return "completed"
            raise ValueError("temporary")

        def is_terminal(value: str) -> bool:
            return value in {"completed", "failed"}

        final_status = await poll_until_terminal_status_async(
            operation_name="async poll retries",
            get_status=get_status,
            is_terminal_status=is_terminal,
            poll_interval_seconds=0.0001,
            max_wait_seconds=1.0,
        )
        assert final_status == "completed"

    asyncio.run(run())

0 commit comments

Comments
 (0)