Skip to content

Commit d100327

Browse files
Reuse shared page batch accessors in paginated managers
Co-authored-by: Shri Sukhani <shrisukhani@users.noreply.github.com>
1 parent 8658934 commit d100327

11 files changed

Lines changed: 66 additions & 49 deletions

File tree

hyperbrowser/client/managers/async_manager/crawl.py

Lines changed: 4 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,8 @@
77
from ..job_fetch_utils import (
88
collect_paginated_results_with_defaults_async,
99
fetch_job_result_with_defaults_async,
10+
read_page_current_batch,
11+
read_page_total_batches,
1012
)
1113
from ..page_params_utils import build_page_batch_params
1214
from ..job_pagination_utils import (
@@ -119,12 +121,8 @@ async def start_and_wait(
119121
page=page,
120122
),
121123
),
122-
get_current_page_batch=lambda page_response: (
123-
page_response.current_page_batch
124-
),
125-
get_total_page_batches=lambda page_response: (
126-
page_response.total_page_batches
127-
),
124+
get_current_page_batch=read_page_current_batch,
125+
get_total_page_batches=read_page_total_batches,
128126
on_page_success=build_job_paginated_page_merge_callback(
129127
job_response=job_response,
130128
total_counter_attr="total_crawled_pages",

hyperbrowser/client/managers/async_manager/scrape.py

Lines changed: 4 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,8 @@
77
from ..job_fetch_utils import (
88
collect_paginated_results_with_defaults_async,
99
fetch_job_result_with_defaults_async,
10+
read_page_current_batch,
11+
read_page_total_batches,
1012
)
1113
from ..page_params_utils import build_page_batch_params
1214
from ..job_pagination_utils import (
@@ -129,12 +131,8 @@ async def start_and_wait(
129131
page=page,
130132
),
131133
),
132-
get_current_page_batch=lambda page_response: (
133-
page_response.current_page_batch
134-
),
135-
get_total_page_batches=lambda page_response: (
136-
page_response.total_page_batches
137-
),
134+
get_current_page_batch=read_page_current_batch,
135+
get_total_page_batches=read_page_total_batches,
138136
on_page_success=build_job_paginated_page_merge_callback(
139137
job_response=job_response,
140138
total_counter_attr="total_scraped_pages",

hyperbrowser/client/managers/async_manager/web/batch_fetch.py

Lines changed: 4 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,8 @@
1919
from ...job_fetch_utils import (
2020
collect_paginated_results_with_defaults_async,
2121
fetch_job_result_with_defaults_async,
22+
read_page_current_batch,
23+
read_page_total_batches,
2224
)
2325
from ....polling import (
2426
poll_until_terminal_status_async,
@@ -115,12 +117,8 @@ async def start_and_wait(
115117
page=page,
116118
),
117119
),
118-
get_current_page_batch=lambda page_response: (
119-
page_response.current_page_batch
120-
),
121-
get_total_page_batches=lambda page_response: (
122-
page_response.total_page_batches
123-
),
120+
get_current_page_batch=read_page_current_batch,
121+
get_total_page_batches=read_page_total_batches,
124122
on_page_success=build_paginated_page_merge_callback(
125123
job_response=job_response,
126124
),

hyperbrowser/client/managers/async_manager/web/crawl.py

Lines changed: 4 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,8 @@
1919
from ...job_fetch_utils import (
2020
collect_paginated_results_with_defaults_async,
2121
fetch_job_result_with_defaults_async,
22+
read_page_current_batch,
23+
read_page_total_batches,
2224
)
2325
from ....polling import (
2426
poll_until_terminal_status_async,
@@ -113,12 +115,8 @@ async def start_and_wait(
113115
page=page,
114116
),
115117
),
116-
get_current_page_batch=lambda page_response: (
117-
page_response.current_page_batch
118-
),
119-
get_total_page_batches=lambda page_response: (
120-
page_response.total_page_batches
121-
),
118+
get_current_page_batch=read_page_current_batch,
119+
get_total_page_batches=read_page_total_batches,
122120
on_page_success=build_paginated_page_merge_callback(
123121
job_response=job_response,
124122
),

hyperbrowser/client/managers/job_fetch_utils.py

Lines changed: 9 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
from typing import Awaitable, Callable, Optional, TypeVar
1+
from typing import Any, Awaitable, Callable, Optional, TypeVar
22

33
from hyperbrowser.models.consts import POLLING_ATTEMPTS
44

@@ -62,6 +62,14 @@ async def fetch_job_result_with_defaults_async(
6262
)
6363

6464

65+
def read_page_current_batch(page_response: Any) -> int:
66+
return page_response.current_page_batch
67+
68+
69+
def read_page_total_batches(page_response: Any) -> int:
70+
return page_response.total_page_batches
71+
72+
6573
def collect_paginated_results_with_defaults(
6674
*,
6775
operation_name: str,

hyperbrowser/client/managers/sync_manager/crawl.py

Lines changed: 4 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,8 @@
77
from ..job_fetch_utils import (
88
collect_paginated_results_with_defaults,
99
fetch_job_result_with_defaults,
10+
read_page_current_batch,
11+
read_page_total_batches,
1012
)
1113
from ..page_params_utils import build_page_batch_params
1214
from ..job_pagination_utils import (
@@ -119,12 +121,8 @@ def start_and_wait(
119121
page=page,
120122
),
121123
),
122-
get_current_page_batch=lambda page_response: (
123-
page_response.current_page_batch
124-
),
125-
get_total_page_batches=lambda page_response: (
126-
page_response.total_page_batches
127-
),
124+
get_current_page_batch=read_page_current_batch,
125+
get_total_page_batches=read_page_total_batches,
128126
on_page_success=build_job_paginated_page_merge_callback(
129127
job_response=job_response,
130128
total_counter_attr="total_crawled_pages",

hyperbrowser/client/managers/sync_manager/scrape.py

Lines changed: 4 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,8 @@
77
from ..job_fetch_utils import (
88
collect_paginated_results_with_defaults,
99
fetch_job_result_with_defaults,
10+
read_page_current_batch,
11+
read_page_total_batches,
1012
)
1113
from ..page_params_utils import build_page_batch_params
1214
from ..job_pagination_utils import (
@@ -127,12 +129,8 @@ def start_and_wait(
127129
page=page,
128130
),
129131
),
130-
get_current_page_batch=lambda page_response: (
131-
page_response.current_page_batch
132-
),
133-
get_total_page_batches=lambda page_response: (
134-
page_response.total_page_batches
135-
),
132+
get_current_page_batch=read_page_current_batch,
133+
get_total_page_batches=read_page_total_batches,
136134
on_page_success=build_job_paginated_page_merge_callback(
137135
job_response=job_response,
138136
total_counter_attr="total_scraped_pages",

hyperbrowser/client/managers/sync_manager/web/batch_fetch.py

Lines changed: 4 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,8 @@
1919
from ...job_fetch_utils import (
2020
collect_paginated_results_with_defaults,
2121
fetch_job_result_with_defaults,
22+
read_page_current_batch,
23+
read_page_total_batches,
2224
)
2325
from ....polling import (
2426
poll_until_terminal_status,
@@ -113,12 +115,8 @@ def start_and_wait(
113115
page=page,
114116
),
115117
),
116-
get_current_page_batch=lambda page_response: (
117-
page_response.current_page_batch
118-
),
119-
get_total_page_batches=lambda page_response: (
120-
page_response.total_page_batches
121-
),
118+
get_current_page_batch=read_page_current_batch,
119+
get_total_page_batches=read_page_total_batches,
122120
on_page_success=build_paginated_page_merge_callback(
123121
job_response=job_response,
124122
),

hyperbrowser/client/managers/sync_manager/web/crawl.py

Lines changed: 4 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,8 @@
1919
from ...job_fetch_utils import (
2020
collect_paginated_results_with_defaults,
2121
fetch_job_result_with_defaults,
22+
read_page_current_batch,
23+
read_page_total_batches,
2224
)
2325
from ....polling import (
2426
poll_until_terminal_status,
@@ -113,12 +115,8 @@ def start_and_wait(
113115
page=page,
114116
),
115117
),
116-
get_current_page_batch=lambda page_response: (
117-
page_response.current_page_batch
118-
),
119-
get_total_page_batches=lambda page_response: (
120-
page_response.total_page_batches
121-
),
118+
get_current_page_batch=read_page_current_batch,
119+
get_total_page_batches=read_page_total_batches,
122120
on_page_success=build_paginated_page_merge_callback(
123121
job_response=job_response,
124122
),

tests/test_job_fetch_helper_usage.py

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -25,9 +25,13 @@ def test_sync_managers_use_job_fetch_helpers_with_defaults():
2525
module_text = Path(module_path).read_text(encoding="utf-8")
2626
assert "fetch_job_result_with_defaults(" in module_text
2727
assert "collect_paginated_results_with_defaults(" in module_text
28+
assert "read_page_current_batch" in module_text
29+
assert "read_page_total_batches" in module_text
2830
assert "retry_operation(" not in module_text
2931
assert "collect_paginated_results(" not in module_text
3032
assert "build_fetch_operation_name(" not in module_text
33+
assert "get_current_page_batch=lambda page_response:" not in module_text
34+
assert "get_total_page_batches=lambda page_response:" not in module_text
3135
assert "max_attempts=POLLING_ATTEMPTS" not in module_text
3236
assert "retry_delay_seconds=0.5" not in module_text
3337

@@ -37,8 +41,12 @@ def test_async_managers_use_job_fetch_helpers_with_defaults():
3741
module_text = Path(module_path).read_text(encoding="utf-8")
3842
assert "fetch_job_result_with_defaults_async(" in module_text
3943
assert "collect_paginated_results_with_defaults_async(" in module_text
44+
assert "read_page_current_batch" in module_text
45+
assert "read_page_total_batches" in module_text
4046
assert "retry_operation_async(" not in module_text
4147
assert "collect_paginated_results_async(" not in module_text
4248
assert "build_fetch_operation_name(" not in module_text
49+
assert "get_current_page_batch=lambda page_response:" not in module_text
50+
assert "get_total_page_batches=lambda page_response:" not in module_text
4351
assert "max_attempts=POLLING_ATTEMPTS" not in module_text
4452
assert "retry_delay_seconds=0.5" not in module_text

0 commit comments

Comments
 (0)