Skip to content

Commit 56b2d2d

Browse files
Centralize paginated web job response initialization
Co-authored-by: Shri Sukhani <shrisukhani@users.noreply.github.com>
1 parent 86ac741 commit 56b2d2d

File tree

7 files changed

+78
-28
lines changed

7 files changed

+78
-28
lines changed

hyperbrowser/client/managers/async_manager/web/batch_fetch.py

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
)
1111
from ...web_payload_utils import build_batch_fetch_start_payload
1212
from ...web_payload_utils import build_batch_fetch_get_params
13+
from ...web_pagination_utils import initialize_paginated_job_response
1314
from ....polling import (
1415
build_fetch_operation_name,
1516
build_operation_name,
@@ -96,14 +97,10 @@ async def start_and_wait(
9697
retry_delay_seconds=0.5,
9798
)
9899

99-
job_response = BatchFetchJobResponse(
100-
jobId=job_id,
100+
job_response = initialize_paginated_job_response(
101+
model=BatchFetchJobResponse,
102+
job_id=job_id,
101103
status=job_status,
102-
data=[],
103-
currentPageBatch=0,
104-
totalPageBatches=0,
105-
totalPages=0,
106-
batchSize=100,
107104
)
108105

109106
def merge_page_response(page_response: BatchFetchJobResponse) -> None:

hyperbrowser/client/managers/async_manager/web/crawl.py

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
)
1111
from ...web_payload_utils import build_web_crawl_start_payload
1212
from ...web_payload_utils import build_web_crawl_get_params
13+
from ...web_pagination_utils import initialize_paginated_job_response
1314
from ....polling import (
1415
build_fetch_operation_name,
1516
build_operation_name,
@@ -94,14 +95,10 @@ async def start_and_wait(
9495
retry_delay_seconds=0.5,
9596
)
9697

97-
job_response = WebCrawlJobResponse(
98-
jobId=job_id,
98+
job_response = initialize_paginated_job_response(
99+
model=WebCrawlJobResponse,
100+
job_id=job_id,
99101
status=job_status,
100-
data=[],
101-
currentPageBatch=0,
102-
totalPageBatches=0,
103-
totalPages=0,
104-
batchSize=100,
105102
)
106103

107104
def merge_page_response(page_response: WebCrawlJobResponse) -> None:

hyperbrowser/client/managers/sync_manager/web/batch_fetch.py

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
)
1111
from ...web_payload_utils import build_batch_fetch_start_payload
1212
from ...web_payload_utils import build_batch_fetch_get_params
13+
from ...web_pagination_utils import initialize_paginated_job_response
1314
from ....polling import (
1415
build_fetch_operation_name,
1516
build_operation_name,
@@ -94,14 +95,10 @@ def start_and_wait(
9495
retry_delay_seconds=0.5,
9596
)
9697

97-
job_response = BatchFetchJobResponse(
98-
jobId=job_id,
98+
job_response = initialize_paginated_job_response(
99+
model=BatchFetchJobResponse,
100+
job_id=job_id,
99101
status=job_status,
100-
data=[],
101-
currentPageBatch=0,
102-
totalPageBatches=0,
103-
totalPages=0,
104-
batchSize=100,
105102
)
106103

107104
def merge_page_response(page_response: BatchFetchJobResponse) -> None:

hyperbrowser/client/managers/sync_manager/web/crawl.py

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
)
1111
from ...web_payload_utils import build_web_crawl_start_payload
1212
from ...web_payload_utils import build_web_crawl_get_params
13+
from ...web_pagination_utils import initialize_paginated_job_response
1314
from ....polling import (
1415
build_fetch_operation_name,
1516
build_operation_name,
@@ -94,14 +95,10 @@ def start_and_wait(
9495
retry_delay_seconds=0.5,
9596
)
9697

97-
job_response = WebCrawlJobResponse(
98-
jobId=job_id,
98+
job_response = initialize_paginated_job_response(
99+
model=WebCrawlJobResponse,
100+
job_id=job_id,
99101
status=job_status,
100-
data=[],
101-
currentPageBatch=0,
102-
totalPageBatches=0,
103-
totalPages=0,
104-
batchSize=100,
105102
)
106103

107104
def merge_page_response(page_response: WebCrawlJobResponse) -> None:
hyperbrowser/client/managers/web_pagination_utils.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
from typing import Type, TypeVar
2+
3+
T = TypeVar("T")
4+
5+
6+
def initialize_paginated_job_response(
7+
*,
8+
model: Type[T],
9+
job_id: str,
10+
status: str,
11+
batch_size: int = 100,
12+
) -> T:
13+
return model(
14+
jobId=job_id,
15+
status=status,
16+
data=[],
17+
currentPageBatch=0,
18+
totalPageBatches=0,
19+
totalPages=0,
20+
batchSize=batch_size,
21+
)

tests/test_web_pagination_utils.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
from hyperbrowser.client.managers.web_pagination_utils import (
2+
initialize_paginated_job_response,
3+
)
4+
from hyperbrowser.models import BatchFetchJobResponse, WebCrawlJobResponse
5+
6+
7+
def test_initialize_paginated_job_response_for_batch_fetch():
8+
response = initialize_paginated_job_response(
9+
model=BatchFetchJobResponse,
10+
job_id="job-1",
11+
status="completed",
12+
)
13+
14+
assert response.job_id == "job-1"
15+
assert response.status == "completed"
16+
assert response.data == []
17+
assert response.current_page_batch == 0
18+
assert response.total_page_batches == 0
19+
assert response.total_pages == 0
20+
assert response.batch_size == 100
21+
22+
23+
def test_initialize_paginated_job_response_for_web_crawl_with_custom_batch_size():
24+
response = initialize_paginated_job_response(
25+
model=WebCrawlJobResponse,
26+
job_id="job-2",
27+
status="running",
28+
batch_size=25,
29+
)
30+
31+
assert response.job_id == "job-2"
32+
assert response.status == "running"
33+
assert response.data == []
34+
assert response.current_page_batch == 0
35+
assert response.total_page_batches == 0
36+
assert response.total_pages == 0
37+
assert response.batch_size == 25

tests/test_web_payload_helper_usage.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,14 +35,18 @@ def test_batch_fetch_managers_use_shared_start_payload_helper():
3535
module_text = Path(module_path).read_text(encoding="utf-8")
3636
assert "build_batch_fetch_start_payload(" in module_text
3737
assert "build_batch_fetch_get_params(" in module_text
38+
assert "initialize_paginated_job_response(" in module_text
3839
assert "inject_web_output_schemas(" not in module_text
3940
assert "serialize_model_dump_to_dict(" not in module_text
41+
assert "BatchFetchJobResponse(" not in module_text
4042

4143

4244
def test_web_crawl_managers_use_shared_start_payload_helper():
4345
for module_path in WEB_CRAWL_MANAGER_MODULES:
4446
module_text = Path(module_path).read_text(encoding="utf-8")
4547
assert "build_web_crawl_start_payload(" in module_text
4648
assert "build_web_crawl_get_params(" in module_text
49+
assert "initialize_paginated_job_response(" in module_text
4750
assert "inject_web_output_schemas(" not in module_text
4851
assert "serialize_model_dump_to_dict(" not in module_text
52+
assert "WebCrawlJobResponse(" not in module_text

0 commit comments

Comments (0)