Skip to content

Commit 558832b

Browse files
Centralize scrape and crawl start payload builders
Co-authored-by: Shri Sukhani <shrisukhani@users.noreply.github.com>
1 parent c905fe4 commit 558832b

10 files changed

Lines changed: 187 additions & 28 deletions

File tree

CONTRIBUTING.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,7 @@ This runs lint, format checks, compile checks, tests, and package build.
9999
- `tests/test_extract_payload_helper_usage.py` (extract start-payload helper usage enforcement),
100100
- `tests/test_guardrail_ast_utils.py` (shared AST guard utility contract),
101101
- `tests/test_job_pagination_helper_usage.py` (shared scrape/crawl pagination helper usage enforcement),
102+
- `tests/test_job_start_payload_helper_usage.py` (shared scrape/crawl start-payload helper usage enforcement),
102103
- `tests/test_job_wait_helper_boundary.py` (centralization boundary enforcement for wait-for-job helper primitives),
103104
- `tests/test_job_wait_helper_usage.py` (shared wait-for-job defaults helper usage enforcement),
104105
- `tests/test_makefile_quality_targets.py` (Makefile quality-gate target enforcement),

hyperbrowser/client/managers/async_manager/crawl.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,9 @@
1313
initialize_job_paginated_response,
1414
)
1515
from ..job_status_utils import is_default_terminal_job_status
16+
from ..job_start_payload_utils import build_crawl_start_payload
1617
from ..serialization_utils import (
1718
serialize_model_dump_or_default,
18-
serialize_model_dump_to_dict,
1919
)
2020
from ..response_utils import parse_response_model
2121
from ..start_job_utils import build_started_job_context
@@ -33,10 +33,7 @@ def __init__(self, client):
3333
self._client = client
3434

3535
async def start(self, params: StartCrawlJobParams) -> StartCrawlJobResponse:
36-
payload = serialize_model_dump_to_dict(
37-
params,
38-
error_message="Failed to serialize crawl start params",
39-
)
36+
payload = build_crawl_start_payload(params)
4037
response = await self._client.transport.post(
4138
self._client._build_url("/crawl"),
4239
data=payload,

hyperbrowser/client/managers/async_manager/scrape.py

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,12 @@
1414
)
1515
from ..job_status_utils import is_default_terminal_job_status
1616
from ..job_wait_utils import wait_for_job_result_with_defaults_async
17+
from ..job_start_payload_utils import (
18+
build_batch_scrape_start_payload,
19+
build_scrape_start_payload,
20+
)
1721
from ..serialization_utils import (
1822
serialize_model_dump_or_default,
19-
serialize_model_dump_to_dict,
2023
)
2124
from ..response_utils import parse_response_model
2225
from ..start_job_utils import build_started_job_context
@@ -40,10 +43,7 @@ def __init__(self, client):
4043
async def start(
4144
self, params: StartBatchScrapeJobParams
4245
) -> StartBatchScrapeJobResponse:
43-
payload = serialize_model_dump_to_dict(
44-
params,
45-
error_message="Failed to serialize batch scrape start params",
46-
)
46+
payload = build_batch_scrape_start_payload(params)
4747
response = await self._client.transport.post(
4848
self._client._build_url("/scrape/batch"),
4949
data=payload,
@@ -154,10 +154,7 @@ def __init__(self, client):
154154
self.batch = BatchScrapeManager(client)
155155

156156
async def start(self, params: StartScrapeJobParams) -> StartScrapeJobResponse:
157-
payload = serialize_model_dump_to_dict(
158-
params,
159-
error_message="Failed to serialize scrape start params",
160-
)
157+
payload = build_scrape_start_payload(params)
161158
response = await self._client.transport.post(
162159
self._client._build_url("/scrape"),
163160
data=payload,
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
from typing import Any, Dict
2+
3+
from hyperbrowser.models.crawl import StartCrawlJobParams
4+
from hyperbrowser.models.scrape import StartBatchScrapeJobParams, StartScrapeJobParams
5+
6+
from .serialization_utils import serialize_model_dump_to_dict
7+
8+
9+
def build_scrape_start_payload(params: StartScrapeJobParams) -> Dict[str, Any]:
    """Serialize scrape start params into the POST payload dict.

    Delegates to the shared serializer, which raises a wrapped error
    (see the helper-usage tests) when the model cannot be dumped.
    """
    failure_message = "Failed to serialize scrape start params"
    return serialize_model_dump_to_dict(params, error_message=failure_message)
14+
15+
16+
def build_batch_scrape_start_payload(
    params: StartBatchScrapeJobParams,
) -> Dict[str, Any]:
    """Serialize batch-scrape start params into the POST payload dict.

    Delegates to the shared serializer, which raises a wrapped error
    (see the helper-usage tests) when the model cannot be dumped.
    """
    failure_message = "Failed to serialize batch scrape start params"
    return serialize_model_dump_to_dict(params, error_message=failure_message)
23+
24+
25+
def build_crawl_start_payload(params: StartCrawlJobParams) -> Dict[str, Any]:
    """Serialize crawl start params into the POST payload dict.

    Delegates to the shared serializer, which raises a wrapped error
    (see the helper-usage tests) when the model cannot be dumped.
    """
    failure_message = "Failed to serialize crawl start params"
    return serialize_model_dump_to_dict(params, error_message=failure_message)

hyperbrowser/client/managers/sync_manager/crawl.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,9 @@
1313
initialize_job_paginated_response,
1414
)
1515
from ..job_status_utils import is_default_terminal_job_status
16+
from ..job_start_payload_utils import build_crawl_start_payload
1617
from ..serialization_utils import (
1718
serialize_model_dump_or_default,
18-
serialize_model_dump_to_dict,
1919
)
2020
from ..response_utils import parse_response_model
2121
from ..start_job_utils import build_started_job_context
@@ -33,10 +33,7 @@ def __init__(self, client):
3333
self._client = client
3434

3535
def start(self, params: StartCrawlJobParams) -> StartCrawlJobResponse:
36-
payload = serialize_model_dump_to_dict(
37-
params,
38-
error_message="Failed to serialize crawl start params",
39-
)
36+
payload = build_crawl_start_payload(params)
4037
response = self._client.transport.post(
4138
self._client._build_url("/crawl"),
4239
data=payload,

hyperbrowser/client/managers/sync_manager/scrape.py

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,12 @@
1414
)
1515
from ..job_status_utils import is_default_terminal_job_status
1616
from ..job_wait_utils import wait_for_job_result_with_defaults
17+
from ..job_start_payload_utils import (
18+
build_batch_scrape_start_payload,
19+
build_scrape_start_payload,
20+
)
1721
from ..serialization_utils import (
1822
serialize_model_dump_or_default,
19-
serialize_model_dump_to_dict,
2023
)
2124
from ..response_utils import parse_response_model
2225
from ..start_job_utils import build_started_job_context
@@ -38,10 +41,7 @@ def __init__(self, client):
3841
self._client = client
3942

4043
def start(self, params: StartBatchScrapeJobParams) -> StartBatchScrapeJobResponse:
41-
payload = serialize_model_dump_to_dict(
42-
params,
43-
error_message="Failed to serialize batch scrape start params",
44-
)
44+
payload = build_batch_scrape_start_payload(params)
4545
response = self._client.transport.post(
4646
self._client._build_url("/scrape/batch"),
4747
data=payload,
@@ -152,10 +152,7 @@ def __init__(self, client):
152152
self.batch = BatchScrapeManager(client)
153153

154154
def start(self, params: StartScrapeJobParams) -> StartScrapeJobResponse:
155-
payload = serialize_model_dump_to_dict(
156-
params,
157-
error_message="Failed to serialize scrape start params",
158-
)
155+
payload = build_scrape_start_payload(params)
159156
response = self._client.transport.post(
160157
self._client._build_url("/scrape"),
161158
data=payload,

tests/test_architecture_marker_usage.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
"tests/test_extract_payload_helper_usage.py",
4040
"tests/test_examples_naming_convention.py",
4141
"tests/test_job_pagination_helper_usage.py",
42+
"tests/test_job_start_payload_helper_usage.py",
4243
"tests/test_job_wait_helper_boundary.py",
4344
"tests/test_job_wait_helper_usage.py",
4445
"tests/test_example_sync_async_parity.py",

tests/test_core_type_helper_usage.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
"hyperbrowser/client/managers/extension_create_utils.py",
3535
"hyperbrowser/client/managers/extract_payload_utils.py",
3636
"hyperbrowser/client/managers/job_pagination_utils.py",
37+
"hyperbrowser/client/managers/job_start_payload_utils.py",
3738
"hyperbrowser/client/managers/page_params_utils.py",
3839
"hyperbrowser/client/managers/job_wait_utils.py",
3940
"hyperbrowser/client/managers/session_upload_utils.py",
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
from pathlib import Path
2+
3+
import pytest
4+
5+
pytestmark = pytest.mark.architecture
6+
7+
8+
# Manager modules that must build job-start payloads via the shared helpers.
MODULES = (
    "hyperbrowser/client/managers/sync_manager/scrape.py",
    "hyperbrowser/client/managers/async_manager/scrape.py",
    "hyperbrowser/client/managers/sync_manager/crawl.py",
    "hyperbrowser/client/managers/async_manager/crawl.py",
)


def test_scrape_and_crawl_managers_use_shared_start_payload_helpers():
    """Guardrail: scrape/crawl managers must use job_start_payload_utils.

    Each manager module must call the centralized payload builders and must
    not call serialize_model_dump_to_dict directly. Assert messages name the
    offending module so a failure is immediately attributable.
    """
    for module_path in MODULES:
        module_text = Path(module_path).read_text(encoding="utf-8")
        if module_path.endswith("scrape.py"):
            # Scrape managers expose both single and batch start endpoints.
            assert "build_batch_scrape_start_payload(" in module_text, module_path
            assert "build_scrape_start_payload(" in module_text, module_path
        else:
            assert "build_crawl_start_payload(" in module_text, module_path
        # Direct serializer calls are banned now that the helpers exist.
        assert "serialize_model_dump_to_dict(" not in module_text, module_path
Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
from types import MappingProxyType
2+
3+
import pytest
4+
5+
from hyperbrowser.client.managers.job_start_payload_utils import (
6+
build_batch_scrape_start_payload,
7+
build_crawl_start_payload,
8+
build_scrape_start_payload,
9+
)
10+
from hyperbrowser.exceptions import HyperbrowserError
11+
from hyperbrowser.models.crawl import StartCrawlJobParams
12+
from hyperbrowser.models.scrape import StartBatchScrapeJobParams, StartScrapeJobParams
13+
14+
15+
def test_build_scrape_start_payload_serializes_model() -> None:
    """A scrape params model serializes to its plain-dict payload."""
    params = StartScrapeJobParams(url="https://example.com")

    assert build_scrape_start_payload(params) == {"url": "https://example.com"}
19+
20+
21+
def test_build_batch_scrape_start_payload_serializes_model() -> None:
    """A batch-scrape params model serializes to its plain-dict payload."""
    params = StartBatchScrapeJobParams(urls=["https://example.com"])

    assert build_batch_scrape_start_payload(params) == {
        "urls": ["https://example.com"]
    }
27+
28+
29+
def test_build_crawl_start_payload_serializes_model() -> None:
    """Crawl params serialize with their alias keys (max_pages -> maxPages)."""
    params = StartCrawlJobParams(url="https://example.com", max_pages=5)
    payload = build_crawl_start_payload(params)

    assert payload["url"] == "https://example.com"
    assert payload["maxPages"] == 5
39+
40+
41+
@pytest.mark.parametrize(
    ("builder", "params", "error_message"),
    [
        (
            build_scrape_start_payload,
            StartScrapeJobParams(url="https://example.com"),
            "Failed to serialize scrape start params",
        ),
        (
            build_batch_scrape_start_payload,
            StartBatchScrapeJobParams(urls=["https://example.com"]),
            "Failed to serialize batch scrape start params",
        ),
        (
            build_crawl_start_payload,
            StartCrawlJobParams(url="https://example.com"),
            "Failed to serialize crawl start params",
        ),
    ],
)
def test_job_start_payload_builders_wrap_runtime_serialization_errors(
    monkeypatch: pytest.MonkeyPatch,
    builder,
    params,
    error_message: str,
) -> None:
    """Each builder wraps a model_dump() failure in a HyperbrowserError."""

    def _broken_model_dump(*_args, **_kwargs):
        raise RuntimeError("broken model_dump")

    # Patch on the class so the builder's internal dump call hits the stub.
    monkeypatch.setattr(type(params), "model_dump", _broken_model_dump)

    with pytest.raises(HyperbrowserError, match=error_message) as exc_info:
        builder(params)

    # The underlying RuntimeError must be preserved for callers to inspect.
    assert isinstance(exc_info.value.original_error, RuntimeError)
78+
79+
80+
@pytest.mark.parametrize(
    ("builder", "params", "error_message"),
    [
        (
            build_scrape_start_payload,
            StartScrapeJobParams(url="https://example.com"),
            "Failed to serialize scrape start params",
        ),
        (
            build_batch_scrape_start_payload,
            StartBatchScrapeJobParams(urls=["https://example.com"]),
            "Failed to serialize batch scrape start params",
        ),
        (
            build_crawl_start_payload,
            StartCrawlJobParams(url="https://example.com"),
            "Failed to serialize crawl start params",
        ),
    ],
)
def test_job_start_payload_builders_reject_non_dict_model_dump_payloads(
    monkeypatch: pytest.MonkeyPatch,
    builder,
    params,
    error_message: str,
) -> None:
    """Builders reject model_dump() results that are not plain dicts."""

    def _mapping_proxy_dump(*_args, **_kwargs):
        # A Mapping that is not a dict — must be rejected by the builder.
        return MappingProxyType({"value": 1})

    monkeypatch.setattr(type(params), "model_dump", _mapping_proxy_dump)

    with pytest.raises(HyperbrowserError, match=error_message) as exc_info:
        builder(params)

    # Type rejection originates in the helper, not from a wrapped exception.
    assert exc_info.value.original_error is None

0 commit comments

Comments
 (0)