Skip to content

Commit 6ab3707

Browse files
Harden URL parsing boundaries in client path builder
Co-authored-by: Shri Sukhani <shrisukhani@users.noreply.github.com>
1 parent dc49768 commit 6ab3707

2 files changed

Lines changed: 203 additions & 11 deletions

File tree

hyperbrowser/client/base.py

Lines changed: 70 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,50 @@ def __init__(
8080
self.config = config
8181
self.transport = transport(config.api_key, headers=config.headers)
8282

83+
@staticmethod
84+
def _parse_url_components(
85+
url_value: str, *, component_label: str
86+
) -> tuple[str, str, str, str, str]:
87+
try:
88+
parsed_url = urlparse(url_value)
89+
except HyperbrowserError:
90+
raise
91+
except Exception as exc:
92+
raise HyperbrowserError(
93+
f"Failed to parse {component_label}",
94+
original_error=exc,
95+
) from exc
96+
try:
97+
parsed_url_scheme = parsed_url.scheme
98+
parsed_url_netloc = parsed_url.netloc
99+
parsed_url_path = parsed_url.path
100+
parsed_url_query = parsed_url.query
101+
parsed_url_fragment = parsed_url.fragment
102+
except HyperbrowserError:
103+
raise
104+
except Exception as exc:
105+
raise HyperbrowserError(
106+
f"Failed to parse {component_label} components",
107+
original_error=exc,
108+
) from exc
109+
if (
110+
type(parsed_url_scheme) is not str
111+
or type(parsed_url_netloc) is not str
112+
or type(parsed_url_path) is not str
113+
or type(parsed_url_query) is not str
114+
or type(parsed_url_fragment) is not str
115+
):
116+
raise HyperbrowserError(
117+
f"{component_label} parser returned invalid URL components"
118+
)
119+
return (
120+
parsed_url_scheme,
121+
parsed_url_netloc,
122+
parsed_url_path,
123+
parsed_url_query,
124+
parsed_url_fragment,
125+
)
126+
83127
def _build_url(self, path: str) -> str:
84128
if not isinstance(path, str):
85129
raise HyperbrowserError("path must be a string")
@@ -94,10 +138,16 @@ def _build_url(self, path: str) -> str:
94138
raise HyperbrowserError("path must not contain backslashes")
95139
if "\n" in stripped_path or "\r" in stripped_path:
96140
raise HyperbrowserError("path must not contain newline characters")
97-
parsed_path = urlparse(stripped_path)
98-
if parsed_path.scheme:
141+
(
142+
parsed_path_scheme,
143+
_parsed_path_netloc,
144+
_parsed_path_path,
145+
parsed_path_query,
146+
parsed_path_fragment,
147+
) = self._parse_url_components(stripped_path, component_label="path")
148+
if parsed_path_scheme:
99149
raise HyperbrowserError("path must be a relative API path")
100-
if parsed_path.fragment:
150+
if parsed_path_fragment:
101151
raise HyperbrowserError("path must not include URL fragments")
102152
raw_query_component = (
103153
stripped_path.split("?", 1)[1] if "?" in stripped_path else ""
@@ -115,17 +165,20 @@ def _build_url(self, path: str) -> str:
115165
)
116166
if any(
117167
character.isspace() or ord(character) < 32 or ord(character) == 127
118-
for character in parsed_path.query
168+
for character in parsed_path_query
119169
):
120170
raise HyperbrowserError(
121171
"path query must not contain unencoded whitespace or control characters"
122172
)
123173
normalized_path = f"/{stripped_path.lstrip('/')}"
124-
normalized_parts = urlparse(normalized_path)
125-
normalized_path_only = normalized_parts.path
126-
normalized_query_suffix = (
127-
f"?{normalized_parts.query}" if normalized_parts.query else ""
128-
)
174+
(
175+
_normalized_path_scheme,
176+
_normalized_path_netloc,
177+
normalized_path_only,
178+
normalized_path_query,
179+
_normalized_path_fragment,
180+
) = self._parse_url_components(normalized_path, component_label="normalized path")
181+
normalized_query_suffix = f"?{normalized_path_query}" if normalized_path_query else ""
129182
decoded_path = ClientConfig._decode_url_component_with_limit(
130183
normalized_path_only, component_label="path"
131184
)
@@ -149,8 +202,14 @@ def _build_url(self, path: str) -> str:
149202
if any(segment in {".", ".."} for segment in normalized_segments):
150203
raise HyperbrowserError("path must not contain relative path segments")
151204
normalized_base_url = ClientConfig.normalize_base_url(self.config.base_url)
152-
parsed_base_url = urlparse(normalized_base_url)
153-
base_has_api_suffix = parsed_base_url.path.rstrip("/").endswith("/api")
205+
(
206+
_base_url_scheme,
207+
_base_url_netloc,
208+
parsed_base_url_path,
209+
_base_url_query,
210+
_base_url_fragment,
211+
) = self._parse_url_components(normalized_base_url, component_label="base_url")
212+
base_has_api_suffix = parsed_base_url_path.rstrip("/").endswith("/api")
154213

155214
if normalized_path_only == "/api" or normalized_path_only.startswith("/api/"):
156215
if base_has_api_suffix:

tests/test_url_building.py

Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import pytest
22
from urllib.parse import quote
33

4+
import hyperbrowser.client.base as client_base_module
45
from hyperbrowser import Hyperbrowser
56
from hyperbrowser.config import ClientConfig
67
from hyperbrowser.exceptions import HyperbrowserError
@@ -392,3 +393,135 @@ def test_client_build_url_normalizes_runtime_trailing_slashes():
392393
assert client._build_url("/session") == "https://example.local/api/session"
393394
finally:
394395
client.close()
396+
397+
398+
def test_client_build_url_wraps_path_parse_runtime_errors(
399+
monkeypatch: pytest.MonkeyPatch,
400+
):
401+
client = Hyperbrowser(config=ClientConfig(api_key="test-key"))
402+
try:
403+
def _raise_parse_runtime_error(_value: str):
404+
raise RuntimeError("path parse exploded")
405+
406+
monkeypatch.setattr(client_base_module, "urlparse", _raise_parse_runtime_error)
407+
408+
with pytest.raises(HyperbrowserError, match="Failed to parse path") as exc_info:
409+
client._build_url("/session")
410+
411+
assert isinstance(exc_info.value.original_error, RuntimeError)
412+
finally:
413+
client.close()
414+
415+
416+
def test_client_build_url_preserves_hyperbrowser_path_parse_errors(
417+
monkeypatch: pytest.MonkeyPatch,
418+
):
419+
client = Hyperbrowser(config=ClientConfig(api_key="test-key"))
420+
try:
421+
def _raise_parse_hyperbrowser_error(_value: str):
422+
raise HyperbrowserError("custom path parse failure")
423+
424+
monkeypatch.setattr(
425+
client_base_module,
426+
"urlparse",
427+
_raise_parse_hyperbrowser_error,
428+
)
429+
430+
with pytest.raises(
431+
HyperbrowserError, match="custom path parse failure"
432+
) as exc_info:
433+
client._build_url("/session")
434+
435+
assert exc_info.value.original_error is None
436+
finally:
437+
client.close()
438+
439+
440+
def test_client_build_url_wraps_path_component_access_runtime_errors(
441+
monkeypatch: pytest.MonkeyPatch,
442+
):
443+
client = Hyperbrowser(config=ClientConfig(api_key="test-key"))
444+
try:
445+
class _BrokenParsedPath:
446+
@property
447+
def scheme(self) -> str:
448+
raise RuntimeError("path scheme exploded")
449+
450+
monkeypatch.setattr(client_base_module, "urlparse", lambda _value: _BrokenParsedPath())
451+
452+
with pytest.raises(
453+
HyperbrowserError, match="Failed to parse path components"
454+
) as exc_info:
455+
client._build_url("/session")
456+
457+
assert isinstance(exc_info.value.original_error, RuntimeError)
458+
finally:
459+
client.close()
460+
461+
462+
def test_client_build_url_rejects_invalid_path_parser_component_types(
463+
monkeypatch: pytest.MonkeyPatch,
464+
):
465+
client = Hyperbrowser(config=ClientConfig(api_key="test-key"))
466+
try:
467+
class _InvalidParsedPath:
468+
scheme = object()
469+
netloc = ""
470+
path = "/session"
471+
query = ""
472+
fragment = ""
473+
474+
monkeypatch.setattr(client_base_module, "urlparse", lambda _value: _InvalidParsedPath())
475+
476+
with pytest.raises(
477+
HyperbrowserError, match="path parser returned invalid URL components"
478+
):
479+
client._build_url("/session")
480+
finally:
481+
client.close()
482+
483+
484+
def test_client_build_url_wraps_normalized_path_parse_runtime_errors(
485+
monkeypatch: pytest.MonkeyPatch,
486+
):
487+
client = Hyperbrowser(config=ClientConfig(api_key="test-key"))
488+
try:
489+
original_urlparse = client_base_module.urlparse
490+
491+
def _conditional_urlparse(value: str):
492+
if value == "/session":
493+
raise RuntimeError("normalized path parse exploded")
494+
return original_urlparse(value)
495+
496+
monkeypatch.setattr(client_base_module, "urlparse", _conditional_urlparse)
497+
498+
with pytest.raises(
499+
HyperbrowserError, match="Failed to parse normalized path"
500+
) as exc_info:
501+
client._build_url("session")
502+
503+
assert isinstance(exc_info.value.original_error, RuntimeError)
504+
finally:
505+
client.close()
506+
507+
508+
def test_client_build_url_wraps_base_url_parse_runtime_errors(
509+
monkeypatch: pytest.MonkeyPatch,
510+
):
511+
client = Hyperbrowser(config=ClientConfig(api_key="test-key"))
512+
try:
513+
original_urlparse = client_base_module.urlparse
514+
515+
def _conditional_urlparse(value: str):
516+
if value == "https://api.hyperbrowser.ai":
517+
raise RuntimeError("base_url parse exploded")
518+
return original_urlparse(value)
519+
520+
monkeypatch.setattr(client_base_module, "urlparse", _conditional_urlparse)
521+
522+
with pytest.raises(HyperbrowserError, match="Failed to parse base_url") as exc_info:
523+
client._build_url("/session")
524+
525+
assert isinstance(exc_info.value.original_error, RuntimeError)
526+
finally:
527+
client.close()

0 commit comments

Comments
 (0)