4040
4141from .config import BrowserConfig
4242
43+ try :
44+ from playwright_stealth import Stealth as _StealthClass # type: ignore[import-untyped]
45+ except ImportError : # pragma: no cover
46+ _StealthClass = None
47+
48+ try :
49+ from playwright_stealth import stealth_async as _legacy_stealth_async # type: ignore[import-untyped]
50+ except ImportError : # pragma: no cover - older/newer API compatibility
51+ _legacy_stealth_async = None
52+
4353logger = logging .getLogger (__name__ )
54+ _STEALTH_BACKEND_LOGGED = False
4455
4556# --- OPTIMIZED CONFIGURATION ---
4657DEFAULT_USER_AGENTS = [
@@ -94,6 +105,7 @@ def __init__(
94105
95106 self .browser_type_name = self .config .browser_type .lower ()
96107 self .headless = self .config .headless
108+ self .stealth_mode = bool (getattr (self .config , "stealth_mode" , True ))
97109
98110 # Mapping properties
99111 self .user_agents = DEFAULT_USER_AGENTS
@@ -111,12 +123,64 @@ def __init__(
111123
112124 self ._playwright : Optional [Playwright ] = None
113125 self ._browser : Optional [Browser ] = None
126+ self ._logged_socks_auth_skips : set [str ] = set ()
127+ self ._stealth = _StealthClass () if _StealthClass is not None else None
128+ self ._stealth_missing_warned = False
129+ global _STEALTH_BACKEND_LOGGED
130+ if not _STEALTH_BACKEND_LOGGED :
131+ if self ._stealth is not None :
132+ logger .info ("Playwright stealth backend: playwright_stealth.Stealth" )
133+ elif callable (_legacy_stealth_async ):
134+ logger .info (
135+ "Playwright stealth backend: playwright_stealth.stealth_async"
136+ )
137+ else :
138+ logger .info (
139+ "Playwright stealth backend: unavailable (basic webdriver scrub only)"
140+ )
141+ _STEALTH_BACKEND_LOGGED = True
114142
115143 logger .info (
116144 f"PlaywrightManager initialized: Browser={ self .browser_type_name } , Headless={ self .headless } , "
117145 f"Default Timeout={ self .default_navigation_timeout_ms } ms"
118146 )
119147
def _build_playwright_proxy_settings(self, proxy_obj: Any) -> Dict[str, str]:
    """
    Translate a proxy model into the dict Playwright accepts as ``proxy``.

    The result always contains ``"server"`` (``protocol://hostname:port``
    with the protocol lower-cased). ``"username"`` / ``"password"`` are
    added only for ``http`` and ``https`` proxies, and only when non-empty.

    For any other protocol, credentials present on ``proxy_obj`` are left
    out of the result and a warning is logged once per
    ``hostname:port:protocol`` combination for this instance.

    Args:
        proxy_obj: Object exposing ``protocol`` (either a plain value or an
            object with a ``.value`` attribute), ``hostname`` and ``port``;
            ``username`` and ``password`` are optional attributes.

    Returns:
        The proxy settings mapping described above.
    """
    # Accept both enum-like protocols (with ``.value``) and plain values.
    raw_protocol = proxy_obj.protocol
    if hasattr(raw_protocol, "value"):
        protocol = raw_protocol.value
    else:
        protocol = str(raw_protocol)
    protocol = protocol.lower()

    settings: Dict[str, str] = {}
    settings["server"] = f"{protocol}://{proxy_obj.hostname}:{proxy_obj.port}"

    # Missing or None credentials are normalised to empty strings.
    credentials = {
        "username": str(getattr(proxy_obj, "username", "") or ""),
        "password": str(getattr(proxy_obj, "password", "") or ""),
    }

    if protocol in {"http", "https"}:
        # Only forward the credential fields that are actually set.
        for field_name, field_value in credentials.items():
            if field_value:
                settings[field_name] = field_value
        return settings

    if not any(credentials.values()):
        return settings

    # Credentials were supplied for a protocol that cannot use them:
    # drop them and warn, but only the first time for this endpoint.
    endpoint_key = f"{proxy_obj.hostname}:{proxy_obj.port}:{protocol}"
    if endpoint_key not in self._logged_socks_auth_skips:
        logger.warning(
            "Proxy %s provided SOCKS credentials, but Playwright does not "
            "support SOCKS authentication. Credentials will be ignored.",
            endpoint_key,
        )
        self._logged_socks_auth_skips.add(endpoint_key)

    return settings
183+
120184 @property
121185 def browser (self ) -> Browser :
122186 """Expose active browser for backward-compatible advanced operations."""
@@ -202,21 +266,12 @@ async def get_new_page(
202266 # Construct Playwright Proxy Dict
203267 # protocol://user:pass@host:port OR separate fields
204268 # Playwright expects: { "server": "...", "username": "...", "password": "..." }
205-
206- # Protocol handling
207269 protocol = (
208270 proxy_obj .protocol .value
209271 if hasattr (proxy_obj .protocol , "value" )
210272 else str (proxy_obj .protocol )
211273 )
212-
213- proxy_settings = {
214- "server" : f"{ protocol } ://{ proxy_obj .hostname } :{ proxy_obj .port } "
215- }
216- if proxy_obj .username :
217- proxy_settings ["username" ] = proxy_obj .username
218- if proxy_obj .password :
219- proxy_settings ["password" ] = proxy_obj .password
274+ proxy_settings = self ._build_playwright_proxy_settings (proxy_obj )
220275
221276 base_context_options ["proxy" ] = proxy_settings
222277 logger .info (
@@ -239,16 +294,25 @@ async def get_new_page(
239294 )
240295
241296 # Tracker and ad blocking for faster, cleaner page loads
242- await context .route (
243- "**/*" ,
244- lambda route : (
245- route .abort ()
246- if self ._is_tracker_or_ad (route .request .url )
247- else route .continue_ ()
248- ),
249- )
297+ async def _route_handler (route : Any ) -> None :
298+ if self ._is_tracker_or_ad (route .request .url ):
299+ await route .abort ()
300+ return
301+ await route .continue_ ()
302+
303+ await context .route ("**/*" , _route_handler )
250304
251305 page = await context .new_page ()
306+ if self .stealth_mode and self ._stealth is not None :
307+ await self ._stealth .apply_stealth_async (page )
308+ elif self .stealth_mode and callable (_legacy_stealth_async ):
309+ await _legacy_stealth_async (page )
310+ elif self .stealth_mode and not self ._stealth_missing_warned :
311+ logger .warning (
312+ "stealth_mode is enabled but playwright_stealth is unavailable; "
313+ "falling back to basic webdriver scrubbing only."
314+ )
315+ self ._stealth_missing_warned = True
252316 return page , context
253317 except Exception as e :
254318 logger .error (f"Error creating new page and context: { e } " , exc_info = True )
@@ -404,6 +468,14 @@ async def fetch_page_content(
404468 f"Playwright: Timeout on { current_url_val } "
405469 f"(attempt { attempt + 1 } /{ effective_retries + 1 } ): { pte } "
406470 )
471+ except asyncio .CancelledError :
472+ logger .warning (
473+ "Playwright: Fetch cancelled on %s (attempt %s/%s)." ,
474+ current_url_val ,
475+ attempt + 1 ,
476+ effective_retries + 1 ,
477+ )
478+ raise
407479 except Exception as e :
408480 logger .error (
409481 f"Playwright: Unexpected error on { current_url_val } "
@@ -493,6 +565,9 @@ async def smart_fetch(
493565 )
494566
495567 return content , final_url , status
568+ except asyncio .CancelledError :
569+ logger .warning ("SmartFetch cancelled for %s." , url )
570+ raise
496571
497572 finally :
498573 if page :
0 commit comments