|
1 | 1 | """Authentication layer. |
2 | 2 |
|
3 | | -Resolves browser session cookies from Chrome / Edge automatically. |
4 | | -Falls back to manually configured cookies only after browser auth failure. |
5 | | -Proxy credentials are redacted from all log output. |
| 3 | +Priority order for credential resolution: |
| 4 | + 1. env_token — PAYWALLFETCHER_TOKEN env var (semicolon-separated NAME=value cookie string; a bare bearer token without NAME= is not parsed) |
| 5 | + 2. env_cookie_header — PAYWALLFETCHER_COOKIE_<NAME> env vars (individual cookies) |
| 6 | + 3. browser_auto — local Chrome / Edge logged-in session |
| 7 | + 4. config_cookies — cookies field in config.json (debug-only fallback) |
| 8 | +
|
| 9 | +resolve() never prints. All warnings are stored in config['_warnings'] and |
| 10 | +emitted by the caller (cli.py) according to output mode. |
6 | 11 | """ |
7 | 12 |
|
8 | 13 | from __future__ import annotations |
9 | 14 |
|
| 15 | +import os |
10 | 16 | import re |
11 | | -import sys |
12 | 17 | from typing import Any, Dict, List, Optional, Tuple |
13 | 18 | from urllib.parse import urlparse |
14 | 19 |
|
|
26 | 31 | _DEFAULT_BROWSER_ORDER = ("chrome", "edge") |
27 | 32 | _DEFAULT_XSRF_NAMES = ("XSRF-TOKEN", "XSRF_TOKEN", "xsrf-token", "x-xsrf-token", "_xsrf") |
28 | 33 |
|
| 34 | +ENV_TOKEN_VAR = "PAYWALLFETCHER_TOKEN" |
| 35 | +ENV_COOKIE_PREFIX = "PAYWALLFETCHER_COOKIE_" |
| 36 | + |
29 | 37 |
|
30 | 38 | def resolve(config: Dict[str, Any]) -> Dict[str, Any]: |
31 | | - """Resolve auth and inject _auth_source / _cookie_records / _cookies / _xsrf_token into config.""" |
| 39 | + """Resolve auth. Never prints. Warnings are stored in config['_warnings']. |
| 40 | +
|
| 41 | + Injects into config: |
| 42 | + _auth_source, _cookie_records, _cookies, _xsrf_token, _warnings |
| 43 | + """ |
| 44 | + warnings: List[str] = [] |
32 | 45 | auth = config.get("auth", {}) |
33 | 46 | mode = (auth.get("mode") or "browser_auto").lower() |
34 | 47 |
|
35 | 48 | cookie_domains = auth.get("cookie_domains") or _derive_domains(config["base_url"]) |
36 | 49 | xsrf_names = auth.get("xsrf_cookie_names") or list(_DEFAULT_XSRF_NAMES) |
37 | 50 | required = auth.get("required_cookies") or [] |
38 | 51 |
|
39 | | - manual = _manual_records(config) |
40 | | - browser_records: List[Dict] = [] |
41 | | - browser_name: Optional[str] = None |
42 | | - browser_errors: List[str] = [] |
43 | | - |
44 | | - if mode in {"browser", "browser_auto"}: |
45 | | - browser_records, browser_name, browser_errors = _load_browser_records( |
46 | | - auth.get("browser", "auto"), cookie_domains |
47 | | - ) |
48 | | - |
49 | | - if browser_records: |
50 | | - records = _merge(manual, browser_records) |
51 | | - config["_auth_source"] = f"browser:{browser_name}" |
52 | | - elif manual: |
53 | | - records = manual |
54 | | - config["_auth_source"] = "config" |
55 | | - if mode in {"browser", "browser_auto"} and browser_errors: |
56 | | - print(f"[Warn] Browser auth unavailable, using config cookies: {' | '.join(browser_errors)}", file=sys.stderr) |
57 | | - elif mode == "config": |
58 | | - raise AuthError("No cookies found in config.json under 'cookies'.") |
| 52 | + # ── Priority 1: env_token ────────────────────────────────────────────── |
| 53 | + records = _env_token_records(config) |
| 54 | + if records: |
| 55 | + config["_auth_source"] = "env_token" |
59 | 56 | else: |
60 | | - detail = " | ".join(browser_errors) if browser_errors else "No matching cookies in local browser profiles." |
61 | | - raise AuthError( |
62 | | - f"Failed to load browser cookies automatically. {detail}\n" |
63 | | - " Ensure you are already logged into the target site in Chrome or Edge." |
64 | | - ) |
| 57 | + # ── Priority 2: env_cookie_header ────────────────────────────────── |
| 58 | + records = _env_cookie_records(config) |
| 59 | + if records: |
| 60 | + config["_auth_source"] = "env_cookie_header" |
| 61 | + else: |
| 62 | + # ── Priority 3: browser_auto ─────────────────────────────────── |
| 63 | + manual = _manual_records(config) |
| 64 | + browser_records: List[Dict] = [] |
| 65 | + browser_name: Optional[str] = None |
| 66 | + browser_errors: List[str] = [] |
| 67 | + |
| 68 | + if mode in {"browser", "browser_auto"}: |
| 69 | + browser_records, browser_name, browser_errors = _load_browser_records( |
| 70 | + auth.get("browser", "auto"), cookie_domains |
| 71 | + ) |
| 72 | + |
| 73 | + if browser_records: |
| 74 | + records = _merge(manual, browser_records) |
| 75 | + config["_auth_source"] = f"browser:{browser_name}" |
| 76 | + elif manual: |
| 77 | + # ── Priority 4: config_cookies (debug-only) ──────────────── |
| 78 | + records = manual |
| 79 | + config["_auth_source"] = "config_cookies" |
| 80 | + if mode in {"browser", "browser_auto"} and browser_errors: |
| 81 | + warnings.append( |
| 82 | + f"Browser auth unavailable, falling back to config cookies " |
| 83 | + f"(debug-only): {' | '.join(browser_errors)}" |
| 84 | + ) |
| 85 | + elif mode == "config": |
| 86 | + raise AuthError("No cookies found in config.json under 'cookies'.") |
| 87 | + else: |
| 88 | + detail = " | ".join(browser_errors) if browser_errors else "No matching cookies in local browser profiles." |
| 89 | + raise AuthError( |
| 90 | + f"Failed to load browser cookies automatically. {detail}\n" |
| 91 | + " Ensure you are already logged into the target site in Chrome or Edge.\n" |
| 92 | + f" Alternatively, set {ENV_TOKEN_VAR} or {ENV_COOKIE_PREFIX}<NAME> env vars." |
| 93 | + ) |
65 | 94 |
|
66 | 95 | cookies_dict = {r["name"]: r["value"] for r in records} |
67 | 96 | xsrf = _find_xsrf(cookies_dict, xsrf_names) |
68 | 97 |
|
69 | 98 | missing = [n for n in required if n not in cookies_dict] |
70 | 99 | if missing and config.get("_auth_source", "").startswith("browser"): |
71 | | - print(f"[Warn] Browser auth loaded but missing required cookies: {', '.join(missing)}", file=sys.stderr) |
| 100 | + warnings.append( |
| 101 | + f"Browser auth loaded but missing required cookies: {', '.join(missing)}" |
| 102 | + ) |
72 | 103 |
|
73 | 104 | config["_cookie_records"] = records |
74 | 105 | config["_cookies"] = cookies_dict |
75 | 106 | config["_xsrf_token"] = xsrf |
| 107 | + config["_warnings"] = warnings |
76 | 108 | return config |
77 | 109 |
|
78 | 110 |
|
@@ -164,6 +196,65 @@ def doctor_auth(config: Dict[str, Any]) -> Dict[str, Any]: |
164 | 196 | return result |
165 | 197 |
|
166 | 198 |
|
| 199 | +# ── env credential helpers ───────────────────────────────────────────────── |
| 200 | + |
| 201 | + |
| 202 | +def _env_token_records(config: Dict[str, Any]) -> List[Dict]: |
| 203 | + """Parse PAYWALLFETCHER_TOKEN env var into cookie records. |
| 204 | +
|
| 205 | + Accepts a semicolon-separated cookie string: ``SESSION=abc; XSRF-TOKEN=xyz`` |
| 206 | + Each ``NAME=value`` pair becomes one cookie record bound to the config host. |
| 207 | + Returns an empty list if the env var is unset or empty. |
| 208 | + """ |
| 209 | + token = os.environ.get(ENV_TOKEN_VAR, "").strip() |
| 210 | + if not token: |
| 211 | + return [] |
| 212 | + |
| 213 | + host = _normalize_domain(urlparse(config["base_url"]).netloc.split(":")[0]) |
| 214 | + domain = f".{host}" if host else None |
| 215 | + |
| 216 | + records: List[Dict] = [] |
| 217 | + for part in token.split(";"): |
| 218 | + part = part.strip() |
| 219 | + if "=" not in part: |
| 220 | + continue |
| 221 | + name, _, value = part.partition("=") |
| 222 | + name = name.strip() |
| 223 | + value = value.strip() |
| 224 | + if name: |
| 225 | + records.append({ |
| 226 | + "name": name, "value": value, |
| 227 | + "domain": domain, "path": "/", |
| 228 | + "secure": True, "expires": None, |
| 229 | + }) |
| 230 | + return records |
| 231 | + |
| 232 | + |
| 233 | +def _env_cookie_records(config: Dict[str, Any]) -> List[Dict]: |
| 234 | + """Collect PAYWALLFETCHER_COOKIE_<NAME>=value env vars as cookie records. |
| 235 | +
|
| 236 | + Each env var whose name starts with ``PAYWALLFETCHER_COOKIE_`` contributes |
| 237 | + one cookie; the cookie name is the suffix after the prefix. |
| 238 | + Returns an empty list if no matching env vars are set. |
| 239 | + """ |
| 240 | + host = _normalize_domain(urlparse(config["base_url"]).netloc.split(":")[0]) |
| 241 | + domain = f".{host}" if host else None |
| 242 | + |
| 243 | + records: List[Dict] = [] |
| 244 | + for key, value in os.environ.items(): |
| 245 | + if not key.startswith(ENV_COOKIE_PREFIX): |
| 246 | + continue |
| 247 | + name = key[len(ENV_COOKIE_PREFIX):] |
| 248 | + value = value.strip() |
| 249 | + if name and value: |
| 250 | + records.append({ |
| 251 | + "name": name, "value": value, |
| 252 | + "domain": domain, "path": "/", |
| 253 | + "secure": True, "expires": None, |
| 254 | + }) |
| 255 | + return records |
| 256 | + |
| 257 | + |
167 | 258 | # ── internals ────────────────────────────────────────────────────────────── |
168 | 259 |
|
169 | 260 |
|
|
0 commit comments