From 13b90322b279cf80924788b850208c04b5af7569 Mon Sep 17 00:00:00 2001
From: thodson-usgs <thodson@usgs.gov>
Date: Fri, 8 May 2026 16:44:10 -0500
Subject: [PATCH] Surface real failures in paginated waterdata responses
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two bugs in _walk_pages (and the parallel get_stats_data pagination
loop) caused silent or misleading failures when a paginated request was
interrupted mid-walk:

1. The except handler called _error_body(resp) — but when
   client.request() itself raised (ConnectionError, Timeout, etc.),
   `resp` still pointed at the *previous successful page*. The
   "incomplete" log message therefore described the wrong request, and
   on a non-JSON 200 body would itself raise inside resp.json() and
   bury the original failure.

2. No status-code check was performed on paginated responses. The
   initial request guards `if resp.status_code != 200`, but the loop
   doesn't. A 5xx body that didn't include "numberReturned" was
   silently turned into an empty DataFrame by _get_resp_data, the
   `next` link wasn't found, and the loop quietly exited — handing the
   user truncated data with no error logged anywhere. This affects
   every paginated waterdata getter (get_daily, get_continuous,
   get_monitoring_locations, …).

Fix:
- Guard the loop body with `if resp.status_code != 200: raise
  RuntimeError(_error_body(resp))`, mirroring the initial request.
- Capture the exception with `as e` and log `e` instead of touching the
  stale `resp`. Same change in get_stats_data.

Behavior preserved: on failure the loop still logs and returns
whatever pages were collected ("best effort"). The change is purely to
make the failure observable and the log message accurate.

Two new tests cover (a) network-exception mid-pagination and (b) 5xx
mid-pagination, asserting that the actual failure surfaces in the
error log.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 NEWS.md                          |  2 +
 dataretrieval/waterdata/utils.py | 22 +++++---
 tests/waterdata_utils_test.py    | 87 ++++++++++++++++++++++++++++++++
 3 files changed, 104 insertions(+), 7 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index 2faaeb42..548d4878 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,3 +1,5 @@
+**05/08/2026:** Fixed two latent bugs in the paginated `waterdata` request loop (`_walk_pages` and `get_stats_data`). Previously, when `requests.Session.request(...)` itself raised mid-pagination (network error, timeout), the except block called `_error_body()` on the *prior page's* response, so the logged "error" described the wrong request and could itself crash on non-JSON bodies. Separately, no status-code check was performed on subsequent paginated responses, so a 5xx body that didn't include `numberReturned` was silently treated as an empty page — pagination quietly stopped and the user got truncated data with no error logged. The loop now status-checks each page like the initial request and reports the actual exception. The "best-effort" behavior (return whatever pages were collected) is preserved.
+
 **05/06/2026:** Each remaining active function in `dataretrieval.nwis` now emits a per-function `DeprecationWarning` naming the `waterdata` replacement to migrate to (visible the first time users call each getter). The `nwis` module is scheduled for removal on or after **2027-05-06**.
 
 **05/06/2026:** Added `waterdata.get_ratings(...)` — wraps the new Water Data STAC catalog (`api.waterdata.usgs.gov/stac/v0/search`) for USGS stage-discharge rating curves. Returns parsed `exsa` / `base` / `corr` rating tables as a dict of DataFrames keyed by feature ID, or just the list of available STAC features when `download_and_parse=False`. Mirrors R's `read_waterdata_ratings`.
diff --git a/dataretrieval/waterdata/utils.py b/dataretrieval/waterdata/utils.py
index 784f2969..7756e101 100644
--- a/dataretrieval/waterdata/utils.py
+++ b/dataretrieval/waterdata/utils.py
@@ -635,11 +635,18 @@ def _walk_pages(
                     headers=headers,
                     data=content if method == "POST" else None,
                 )
+                # Mirror the initial-request check; otherwise a 5xx body
+                # without "numberReturned" silently yields an empty frame
+                # and the loop quietly stops.
+                if resp.status_code != 200:
+                    raise RuntimeError(_error_body(resp))
                 dfs.append(_get_resp_data(resp, geopd=geopd))
                 curr_url = _next_req_url(resp)
-            except Exception:  # noqa: BLE001
-                error_text = _error_body(resp)
-                logger.error("Request incomplete. %s", error_text)
+            except Exception as e:  # noqa: BLE001
+                # Report the *actual* failure — not _error_body(resp) on a
+                # stale prior-page response (which would describe the wrong
+                # request and can itself raise on non-JSON bodies).
+                logger.error("Request incomplete: %s", e)
                 logger.warning(
                     "Request failed for URL: %s. Data download interrupted.", curr_url
                 )
@@ -1099,14 +1106,15 @@ def get_stats_data(
                     params=args,
                     headers=headers,
                 )
+                if resp.status_code != 200:
+                    raise RuntimeError(_error_body(resp))
                 body = resp.json()
                 all_dfs.append(_handle_stats_nesting(body, geopd=GEOPANDAS))
                 next_token = body["next"]
-            except Exception:  # noqa: BLE001
-                error_text = _error_body(resp)
-                logger.error("Request incomplete. %s", error_text)
+            except Exception as e:  # noqa: BLE001
+                logger.error("Request incomplete: %s", e)
                 logger.warning(
-                    "Request failed for URL: %s. Data download interrupted.", resp.url
+                    "Request failed for URL: %s. Data download interrupted.", url
                 )
                 next_token = None
 
diff --git a/tests/waterdata_utils_test.py b/tests/waterdata_utils_test.py
index 8151ab37..3008be15 100644
--- a/tests/waterdata_utils_test.py
+++ b/tests/waterdata_utils_test.py
@@ -86,6 +86,93 @@ def test_walk_pages_multiple_mocked():
     assert mock_client.request.call_args[0][1] == "https://example.com/page2"
 
 
+def _resp_ok(features):
+    """Build a 200-OK mock response carrying the given features list."""
+    resp = mock.MagicMock()
+    resp.json.return_value = {
+        "numberReturned": len(features),
+        "features": features,
+        "links": [{"rel": "next", "href": "https://example.com/page2"}]
+        if features
+        else [],
+    }
+    resp.headers = {}
+    resp.links = {"next": {"url": "https://example.com/page2"}} if features else {}
+    resp.status_code = 200
+    resp.url = "https://example.com/page1"
+    return resp
+
+
+def _walk_pages_with_failure(failure_resp_or_exc):
+    """Run _walk_pages where page 1 succeeds and page 2 fails as given."""
+    resp1 = _resp_ok([{"id": "1", "properties": {"val": "a"}}])
+
+    mock_client = mock.MagicMock(spec=requests.Session)
+    mock_client.send.return_value = resp1
+    if isinstance(failure_resp_or_exc, BaseException):
+        mock_client.request.side_effect = failure_resp_or_exc
+    else:
+        mock_client.request.return_value = failure_resp_or_exc
+
+    mock_req = mock.MagicMock(spec=requests.PreparedRequest)
+    mock_req.method = "GET"
+    mock_req.headers = {}
+    mock_req.url = "https://example.com/page1"
+
+    return _walk_pages(geopd=False, req=mock_req, client=mock_client)
+
+
+def test_walk_pages_logs_actual_exception_when_request_raises(caplog):
+    """When client.request() itself raises (e.g. a connection error), the
+    logged failure must reflect that exception — not _error_body() on the
+    stale prior-page response, which described the wrong request and could
+    itself crash on non-JSON bodies."""
+    import logging
+
+    caplog.set_level(logging.ERROR, logger="dataretrieval.waterdata.utils")
+
+    df, _ = _walk_pages_with_failure(requests.ConnectionError("boom"))
+
+    # First page's data is preserved (best-effort behavior).
+    assert list(df["val"]) == ["a"]
+    # Logged error mentions the actual ConnectionError, not a stale page body.
+    error_messages = [
+        r.getMessage() for r in caplog.records if r.levelno >= logging.ERROR
+    ]
+    assert any("boom" in m for m in error_messages), error_messages
+
+
+def test_walk_pages_surfaces_5xx_mid_pagination(caplog):
+    """A non-200 response on a paginated page must be detected; previously
+    the loop happily called _get_resp_data() on the 5xx body, which —
+    lacking 'numberReturned' — silently produced an empty DataFrame and
+    the loop quietly stopped with no error logged."""
+    import logging
+
+    caplog.set_level(logging.ERROR, logger="dataretrieval.waterdata.utils")
+
+    page2_500 = mock.MagicMock()
+    page2_500.status_code = 503
+    page2_500.json.return_value = {
+        "code": "ServiceUnavailable",
+        "description": "upstream timeout",
+    }
+    page2_500.url = "https://example.com/page2"
+
+    df, _ = _walk_pages_with_failure(page2_500)
+
+    assert list(df["val"]) == ["a"]
+    error_messages = [
+        r.getMessage() for r in caplog.records if r.levelno >= logging.ERROR
+    ]
+    # The 5xx is now visible in the error log (it would previously have
+    # been silently swallowed because _get_resp_data returned an empty
+    # frame and the loop stopped quietly).
+    assert any("503" in m or "ServiceUnavailable" in m for m in error_messages), (
+        error_messages
+    )
+
+
 def test_handle_stats_nesting_tolerates_missing_drop_columns():
     """If the upstream stats response shape ever changes such that one of
     the columns we try to drop ("type", "properties.data") is absent, the