From da6ac463dd521b522cbe90280c4297d92806746b Mon Sep 17 00:00:00 2001
From: thodson-usgs <thodson@usgs.gov>
Date: Sun, 3 May 2026 14:04:54 -0500
Subject: [PATCH 1/2] Fix get_nearest_continuous: accept scalar targets and
 missing time column
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The docstring says ``targets`` accepts "anything ``pandas.to_datetime``
consumes", which includes a bare string or ``pd.Timestamp``. But
``pd.to_datetime("2024-01-01T00:00:00Z", utc=True)`` returns a scalar
``Timestamp``, and ``pd.DatetimeIndex(scalar)`` raises ``TypeError`` —
so single-value cases crashed despite the documented contract.

Wrap a scalar result in a one-element ``DatetimeIndex`` so any
``pandas.to_datetime``-consumable input works.

Also: when the user passes a ``properties`` argument that excludes
``time``, the helper used to crash with ``KeyError`` deep inside
``df.assign``. Detect the missing column up front and raise a
``ValueError`` pointing at the likely cause.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 dataretrieval/waterdata/nearest.py | 20 +++++++++++-
 tests/waterdata_nearest_test.py    | 51 ++++++++++++++++++++++++++++++
 2 files changed, 70 insertions(+), 1 deletion(-)

diff --git a/dataretrieval/waterdata/nearest.py b/dataretrieval/waterdata/nearest.py
index 29484875..6bb48187 100644
--- a/dataretrieval/waterdata/nearest.py
+++ b/dataretrieval/waterdata/nearest.py
@@ -137,7 +137,7 @@ def get_nearest_continuous(
         ... )
     """
     _check_nearest_kwargs(kwargs, on_tie)
-    targets = pd.DatetimeIndex(pd.to_datetime(targets, utc=True))
+    targets = _coerce_targets(targets)
     window_td = pd.Timedelta(window)
 
     if len(targets) == 0:
@@ -151,6 +151,11 @@ def get_nearest_continuous(
         filter_lang="cql-text",
         **kwargs,
     )
+    if "time" not in df.columns:
+        raise ValueError(
+            "get_nearest_continuous requires a 'time' column in the response; "
+            "if a `properties` kwarg was passed, include 'time' in it"
+        )
     if df.empty:
         return _empty_nearest_result(df), md
 
@@ -172,6 +177,19 @@ def get_nearest_continuous(
     return pd.DataFrame(selected).reset_index(drop=True), md
 
 
+def _coerce_targets(targets) -> pd.DatetimeIndex:
+    """Accept anything ``pandas.to_datetime`` consumes, including a single value.
+
+    A bare scalar (string, ``Timestamp``, ``datetime``, …) becomes a
+    one-element ``DatetimeIndex``; an iterable round-trips through
+    ``pd.to_datetime`` directly.
+    """
+    parsed = pd.to_datetime(targets, utc=True)
+    if isinstance(parsed, pd.DatetimeIndex):
+        return parsed
+    return pd.DatetimeIndex([parsed])
+
+
 def _check_nearest_kwargs(kwargs: dict, on_tie: OnTie) -> None:
     """Reject kwargs the helper owns; validate ``on_tie``."""
     for forbidden in ("time", "filter", "filter_lang"):
diff --git a/tests/waterdata_nearest_test.py b/tests/waterdata_nearest_test.py
index 4dc0ab9d..3f988a6b 100644
--- a/tests/waterdata_nearest_test.py
+++ b/tests/waterdata_nearest_test.py
@@ -265,3 +265,54 @@ def test_forwards_kwargs_to_get_continuous(patch_get_continuous):
     _, kwargs = patch_get_continuous.call_args
     assert kwargs["statistic_id"] == "00011"
     assert kwargs["approval_status"] == "Approved"
+
+
+def test_accepts_single_string_target(patch_get_continuous):
+    """A bare scalar target must round-trip through pd.to_datetime.
+
+    Regression: previously `pd.DatetimeIndex(pd.to_datetime("...", utc=True))`
+    raised TypeError because pd.to_datetime returns a scalar Timestamp for a
+    single-string input.
+    """
+    patch_get_continuous.return_value = (
+        _fake_df([{"time": "2023-06-15T10:30:00Z", "value": 22.4}]),
+        mock.Mock(),
+    )
+    result, _ = get_nearest_continuous(
+        "2023-06-15T10:30:31Z", monitoring_location_id="USGS-02238500"
+    )
+    assert len(result) == 1
+    assert result["target_time"].iloc[0] == pd.Timestamp(
+        "2023-06-15T10:30:31Z", tz="UTC"
+    )
+
+
+def test_accepts_single_timestamp_target(patch_get_continuous):
+    """A single ``pd.Timestamp`` target also round-trips."""
+    patch_get_continuous.return_value = (
+        _fake_df([{"time": "2023-06-15T10:30:00Z", "value": 22.4}]),
+        mock.Mock(),
+    )
+    target = pd.Timestamp("2023-06-15T10:30:31Z", tz="UTC")
+    result, _ = get_nearest_continuous(target, monitoring_location_id="USGS-02238500")
+    assert len(result) == 1
+
+
+def test_missing_time_column_raises_helpful_error(patch_get_continuous):
+    """If the response has no 'time' column (e.g. user passed `properties`
+    that excluded it), raise ValueError instead of crashing with KeyError.
+    """
+    df_no_time = pd.DataFrame(
+        {
+            "value": [22.4],
+            "monitoring_location_id": ["USGS-02238500"],
+        }
+    )
+    patch_get_continuous.return_value = (df_no_time, mock.Mock())
+
+    with pytest.raises(ValueError, match="'time' column"):
+        get_nearest_continuous(
+            ["2023-06-15T10:30:31Z"],
+            monitoring_location_id="USGS-02238500",
+            properties=["value", "monitoring_location_id"],
+        )

From 23898203427ff71782d091c06c9659ae02af5d2c Mon Sep 17 00:00:00 2001
From: thodson-usgs <thodson@usgs.gov>
Date: Mon, 4 May 2026 10:09:57 -0500
Subject: [PATCH 2/2] Preserve list-like target inputs and avoid double-tz in
 test

Per Copilot review on PR #251:

- _coerce_targets: use pd.api.types.is_scalar to tell scalars apart from
  non-DatetimeIndex iterables (Series, ndarray) so the elements of an
  iterable are preserved instead of being wrapped in a single-element
  list. Add a regression test that passes a pd.Series of two timestamps
  and asserts both are processed.
- Tests: drop the redundant tz='UTC' on pd.Timestamp inputs that already
  carry a Z suffix; pandas 2.x raises on double timezone specification.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 dataretrieval/waterdata/nearest.py |  8 +++++---
 tests/waterdata_nearest_test.py    | 22 ++++++++++++++++++----
 2 files changed, 23 insertions(+), 7 deletions(-)

diff --git a/dataretrieval/waterdata/nearest.py b/dataretrieval/waterdata/nearest.py
index 6bb48187..837d7928 100644
--- a/dataretrieval/waterdata/nearest.py
+++ b/dataretrieval/waterdata/nearest.py
@@ -181,13 +181,15 @@ def _coerce_targets(targets) -> pd.DatetimeIndex:
     """Accept anything ``pandas.to_datetime`` consumes, including a single value.
 
     A bare scalar (string, ``Timestamp``, ``datetime``, …) becomes a
-    one-element ``DatetimeIndex``; an iterable round-trips through
-    ``pd.to_datetime`` directly.
+    one-element ``DatetimeIndex``; an iterable (list, ``Series``, ``ndarray``)
+    is wrapped directly so its elements are preserved.
     """
     parsed = pd.to_datetime(targets, utc=True)
     if isinstance(parsed, pd.DatetimeIndex):
         return parsed
-    return pd.DatetimeIndex([parsed])
+    if pd.api.types.is_scalar(parsed):
+        return pd.DatetimeIndex([parsed])
+    return pd.DatetimeIndex(parsed)
 
 
 def _check_nearest_kwargs(kwargs: dict, on_tie: OnTie) -> None:
diff --git a/tests/waterdata_nearest_test.py b/tests/waterdata_nearest_test.py
index 3f988a6b..64deeccd 100644
--- a/tests/waterdata_nearest_test.py
+++ b/tests/waterdata_nearest_test.py
@@ -282,9 +282,7 @@ def test_accepts_single_string_target(patch_get_continuous):
         "2023-06-15T10:30:31Z", monitoring_location_id="USGS-02238500"
     )
     assert len(result) == 1
-    assert result["target_time"].iloc[0] == pd.Timestamp(
-        "2023-06-15T10:30:31Z", tz="UTC"
-    )
+    assert result["target_time"].iloc[0] == pd.Timestamp("2023-06-15T10:30:31Z")
 
 
 def test_accepts_single_timestamp_target(patch_get_continuous):
@@ -293,11 +291,27 @@ def test_accepts_single_timestamp_target(patch_get_continuous):
         _fake_df([{"time": "2023-06-15T10:30:00Z", "value": 22.4}]),
         mock.Mock(),
     )
-    target = pd.Timestamp("2023-06-15T10:30:31Z", tz="UTC")
+    target = pd.Timestamp("2023-06-15T10:30:31Z")
     result, _ = get_nearest_continuous(target, monitoring_location_id="USGS-02238500")
     assert len(result) == 1
 
 
+def test_accepts_pandas_series_targets(patch_get_continuous):
+    """A ``pd.Series`` of timestamps preserves all elements (not just the first)."""
+    patch_get_continuous.return_value = (
+        _fake_df(
+            [
+                {"time": "2023-06-15T10:30:00Z", "value": 22.4},
+                {"time": "2023-06-16T10:30:00Z", "value": 22.5},
+            ]
+        ),
+        mock.Mock(),
+    )
+    targets = pd.Series(["2023-06-15T10:30:31Z", "2023-06-16T10:30:31Z"])
+    result, _ = get_nearest_continuous(targets, monitoring_location_id="USGS-02238500")
+    assert len(result) == 2
+
+
 def test_missing_time_column_raises_helpful_error(patch_get_continuous):
     """If the response has no 'time' column (e.g. user passed `properties`
     that excluded it), raise ValueError instead of crashing with KeyError.