diff --git a/README.md b/README.md index 6a4cf4fd..f8c14a36 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,8 @@ ![Conda Version](https://img.shields.io/conda/v/conda-forge/dataretrieval) ![Downloads](https://static.pepy.tech/badge/dataretrieval) -:warning: USGS data availability and format are changing on Water Quality Portal (WQP). Since March 2024, data obtained from WQP legacy profiles will not include new USGS data or recent updates to existing data. +:warning: USGS data availability and format have changed on Water Quality Portal (WQP). Since March 2024, data obtained from WQP legacy profiles will not include new USGS data or recent updates to existing data. All USGS data (up to and beyond March 2024) are available using the new WQP beta services. You can access the beta services by setting `legacy=False` in the functions in the `wqp` module. + To view the status of changes in data availability and code functionality, visit: https://doi-usgs.github.io/dataRetrieval/articles/Status.html :mega: **09/03/2024:** The groundwater levels service has switched endpoints, and `dataretrieval` was updated accordingly in [`v1.0.10`](https://github.com/DOI-USGS/dataretrieval-python/releases/tag/v1.0.10). Older versions using the discontinued endpoint will return 503 errors for `nwis.get_gwlevels` or the `service='gwlevels'` argument. Visit [Water Data For the Nation](https://waterdata.usgs.gov/blog/wdfn-waterservices-2024/) for more information. @@ -34,15 +35,11 @@ import dataretrieval.nwis as nwis # specify the USGS site code for which we want data. 
site = '03339000' - # get instantaneous values (iv) df = nwis.get_record(sites=site, service='iv', start='2017-12-31', end='2018-01-01') -# get water quality samples (qwdata) -df2 = nwis.get_record(sites=site, service='qwdata', start='2017-12-31', end='2018-01-01') - # get basic info about the site -df3 = nwis.get_record(sites=site, service='site') +df2 = nwis.get_record(sites=site, service='site') ``` Services available from NWIS include: - instantaneous values (iv) @@ -51,13 +48,16 @@ Services available from NWIS include: - site info (site) - discharge peaks (peaks) - discharge measurements (measurements) -* water quality samples (qwdata) -To access the full functionality available from NWIS web services, nwis.get record appends any additional kwargs into the REST request. For example +Water quality data are available from: +- [Samples](https://waterdata.usgs.gov/download-samples/#dataProfile=site) - Discrete USGS water quality data only +- [Water Quality Portal](https://www.waterqualitydata.us/) - Discrete water quality data from USGS and EPA. Older data are available in the legacy WQX version 2 format; all data are available in the beta WQX3.0 format. + +To access the full functionality available from NWIS web services, `nwis.get_record` appends any additional kwargs into the REST request. For example, this function call: ```python nwis.get_record(sites='03339000', service='dv', start='2017-12-31', parameterCd='00060') ``` -will download daily data with the parameter code 00060 (discharge). +...will download daily data with the parameter code 00060 (discharge). ## Accessing the "Internal" NWIS If you're connected to the USGS network, dataretrieval call pull from the internal (non-public) NWIS interface. @@ -97,9 +97,11 @@ Any help in testing, development, documentation and other tasks is welcome. For more details, see the file [CONTRIBUTING.md](CONTRIBUTING.md). 
-## Package Support -The Water Mission Area of the USGS supports the development and maintenance of `dataretrieval` -and most likely further into the future. +## Need help? + +The Water Mission Area of the USGS supports the development and maintenance of `dataretrieval`. Any questions can be directed to the Computational Tools team at +comptools@usgs.gov. + Resources are available primarily for maintenance and responding to user questions. Priorities on the development of new features are determined by the `dataretrieval` development team. diff --git a/dataretrieval/__init__.py b/dataretrieval/__init__.py index 200b0182..07374f47 100644 --- a/dataretrieval/__init__.py +++ b/dataretrieval/__init__.py @@ -5,6 +5,7 @@ from dataretrieval.samples import * from dataretrieval.streamstats import * from dataretrieval.utils import * +from dataretrieval.waterdata import * from dataretrieval.waterwatch import * from dataretrieval.wqp import * diff --git a/dataretrieval/nwis.py b/dataretrieval/nwis.py index 20c25ca5..96218bda 100644 --- a/dataretrieval/nwis.py +++ b/dataretrieval/nwis.py @@ -36,7 +36,6 @@ WATERSERVICES_SERVICES = ["dv", "iv", "site", "stat"] WATERDATA_SERVICES = [ - "qwdata", "gwlevels", "measurements", "peaks", @@ -135,125 +134,13 @@ def get_qwdata( **kwargs, ) -> Tuple[pd.DataFrame, BaseMetadata]: """ - Get water sample data from qwdata service. - - .. warning:: - - WARNING: Beginning in March 2024 the NWIS qw data endpoint will - not deliver new data or updates to existing data. - Eventually the endpoint will be retired. For updated information visit: - https://waterdata.usgs.gov.nwis/qwdata - For additional details, see the R package vignette: - https://doi-usgs.github.io/dataRetrieval/articles/Status.html - If you have additional questions about the qw data service, - email CompTools@usgs.gov. 
- - Parameters - ---------- - sites: string or list of strings, optional, default is None - If the qwdata parameter site_no is supplied, it will overwrite the - sites parameter - start: string, optional, default is None - If the qwdata parameter begin_date is supplied, it will overwrite the - start parameter (YYYY-MM-DD) - end: string, optional, default is None - If the qwdata parameter end_date is supplied, it will overwrite the - end parameter (YYYY-MM-DD) - multi_index: bool, optional - If False, a dataframe with a single-level index (datetime) is returned, - default is True - wide_format : bool, optional - If True, return data in wide format with multiple samples per row and - one row per time, default is True - datetime_index : bool, optional - If True, create a datetime index, default is True - ssl_check: bool, optional - If True, check SSL certificates, if False, do not check SSL, - default is True - **kwargs: optional - If supplied, will be used as query parameters - - Returns - ------- - df: ``pandas.DataFrame`` - Times series data from the NWIS JSON - md: :obj:`dataretrieval.utils.Metadata` - A custom metadata object - - Examples - -------- - .. doctest:: - - >>> # get water sample information for site 11447650 - >>> df, md = dataretrieval.nwis.get_qwdata( - ... sites="11447650", start="2010-01-01", end="2010-02-01" - ... ) + Get water sample data from qwdata service - deprecated, use `get_samples()` + in the waterdata module. """ - warnings.warn( - ( - "WARNING: Starting in March 2024, the NWIS qw data endpoint is " - "retiring and no longer receives updates. For more information, " - "refer to https://waterdata.usgs.gov.nwis/qwdata and " - "https://doi-usgs.github.io/dataRetrieval/articles/Status.html " - "or email CompTools@usgs.gov." + raise NameError( + "`nwis.get_qwdata` has been replaced with `waterdata.get_samples()`." 
) - ) - - _check_sites_value_types(sites) - - kwargs["site_no"] = kwargs.pop("site_no", sites) - kwargs["begin_date"] = kwargs.pop("begin_date", start) - kwargs["end_date"] = kwargs.pop("end_date", end) - kwargs["multi_index"] = multi_index - if wide_format: - kwargs["qw_sample_wide"] = "qw_sample_wide" - - payload = { - "agency_cd": "USGS", - "format": "rdb", - "pm_cd_compare": "Greater than", - "inventory_output": "0", - "rdb_inventory_output": "file", - "TZoutput": "0", - "rdb_qw_attributes": "expanded", - "date_format": "YYYY-MM-DD", - "rdb_compression": "value", - "submitted_form": "brief_list", - } - - # check for parameter codes, and reformat query args - qwdata_parameter_code_field = "parameterCd" - if kwargs.get(qwdata_parameter_code_field): - parameter_codes = kwargs.pop(qwdata_parameter_code_field) - parameter_codes = to_str(parameter_codes) - kwargs["multiple_parameter_cds"] = parameter_codes - kwargs["param_cd_operator"] = "OR" - - search_criteria = kwargs.get("list_of_search_criteria") - if search_criteria: - kwargs["list_of_search_criteria"] = "{},{}".format( - search_criteria, "multiple_parameter_cds" - ) - else: - kwargs["list_of_search_criteria"] = "multiple_parameter_cds" - - kwargs.update(payload) - - warnings.warn( - "NWIS qw web services are being retired. 
" - + "See this note from the R package for more: " - + "https://doi-usgs.github.io/dataRetrieval/articles/qwdata_changes.html", - category=DeprecationWarning, - ) - response = query_waterdata("qwdata", ssl_check=ssl_check, **kwargs) - - df = _read_rdb(response.text) - - if datetime_index is True: - df = format_datetime(df, "sample_dt", "sample_tm", "sample_start_time_datum_cd") - - return format_response(df, **kwargs), NWIS_Metadata(response, **kwargs) def get_discharge_measurements( @@ -269,14 +156,10 @@ def get_discharge_measurements( Parameters ---------- sites: string or list of strings, optional, default is None - If the qwdata parameter site_no is supplied, it will overwrite the - sites parameter - start: string, optional, default is None - If the qwdata parameter begin_date is supplied, it will overwrite the - start parameter (YYYY-MM-DD) + start: string, optional, default is None + Supply date in the format: YYYY-MM-DD end: string, optional, default is None - If the qwdata parameter end_date is supplied, it will overwrite the - end parameter (YYYY-MM-DD) + Supply date in the format: YYYY-MM-DD ssl_check: bool, optional If True, check SSL certificates, if False, do not check SSL, default is True @@ -1183,7 +1066,6 @@ def get_record( service: string, default is 'iv' - 'iv' : instantaneous data - 'dv' : daily mean data - - 'qwdata' : discrete samples - 'site' : site description - 'measurements' : discharge measurements - 'peaks': discharge peaks @@ -1212,9 +1094,6 @@ def get_record( >>> # Get latest daily mean data from site 01585200 >>> df = dataretrieval.nwis.get_record(sites="01585200", service="dv") - >>> # Get all discrete sample data from site 01585200 - >>> df = dataretrieval.nwis.get_record(sites="01585200", service="qwdata") - >>> # Get site description for site 01585200 >>> df = dataretrieval.nwis.get_record(sites="01585200", service="site") @@ -1280,18 +1159,6 @@ def get_record( ) return df - elif service == "qwdata": - df, _ = get_qwdata( - 
site_no=sites, - begin_date=start, - end_date=end, - multi_index=multi_index, - wide_format=wide_format, - ssl_check=ssl_check, - **kwargs, - ) - return df - elif service == "site": df, _ = get_info(sites=sites, ssl_check=ssl_check, **kwargs) return df diff --git a/dataretrieval/samples.py b/dataretrieval/samples.py index eb3a614f..c55c1a84 100644 --- a/dataretrieval/samples.py +++ b/dataretrieval/samples.py @@ -6,104 +6,19 @@ from __future__ import annotations -import json -from io import StringIO from typing import TYPE_CHECKING, Literal, get_args import pandas as pd -import requests -from requests.models import PreparedRequest +import warnings from dataretrieval.utils import BaseMetadata, to_str +from dataretrieval.waterdata import get_samples if TYPE_CHECKING: from typing import Optional, Tuple, Union - + from dataretrieval.waterdata import _SERVICES, _PROFILES from pandas import DataFrame - -_BASE_URL = "https://api.waterdata.usgs.gov/samples-data" - -_CODE_SERVICES = Literal[ - "characteristicgroup", - "characteristics", - "counties", - "countries", - "observedproperty", - "samplemedia", - "sitetype", - "states", -] - - -_SERVICES = Literal["activities", "locations", "organizations", "projects", "results"] - -_PROFILES = Literal[ - "actgroup", - "actmetric", - "basicbio", - "basicphyschem", - "count", - "fullbio", - "fullphyschem", - "labsampleprep", - "narrow", - "organization", - "project", - "projectmonitoringlocationweight", - "resultdetectionquantitationlimit", - "sampact", - "site", -] - -_PROFILE_LOOKUP = { - "activities": ["sampact", "actmetric", "actgroup", "count"], - "locations": ["site", "count"], - "organizations": ["organization", "count"], - "projects": ["project", "projectmonitoringlocationweight"], - "results": [ - "fullphyschem", - "basicphyschem", - "fullbio", - "basicbio", - "narrow", - "resultdetectionquantitationlimit", - "labsampleprep", - "count", - ], -} - - -def get_codes(code_service: _CODE_SERVICES) -> DataFrame: - """Return codes 
from a Samples code service. - - Parameters - ---------- - code_service : string - One of the following options: "states", "counties", "countries" - "sitetype", "samplemedia", "characteristicgroup", "characteristics", - or "observedproperty" - """ - valid_code_services = get_args(_CODE_SERVICES) - if code_service not in valid_code_services: - raise ValueError( - f"Invalid code service: '{code_service}'. " - f"Valid options are: {valid_code_services}." - ) - - url = f"{_BASE_URL}/codeservice/{code_service}?mimeType=application%2Fjson" - - response = requests.get(url) - - response.raise_for_status() - - data_dict = json.loads(response.text) - data_list = data_dict['data'] - - df = pd.DataFrame(data_list) - - return df - def get_usgs_samples( ssl_check: bool = True, service: _SERVICES = "results", @@ -292,60 +207,40 @@ def get_usgs_samples( """ - _check_profiles(service, profile) - - params = { - k: v for k, v in locals().items() - if k not in ["ssl_check", "service", "profile"] - and v is not None - } - - - params.update({"mimeType": "text/csv"}) - - if "boundingBox" in params: - params["boundingBox"] = to_str(params["boundingBox"]) - - url = f"{_BASE_URL}/{service}/{profile}" - - req = PreparedRequest() - req.prepare_url(url, params=params) - print(f"Request: {req.url}") - - response = requests.get(url, params=params, verify=ssl_check) - - response.raise_for_status() - - df = pd.read_csv(StringIO(response.text), delimiter=",") - - return df, BaseMetadata(response) - -def _check_profiles( - service: _SERVICES, - profile: _PROFILES, -) -> None: - """Check whether a service profile is valid. - - Parameters - ---------- - service : string - One of the service names from the "services" list. - profile : string - One of the profile names from "results_profiles", - "locations_profiles", "activities_profiles", - "projects_profiles" or "organizations_profiles". 
- """ - valid_services = get_args(_SERVICES) - if service not in valid_services: - raise ValueError( - f"Invalid service: '{service}'. " - f"Valid options are: {valid_services}." - ) + warnings.warn( + "`get_usgs_samples` is deprecated and will be removed. Use `waterdata.get_samples` instead.", + DeprecationWarning, + stacklevel=2, + ) + + result = get_samples( + ssl_check=ssl_check, + service=service, + profile=profile, + activityMediaName=activityMediaName, + activityStartDateLower=activityStartDateLower, + activityStartDateUpper=activityStartDateUpper, + activityTypeCode=activityTypeCode, + characteristicGroup=characteristicGroup, + characteristic=characteristic, + characteristicUserSupplied=characteristicUserSupplied, + boundingBox=boundingBox, + countryFips=countryFips, + stateFips=stateFips, + countyFips=countyFips, + siteTypeCode=siteTypeCode, + siteTypeName=siteTypeName, + usgsPCode=usgsPCode, + hydrologicUnit=hydrologicUnit, + monitoringLocationIdentifier=monitoringLocationIdentifier, + organizationIdentifier=organizationIdentifier, + pointLocationLatitude=pointLocationLatitude, + pointLocationLongitude=pointLocationLongitude, + pointLocationWithinMiles=pointLocationWithinMiles, + projectIdentifier=projectIdentifier, + recordIdentifierUserSupplied=recordIdentifierUserSupplied, + ) + + return result - valid_profiles = _PROFILE_LOOKUP[service] - if profile not in valid_profiles: - raise ValueError( - f"Invalid profile: '{profile}' for service '{service}'. " - f"Valid options are: {valid_profiles}." - ) diff --git a/dataretrieval/waterdata.py b/dataretrieval/waterdata.py new file mode 100644 index 00000000..ceed581e --- /dev/null +++ b/dataretrieval/waterdata.py @@ -0,0 +1,350 @@ +"""Functions for downloading data from the Water Data APIs, including the USGS Aquarius Samples database. + +See https://api.waterdata.usgs.gov/ for API reference. 
+""" + +from __future__ import annotations + +import json +from io import StringIO +from typing import TYPE_CHECKING, Literal, get_args + +import pandas as pd +import requests +from requests.models import PreparedRequest + +from dataretrieval.utils import BaseMetadata, to_str + +if TYPE_CHECKING: + from typing import Optional, Tuple, Union + + from pandas import DataFrame + + +_BASE_URL = "https://api.waterdata.usgs.gov/samples-data" + +_CODE_SERVICES = Literal[ + "characteristicgroup", + "characteristics", + "counties", + "countries", + "observedproperty", + "samplemedia", + "sitetype", + "states", +] + + +_SERVICES = Literal["activities", "locations", "organizations", "projects", "results"] + +_PROFILES = Literal[ + "actgroup", + "actmetric", + "basicbio", + "basicphyschem", + "count", + "fullbio", + "fullphyschem", + "labsampleprep", + "narrow", + "organization", + "project", + "projectmonitoringlocationweight", + "resultdetectionquantitationlimit", + "sampact", + "site", +] + +_PROFILE_LOOKUP = { + "activities": ["sampact", "actmetric", "actgroup", "count"], + "locations": ["site", "count"], + "organizations": ["organization", "count"], + "projects": ["project", "projectmonitoringlocationweight"], + "results": [ + "fullphyschem", + "basicphyschem", + "fullbio", + "basicbio", + "narrow", + "resultdetectionquantitationlimit", + "labsampleprep", + "count", + ], +} + + +def get_codes(code_service: _CODE_SERVICES) -> DataFrame: + """Return codes from a Samples code service. + + Parameters + ---------- + code_service : string + One of the following options: "states", "counties", "countries" + "sitetype", "samplemedia", "characteristicgroup", "characteristics", + or "observedproperty" + """ + valid_code_services = get_args(_CODE_SERVICES) + if code_service not in valid_code_services: + raise ValueError( + f"Invalid code service: '{code_service}'. " + f"Valid options are: {valid_code_services}." 
+ ) + + url = f"{_BASE_URL}/codeservice/{code_service}?mimeType=application%2Fjson" + + response = requests.get(url) + + response.raise_for_status() + + data_dict = json.loads(response.text) + data_list = data_dict['data'] + + df = pd.DataFrame(data_list) + + return df + +def get_samples( + ssl_check: bool = True, + service: _SERVICES = "results", + profile: _PROFILES = "fullphyschem", + activityMediaName: Optional[Union[str, list[str]]] = None, + activityStartDateLower: Optional[str] = None, + activityStartDateUpper: Optional[str] = None, + activityTypeCode: Optional[Union[str, list[str]]] = None, + characteristicGroup: Optional[Union[str, list[str]]] = None, + characteristic: Optional[Union[str, list[str]]] = None, + characteristicUserSupplied: Optional[Union[str, list[str]]] = None, + boundingBox: Optional[list[float]] = None, + countryFips: Optional[Union[str, list[str]]] = None, + stateFips: Optional[Union[str, list[str]]] = None, + countyFips: Optional[Union[str, list[str]]] = None, + siteTypeCode: Optional[Union[str, list[str]]] = None, + siteTypeName: Optional[Union[str, list[str]]] = None, + usgsPCode: Optional[Union[str, list[str]]] = None, + hydrologicUnit: Optional[Union[str, list[str]]] = None, + monitoringLocationIdentifier: Optional[Union[str, list[str]]] = None, + organizationIdentifier: Optional[Union[str, list[str]]] = None, + pointLocationLatitude: Optional[float] = None, + pointLocationLongitude: Optional[float] = None, + pointLocationWithinMiles: Optional[float] = None, + projectIdentifier: Optional[Union[str, list[str]]] = None, + recordIdentifierUserSupplied: Optional[Union[str, list[str]]] = None, +) -> Tuple[DataFrame, BaseMetadata]: + """Search Samples database for USGS water quality data. + This is a wrapper function for the Samples database API. All potential + filters are provided as arguments to the function, but please do not + populate all possible filters; leave as many as feasible with their default + value (None). 
This is important because overcomplicated web service queries + can bog down the database's ability to return an applicable dataset before + it times out. + + The web GUI for the Samples database can be found here: + https://waterdata.usgs.gov/download-samples/#dataProfile=site + + If you would like more details on feasible query parameters (complete with + examples), please visit the Samples database swagger docs, here: + https://api.waterdata.usgs.gov/samples-data/docs#/ + + Parameters + ---------- + ssl_check : bool, optional + Check the SSL certificate. + service : string + One of the available Samples services: "results", "locations", "activities", + "projects", or "organizations". Defaults to "results". + profile : string + One of the available profiles associated with a service. Options for each + service are: + results - "fullphyschem", "basicphyschem", + "fullbio", "basicbio", "narrow", + "resultdetectionquantitationlimit", + "labsampleprep", "count" + locations - "site", "count" + activities - "sampact", "actmetric", + "actgroup", "count" + projects - "project", "projectmonitoringlocationweight" + organizations - "organization", "count" + activityMediaName : string or list of strings, optional + Name or code indicating environmental medium in which sample was taken. + Check the `activityMediaName_lookup()` function in this module for all + possible inputs. + Example: "Water". + activityStartDateLower : string, optional + The start date if using a date range. Takes the format YYYY-MM-DD. + The logic is inclusive, i.e. it will also return results that + match the date. If left as None, will pull all data on or before + activityStartDateUpper, if populated. + activityStartDateUpper : string, optional + The end date if using a date range. Takes the format YYYY-MM-DD. + The logic is inclusive, i.e. it will also return results that + match the date. If left as None, will pull all data after + activityStartDateLower up to the most recent available results. 
+ activityTypeCode : string or list of strings, optional + Text code that describes type of field activity performed. + Example: "Sample-Routine, regular". + characteristicGroup : string or list of strings, optional + Characteristic group is a broad category of characteristics + describing one or more results. Check the `characteristicGroup_lookup()` + function in this module for all possible inputs. + Example: "Organics, PFAS" + characteristic : string or list of strings, optional + Characteristic is a specific category describing one or more results. + Check the `characteristic_lookup()` function in this module for all + possible inputs. + Example: "Suspended Sediment Discharge" + characteristicUserSupplied : string or list of strings, optional + A user supplied characteristic name describing one or more results. + boundingBox: list of four floats, optional + Filters on the associated monitoring location's point location + by checking if it is located within the specified geographic area. + The logic is inclusive, i.e. it will include locations that overlap + with the edge of the bounding box. Values are separated by commas, + expressed in decimal degrees, NAD83, and longitudes west of Greenwich + are negative. + The format is a list consisting of: + - Western-most longitude + - Southern-most latitude + - Eastern-most longitude + - Northern-most latitude + Example: [-92.8,44.2,-88.9,46.0] + countryFips : string or list of strings, optional + Example: "US" (United States) + stateFips : string or list of strings, optional + Check the `stateFips_lookup()` function in this module for all + possible inputs. + Example: "US:15" (United States: Hawaii) + countyFips : string or list of strings, optional + Check the `countyFips_lookup()` function in this module for all + possible inputs. + Example: "US:15:001" (United States: Hawaii, Hawaii County) + siteTypeCode : string or list of strings, optional + An abbreviation for a certain site type. 
Check the `siteType_lookup()` + function in this module for all possible inputs. + Example: "GW" (Groundwater site) + siteTypeName : string or list of strings, optional + A full name for a certain site type. Check the `siteType_lookup()` + function in this module for all possible inputs. + Example: "Well" + usgsPCode : string or list of strings, optional + 5-digit number used in the US Geological Survey computerized + data system, National Water Information System (NWIS), to + uniquely identify a specific constituent. Check the + `characteristic_lookup()` function in this module for all possible + inputs. + Example: "00060" (Discharge, cubic feet per second) + hydrologicUnit : string or list of strings, optional + Max 12-digit number used to describe a hydrologic unit. + Example: "070900020502" + monitoringLocationIdentifier : string or list of strings, optional + A monitoring location identifier has two parts: the agency code + and the location number, separated by a dash (-). + Example: "USGS-040851385" + organizationIdentifier : string or list of strings, optional + Designator used to uniquely identify a specific organization. + Currently only accepting the organization "USGS". + pointLocationLatitude : float, optional + Latitude for a point/radius query (decimal degrees). Must be used + with pointLocationLongitude and pointLocationWithinMiles. + pointLocationLongitude : float, optional + Longitude for a point/radius query (decimal degrees). Must be used + with pointLocationLatitude and pointLocationWithinMiles. + pointLocationWithinMiles : float, optional + Radius for a point/radius query. Must be used with + pointLocationLatitude and pointLocationLongitude + projectIdentifier : string or list of strings, optional + Designator used to uniquely identify a data collection project. Project + identifiers are specific to an organization (e.g. USGS). 
+ Example: "ZH003QW03" + recordIdentifierUserSupplied : string or list of strings, optional + Internal AQS record identifier that returns 1 entry. Only available + for the "results" service. + + Returns + ------- + df : ``pandas.DataFrame`` + Formatted data returned from the API query. + md : :obj:`dataretrieval.utils.Metadata` + Custom ``dataretrieval`` metadata object pertaining to the query. + + Examples + -------- + .. code:: + + >>> # Get PFAS results within a bounding box + >>> df, md = dataretrieval.waterdata.get_samples( + ... boundingBox=[-90.2,42.6,-88.7,43.2], + ... characteristicGroup="Organics, PFAS" + ... ) + + >>> # Get all activities for the Commonwealth of Virginia over a date range + >>> df, md = dataretrieval.waterdata.get_samples( + ... service="activities", + ... profile="sampact", + ... activityStartDateLower="2023-10-01", + ... activityStartDateUpper="2024-01-01", + ... stateFips="US:51") + + >>> # Get all pH samples for two sites in Utah + >>> df, md = dataretrieval.waterdata.get_samples( + ... monitoringLocationIdentifier=['USGS-393147111462301', 'USGS-393343111454101'], + ... usgsPCode='00400') + + """ + + _check_profiles(service, profile) + + params = { + k: v for k, v in locals().items() + if k not in ["ssl_check", "service", "profile"] + and v is not None + } + + + params.update({"mimeType": "text/csv"}) + + if "boundingBox" in params: + params["boundingBox"] = to_str(params["boundingBox"]) + + url = f"{_BASE_URL}/{service}/{profile}" + + req = PreparedRequest() + req.prepare_url(url, params=params) + print(f"Request: {req.url}") + + response = requests.get(url, params=params, verify=ssl_check) + + response.raise_for_status() + + df = pd.read_csv(StringIO(response.text), delimiter=",") + + return df, BaseMetadata(response) + +def _check_profiles( + service: _SERVICES, + profile: _PROFILES, +) -> None: + """Check whether a service profile is valid. 
+ + Parameters + ---------- + service : string + One of the service names from the "services" list. + profile : string + One of the profile names from "results_profiles", + "locations_profiles", "activities_profiles", + "projects_profiles" or "organizations_profiles". + """ + valid_services = get_args(_SERVICES) + if service not in valid_services: + raise ValueError( + f"Invalid service: '{service}'. " + f"Valid options are: {valid_services}." + ) + + valid_profiles = _PROFILE_LOOKUP[service] + if profile not in valid_profiles: + raise ValueError( + f"Invalid profile: '{profile}' for service '{service}'. " + f"Valid options are: {valid_profiles}." + ) + diff --git a/demos/R Python Vignette equivalents.ipynb b/demos/R Python Vignette equivalents.ipynb index f99d82a5..12cd52e3 100755 --- a/demos/R Python Vignette equivalents.ipynb +++ b/demos/R Python Vignette equivalents.ipynb @@ -14,6 +14,7 @@ "outputs": [], "source": [ "from dataretrieval import nwis\n", + "from dataretrieval import waterdata\n", "from dataretrieval import wqp" ] }, @@ -45,8 +46,8 @@ "\n", "# Sample data Nitrate:\n", "parameterCd <- \"00618\"\n", - "qwData <- readNWISqw(siteNumber,parameterCd,\n", - " \"1980-01-01\",\"2010-01-01\")\n", + "qwData <- read_USGS_samples(monitoringLocationIdentifier=sprintf(\"USGS-%s\", siteNumber),usgsPCode=parameterCd,\n", + " activityStartDateLower=\"1980-01-01\", activityStartDateUpper=\"2010-01-01\")\n", "\n", "pCode <- readNWISpCode(parameterCd)\n", "'''\n", @@ -61,7 +62,7 @@ "\n", "# sample data Nitrate:\n", "parameterCd = \"00618\"\n", - "qwData, md = nwis.get_qwdata(sites=siteNumber, parameterCd=parameterCd, start=\"1980-01-01\", end=\"2010-01-01\")\n", + "samples_data, md = waterdata.get_samples(monitoringLocationIdentifier=f\"USGS-{siteNumber}\", usgsPCode=parameterCd, activityStartDateLower=\"1980-01-01\", activityStartDateUpper=\"2010-01-01\")\n", "\n", "pCode, md = nwis.get_pmcodes(parameterCd=parameterCd)" ] @@ -199,18 +200,15 @@ "parameterCd <- 
c(\"00618\",\"71851\")\n", "startDate <- \"1985-10-01\"\n", "endDate <- \"2012-09-30\"\n", - "dfLong <- readNWISqw(siteNumber, parameterCd, \n", - " startDate, endDate)\n", - "# Or the wide return:\n", - "dfWide <- readNWISqw(siteNumber, parameterCd,\n", - " startDate, endDate, reshape=TRUE)\n", + "dfLong <- read_USGS_samples(monitoringLocationIdentifier=sprintf(\"USGS-%s\", siteNumber), usgsPCode=parameterCd, \n", + " activityStartDateLower=startDate, activityStartDateUpper=endDate)\n", "'''\n", "siteNumber = \"01491000\"\n", "parameterCd = [\"00618\",\"71851\"]\n", "startDate = \"1985-10-01\"\n", "endDate = \"2012-09-30\"\n", - "dfLong, md = nwis.get_qwdata(sites=siteNumber, parameterCd=parameterCd,\n", - " start=startDate, end=endDate)" + "dfLong, md = waterdata.get_samples(monitoringLocationIdentifier=f\"USGS-{siteNumber}\", usgsPCode=parameterCd,\n", + " activityStartDateLower=startDate, activityStartDateUpper=endDate)" ] }, { diff --git a/demos/hydroshare/USGS_dataretrieval_WaterSamples_Examples.ipynb b/demos/hydroshare/USGS_dataretrieval_WaterSamples_Examples.ipynb index 44a9f3b3..55ccc084 100644 --- a/demos/hydroshare/USGS_dataretrieval_WaterSamples_Examples.ipynb +++ b/demos/hydroshare/USGS_dataretrieval_WaterSamples_Examples.ipynb @@ -8,9 +8,9 @@ } }, "source": [ - "# USGS dataretrieval Python Package `get_qwdata()` Examples\n", + "# USGS dataretrieval Python Package `get_samples()` Examples\n", "\n", - "This notebook provides examples of using the Python dataretrieval package to retrieve water quality sample data for United States Geological Survey (USGS) monitoring sites. The dataretrieval package provides a collection of functions to get data from the USGS National Water Information System (NWIS) and other online sources of hydrology and water quality data, including the United States Environmental Protection Agency (USEPA)." 
+ "This notebook provides examples of using the Python dataretrieval package to retrieve water quality sample data for United States Geological Survey (USGS) monitoring sites. The dataretrieval package provides a collection of functions to get data from the USGS Samples database and other online sources of hydrology and water quality data, including the United States Environmental Protection Agency (USEPA)." ] }, { @@ -60,7 +60,7 @@ }, "outputs": [], "source": [ - "from dataretrieval import nwis\n", + "from dataretrieval import waterdata\n", "from IPython.display import display" ] }, @@ -70,16 +70,119 @@ "source": [ "### Basic Usage\n", "\n", - "The dataretrieval package has several functions that allow you to retrieve data from different web services. This examples uses the `get_qwdata()` function to retrieve water quality sample data for USGS monitoring sites from NWIS. The following arguments are supported:\n", + "The dataretrieval package has several functions that allow you to retrieve data from different web services. This example uses the `get_samples()` function to retrieve water quality sample data for USGS monitoring sites from Samples. The following arguments are supported:\n", "\n", - "Arguments (Additional arguments, if supplied, will be used as query parameters)\n", - "\n", - "* **sites** (string or list of strings): A list of USGS site identifiers for which to retrieve data. If the qwdata parameter site_no is supplied, it will overwrite the sites parameter.\n", - "* **parameterCd** (string or list of strings): A list of USGS parameter codes for which to retrieve data.\n", - "* **start** (string): The beginning date for a period for which to retrieve data. If the qwdata parameter begin_date is supplied, it will overwrite the start parameter.\n", - "* **end** (string): The ending date for a period for which to retrieve data. 
If the qwdata parameter end_date is supplied, it will overwrite the end parameter.\n", - "* **datetime_index** (boolean): If True, create a datetime index\n", - "* **wide_format** (boolean): If True, return data in wide format with multiple samples per row and one row per time." + "* **ssl_check** : boolean, optional\n", + " Check the SSL certificate.\n", + "* **service** : string\n", + " One of the available Samples services: \"results\", \"locations\", \"activities\",\n", + " \"projects\", or \"organizations\". Defaults to \"results\".\n", + "* **profile** : string\n", + " One of the available profiles associated with a service. Options for each\n", + " service are:\n", + " results - \"fullphyschem\", \"basicphyschem\",\n", + " \"fullbio\", \"basicbio\", \"narrow\",\n", + " \"resultdetectionquantitationlimit\",\n", + " \"labsampleprep\", \"count\"\n", + " locations - \"site\", \"count\"\n", + " activities - \"sampact\", \"actmetric\",\n", + " \"actgroup\", \"count\"\n", + " projects - \"project\", \"projectmonitoringlocationweight\"\n", + " organizations - \"organization\", \"count\"\n", + "* **activityMediaName** : string or list of strings, optional\n", + " Name or code indicating environmental medium in which sample was taken.\n", + " Check the `activityMediaName_lookup()` function in this module for all\n", + " possible inputs.\n", + " Example: \"Water\".\n", + "* **activityStartDateLower** : string, optional\n", + " The start date if using a date range. Takes the format YYYY-MM-DD.\n", + " The logic is inclusive, i.e. it will also return results that\n", + " match the date. If left as None, will pull all data on or before\n", + " activityStartDateUpper, if populated.\n", + "* **activityStartDateUpper** : string, optional\n", + " The end date if using a date range. Takes the format YYYY-MM-DD.\n", + " The logic is inclusive, i.e. it will also return results that\n", + " match the date. 
If left as None, will pull all data after\n", + " activityStartDateLower up to the most recent available results.\n", + "* **activityTypeCode** : string or list of strings, optional\n", + " Text code that describes type of field activity performed.\n", + " Example: \"Sample-Routine, regular\".\n", + "* **characteristicGroup** : string or list of strings, optional\n", + " Characteristic group is a broad category of characteristics\n", + " describing one or more results. Check the `characteristicGroup_lookup()`\n", + " function in this module for all possible inputs.\n", + " Example: \"Organics, PFAS\"\n", + "* **characteristic** : string or list of strings, optional\n", + " Characteristic is a specific category describing one or more results.\n", + " Check the `characteristic_lookup()` function in this module for all\n", + " possible inputs.\n", + " Example: \"Suspended Sediment Discharge\"\n", + "* **characteristicUserSupplied** : string or list of strings, optional\n", + " A user supplied characteristic name describing one or more results.\n", + "* **boundingBox**: list of four floats, optional\n", + " Filters on the associated monitoring location's point location\n", + " by checking if it is located within the specified geographic area. \n", + " The logic is inclusive, i.e. it will include locations that overlap\n", + " with the edge of the bounding box. 
Values are separated by commas,\n", + " expressed in decimal degrees, NAD83, and longitudes west of Greenwich\n", + " are negative.\n", + " The format is a list consisting of:\n", + " - Western-most longitude\n", + " - Southern-most latitude\n", + " - Eastern-most longitude\n", + " - Northern-most latitude \n", + " Example: [-92.8,44.2,-88.9,46.0]\n", + "* **countryFips** : string or list of strings, optional\n", + " Example: \"US\" (United States)\n", + "* **stateFips** : string or list of strings, optional\n", + " Check the `stateFips_lookup()` function in this module for all\n", + " possible inputs.\n", + " Example: \"US:15\" (United States: Hawaii)\n", + "* **countyFips** : string or list of strings, optional\n", + " Check the `countyFips_lookup()` function in this module for all\n", + " possible inputs.\n", + " Example: \"US:15:001\" (United States: Hawaii, Hawaii County)\n", + "* **siteTypeCode** : string or list of strings, optional\n", + " An abbreviation for a certain site type. Check the `siteType_lookup()`\n", + " function in this module for all possible inputs.\n", + " Example: \"GW\" (Groundwater site)\n", + "* **siteTypeName** : string or list of strings, optional\n", + " A full name for a certain site type. Check the `siteType_lookup()`\n", + " function in this module for all possible inputs.\n", + " Example: \"Well\"\n", + "* **usgsPCode** : string or list of strings, optional\n", + " 5-digit number used in the US Geological Survey computerized\n", + " data system, National Water Information System (NWIS), to\n", + " uniquely identify a specific constituent. 
Check the \n", + " `characteristic_lookup()` function in this module for all possible\n", + " inputs.\n", + " Example: \"00060\" (Discharge, cubic feet per second)\n", + "* **hydrologicUnit** : string or list of strings, optional\n", + " Max 12-digit number used to describe a hydrologic unit.\n", + " Example: \"070900020502\"\n", + "* **monitoringLocationIdentifier** : string or list of strings, optional\n", + " A monitoring location identifier has two parts: the agency code\n", + " and the location number, separated by a dash (-).\n", + " Example: \"USGS-040851385\"\n", + "* **organizationIdentifier** : string or list of strings, optional\n", + " Designator used to uniquely identify a specific organization.\n", + " Currently only accepting the organization \"USGS\".\n", + "* **pointLocationLatitude** : float, optional\n", + " Latitude for a point/radius query (decimal degrees). Must be used\n", + " with pointLocationLongitude and pointLocationWithinMiles.\n", + "* **pointLocationLongitude** : float, optional\n", + " Longitude for a point/radius query (decimal degrees). Must be used\n", + " with pointLocationLatitude and pointLocationWithinMiles.\n", + "* **pointLocationWithinMiles** : float, optional\n", + " Radius for a point/radius query. Must be used with\n", + " pointLocationLatitude and pointLocationLongitude\n", + "* **projectIdentifier** : string or list of strings, optional\n", + " Designator used to uniquely identify a data collection project. Project\n", + " identifiers are specific to an organization (e.g. USGS).\n", + " Example: \"ZH003QW03\"\n", + "* **recordIdentifierUserSupplied** : string or list of strings, optional\n", + " Internal AQS record identifier that returns 1 entry. Only available\n", + " for the \"results\" service." 
] }, { @@ -103,8 +206,8 @@ }, "outputs": [], "source": [ - "siteID = '10109000'\n", - "wq_data = nwis.get_qwdata(sites=siteID)\n", + "siteID = 'USGS-10109000'\n", + "wq_data = waterdata.get_samples(monitoringLocationIdentifier=siteID)\n", "print('Retrieved data for ' + str(len(wq_data[0])) + ' samples.')" ] }, @@ -114,7 +217,7 @@ "source": [ "### Interpreting the Result\n", "\n", - "The result of calling the `get_qwdata()` function is an object that contains a Pandas data frame object and an associated metadata object. The Pandas data frame contains the water quality sample data for the requested site, and or observed variables and time frame.\n", + "The result of calling the `get_samples()` function is an object that contains a Pandas data frame object and an associated metadata object. The Pandas data frame contains the water quality sample data for the requested site and/or observed variables and time frame.\n", "\n", "Once you've got the data frame, there's several useful things you can do to explore the data." ] @@ -127,7 +230,7 @@ } }, "source": [ - "Display the data frame as a table. The default data frame for this function is a wide, cross-tabulated table, with columns for each observed variable and a row for each sample date (wide_format=True)." + "Display the data frame as a table. The default data frame for this function is a long, flat table, with a row for each observed variable at a given site and date/time." ] }, { @@ -175,7 +278,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "The other part of the result returned from the `get_qwdata()` function is a metadata object that contains information about the query that was executed to return the data. For example, you can access the URL that was assembled to retrieve the requested data from the USGS web service. The USGS web service responses contain a descriptive header that defines and can be helpful in interpreting the contents of the response." 
+ "The other part of the result returned from the `get_samples()` function is a metadata object that contains information about the query that was executed to return the data. For example, you can access the URL that was assembled to retrieve the requested data from the USGS web service. The USGS web service responses contain a descriptive header that defines, and can be helpful in interpreting, the contents of the response." ] }, { @@ -192,7 +295,7 @@ }, "outputs": [], "source": [ - "print('The query URL used to retrieve the data from NWIS was: ' + wq_data[1].url)" + "print('The query URL used to retrieve the data from USGS Samples was: ' + wq_data[1].url)" ] }, { @@ -218,27 +321,9 @@ }, "outputs": [], "source": [ - "site_ids = ['04024430', '04024000']\n", + "site_ids = ['USGS-04024430', 'USGS-04024000']\n", "parameter_code = '00065'\n", - "wq_multi_site = nwis.get_qwdata(sites=site_ids, parameterCd=parameter_code)\n", - "print('Retrieved data for ' + str(len(wq_multi_site[0])) + ' samples.')\n", - "display(wq_multi_site[0])" - ] - }, - { - "metadata": {}, - "cell_type": "markdown", - "source": "The following example is the same as the previous example but with multi index turned off (multi_index=False)" - }, - { - "metadata": {}, - "cell_type": "code", - "outputs": [], - "execution_count": null, - "source": [ - "site_ids = ['04024430', '04024000']\n", - "parameter_code = '00065'\n", - "wq_multi_site = nwis.get_qwdata(sites=site_ids, parameterCd=parameter_code, multi_index=False)\n", + "wq_multi_site = waterdata.get_samples(monitoringLocationIdentifier=site_ids, usgsPCode=parameter_code)\n", "print('Retrieved data for ' + str(len(wq_multi_site[0])) + ' samples.')\n", "display(wq_multi_site[0])" ] @@ -251,7 +336,7 @@ } }, "source": [ - "#### Example 3: Retrieve water quality sample data for multiple sites, including a list of parameters, within a time period defined by start and end dates" + "#### Example 3: Retrieve water quality sample data for multiple sites, 
including a list of parameters, within a time period defined by start date until present" ] }, { @@ -268,44 +353,22 @@ }, "outputs": [], "source": [ - "site_ids = ['04024430', '04024000']\n", + "site_ids = ['USGS-04024430', 'USGS-04024000']\n", "parameterCd = ['34247', '30234', '32104', '34220']\n", "startDate = '2012-01-01'\n", - "endDate = ''\n", - "wq_data2 = nwis.get_qwdata(sites=site_ids, parameterCd=parameterCd,\n", - " start=startDate, end=endDate)\n", + "wq_data2 = waterdata.get_samples(monitoringLocationIdentifier=site_ids, usgsPCode=parameterCd,\n", + " activityStartDateLower=startDate)\n", "print('Retrieved data for ' + str(len(wq_multi_site[0])) + ' samples.')\n", "display(wq_data2[0])\n" ] }, - { - "metadata": {}, - "cell_type": "markdown", - "source": "The following example is the same as the previous example but with multi index turned off (multi_index=False)" - }, - { - "metadata": {}, - "cell_type": "code", - "outputs": [], - "execution_count": null, - "source": [ - "site_ids = ['04024430', '04024000']\n", - "parameterCd = ['34247', '30234', '32104', '34220']\n", - "startDate = '2012-01-01'\n", - "endDate = ''\n", - "wq_data2 = nwis.get_qwdata(sites=site_ids, parameterCd=parameterCd,\n", - " start=startDate, end=endDate, multi_index=False)\n", - "print('Retrieved data for ' + str(len(wq_multi_site[0])) + ' samples.')\n", - "display(wq_data2[0])" - ] - }, { "cell_type": "markdown", "metadata": {}, "source": [ - "#### Example 4: Retrieve water quality sample data for one site in serial format\n", + "#### Example 4: Retrieve water quality sample data for one site and convert to a wide format\n", "\n", - "Each row in the resulting table represents a single observation of a single parameters. Each sample may be analyzed for multiple parameters and so a single water quality sample can result in multiple rows in serial format." 
+ "Note that the USGS Samples database returns multiple parameters in a \"long\" format: each row in the resulting table represents a single observation of a single parameter. Furthermore, every observation has 181 fields of metadata. However, if you wanted to place your water quality data into a \"wide\" format, where each column represents a water quality characteristic and its unit, the code below details one solution." ] }, { @@ -314,16 +377,19 @@ "metadata": {}, "outputs": [], "source": [ - "siteID = '10109000'\n", - "wq_data = nwis.get_qwdata(sites=siteID, wide_format=False)\n", - "print('Retrieved data for ' + str(len(wq_data[0])) + ' sample results.')\n", - "display(wq_data[0])" + "siteID = 'USGS-10109000'\n", + "wq_data,_ = waterdata.get_samples(monitoringLocationIdentifier=siteID)\n", + "print('Retrieved data for ' + str(len(wq_data)) + ' sample results.')\n", + "\n", + "wq_data[\"characteristic_unit\"] = wq_data[\"Result_Characteristic\"] + \", \" + wq_data[\"Result_MeasureUnit\"]\n", + "wq_data_wide = wq_data.pivot_table(index=['Location_Identifier', 'Activity_StartDate', 'Activity_StartTime'], columns=\"characteristic_unit\", values=\"Result_Measure\", aggfunc='first')\n", + "display(wq_data_wide)\n" ] } ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "hyswap-dev-environment", "language": "python", "name": "python3" }, @@ -337,7 +403,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.7" + "version": "3.12.7" } }, "nbformat": 4, diff --git a/docs/source/reference/index.rst b/docs/source/reference/index.rst index a2515073..1c9c498d 100644 --- a/docs/source/reference/index.rst +++ b/docs/source/reference/index.rst @@ -8,8 +8,10 @@ API reference :maxdepth: 1 nadp + nldi nwis samples streamstats utils + waterdata wqp diff --git a/docs/source/reference/nldi.rst b/docs/source/reference/nldi.rst new file mode 100644 index 00000000..a417954b --- /dev/null +++ 
b/docs/source/reference/nldi.rst @@ -0,0 +1,8 @@ +.. _nldi: + +dataretrieval.nldi +------------------ + +.. automodule:: dataretrieval.nldi + :members: + :special-members: \ No newline at end of file diff --git a/docs/source/reference/waterdata.rst b/docs/source/reference/waterdata.rst new file mode 100644 index 00000000..dc2cd0b3 --- /dev/null +++ b/docs/source/reference/waterdata.rst @@ -0,0 +1,8 @@ +.. _waterdata: + +dataretrieval.waterdata +------------------------- + +.. automodule:: dataretrieval.waterdata + :members: + :special-members: \ No newline at end of file diff --git a/tests/samples_test.py b/tests/waterdata_test.py similarity index 91% rename from tests/samples_test.py rename to tests/waterdata_test.py index c3e2a995..50eefdc5 100755 --- a/tests/samples_test.py +++ b/tests/waterdata_test.py @@ -3,9 +3,11 @@ import pytest from pandas import DataFrame -from dataretrieval.samples import ( +from dataretrieval.waterdata import ( _check_profiles, - get_usgs_samples + get_samples, + _SERVICES, + _PROFILES ) def mock_request(requests_mock, request_url, file_path): @@ -15,7 +17,7 @@ def mock_request(requests_mock, request_url, file_path): request_url, text=text.read(), headers={"mock_header": "value"} ) -def test_mock_get_usgs_samples(requests_mock): +def test_mock_get_samples(requests_mock): """Tests USGS Samples query""" request_url = ( "https://api.waterdata.usgs.gov/samples-data/results/fullphyschem?" 
@@ -24,7 +26,7 @@ def test_mock_get_usgs_samples(requests_mock): ) response_file_path = "data/samples_results.txt" mock_request(requests_mock, request_url, response_file_path) - df, md = get_usgs_samples( + df, md = get_samples( service="results", profile="fullphyschem", activityMediaName="Water", @@ -48,7 +50,7 @@ def test_check_profiles(): def test_samples_results(): """Test results call for proper columns""" - df,_ = get_usgs_samples( + df,_ = get_samples( service="results", profile="narrow", monitoringLocationIdentifier="USGS-05288705", @@ -60,7 +62,7 @@ def test_samples_results(): def test_samples_activity(): """Test activity call for proper columns""" - df,_ = get_usgs_samples( + df,_ = get_samples( service="activities", profile="sampact", monitoringLocationIdentifier="USGS-06719505" @@ -71,7 +73,7 @@ def test_samples_activity(): def test_samples_locations(): """Test locations call for proper columns""" - df,_ = get_usgs_samples( + df,_ = get_samples( service="locations", profile="site", stateFips="US:55", @@ -84,7 +86,7 @@ def test_samples_locations(): def test_samples_projects(): """Test projects call for proper columns""" - df,_ = get_usgs_samples( + df,_ = get_samples( service="projects", profile="project", stateFips="US:15", @@ -96,7 +98,7 @@ def test_samples_projects(): def test_samples_organizations(): """Test organizations call for proper columns""" - df,_ = get_usgs_samples( + df,_ = get_samples( service="organizations", profile="count", stateFips="US:01" diff --git a/tests/waterservices_test.py b/tests/waterservices_test.py index 323b6051..19cc30fb 100755 --- a/tests/waterservices_test.py +++ b/tests/waterservices_test.py @@ -11,7 +11,6 @@ get_info, get_iv, get_pmcodes, - get_qwdata, get_ratings, get_record, get_stats, @@ -203,65 +202,6 @@ def test_get_info(requests_mock): assert_metadata(requests_mock, request_url, md, site, [parameter_cd], format) -def test_get_qwdata(requests_mock): - """Tests get_qwdata method correctly generates the request url 
and returns - the result in a DataFrame""" - format = "rdb" - site = "01491000%2C01645000" - request_url = ( - "https://nwis.waterdata.usgs.gov/nwis/qwdata?site_no={}" - "&qw_sample_wide=qw_sample_wide&agency_cd=USGS&format={}&pm_cd_compare=Greater+than" - "&inventory_output=0&rdb_inventory_output=file&TZoutput=0&rdb_qw_attributes=expanded" - "&date_format=YYYY-MM-DD&rdb_compression=value&submitted_form=brief_list".format( - site, format - ) - ) - response_file_path = "data/waterdata_qwdata.txt" - mock_request(requests_mock, request_url, response_file_path) - with pytest.warns(DeprecationWarning): - df, md = get_qwdata(sites=["01491000", "01645000"]) - if not isinstance(df, DataFrame): - raise AssertionError(f"{type(df)} is not DataFrame base class type") - - if "geometry" in list(df): - if not isinstance(df, gpd.GeoDataFrame): - raise AssertionError(f"{type(df)} is not a GeoDataFrame") - - geom_type = df.geom_type.unique() - if len(geom_type) > 1 or geom_type[0] != "Point": - raise AssertionError( - f"Geometry type {geom_type} not valid, expecting Point" - ) - - assert df.size == 1821472 - assert_metadata(requests_mock, request_url, md, site, None, format) - - -@pytest.mark.parametrize("site_input_type_list", [True, False]) -def test_get_qwdata_site_value_types(requests_mock, site_input_type_list): - """Tests get_qwdata method for valid input types for the 'sites' parameter""" - _format = "rdb" - site = "01491000" - request_url = ( - "https://nwis.waterdata.usgs.gov/nwis/qwdata?site_no={}" - "&qw_sample_wide=qw_sample_wide&agency_cd=USGS&format={}&pm_cd_compare=Greater+than" - "&inventory_output=0&rdb_inventory_output=file&TZoutput=0&rdb_qw_attributes=expanded" - "&date_format=YYYY-MM-DD&rdb_compression=value&submitted_form=brief_list".format( - site, _format - ) - ) - response_file_path = "data/waterdata_qwdata.txt" - mock_request(requests_mock, request_url, response_file_path) - if site_input_type_list: - sites = [site] - else: - sites = site - with 
pytest.warns(DeprecationWarning): - df, md = get_qwdata(sites=sites) - assert type(df) is DataFrame - assert df.size == 1821472 - - def test_get_gwlevels(requests_mock): """Tests get_gwlevels method correctly generates the request url and returns the result in a DataFrame.""" format = "rdb"