diff --git a/src/ispypsa/templater/nodes.py b/src/ispypsa/templater/nodes.py index f4dc28e7..267f2e11 100644 --- a/src/ispypsa/templater/nodes.py +++ b/src/ispypsa/templater/nodes.py @@ -1,10 +1,4 @@ -import logging -from pathlib import Path - import pandas as pd -import requests -import xmltodict -from thefuzz import process from ispypsa.templater.mappings import _NEM_REGION_IDS, _NEM_SUB_REGION_IDS @@ -14,37 +8,6 @@ ) -def _get_reference_node_locations(reference_nodes): - # request and merge in substation coordinates for reference nodes - substation_coordinates = _request_transmission_substation_coordinates() - if not substation_coordinates.empty: - reference_node_col = process.extractOne( - "reference_node", reference_nodes.columns - )[0] - matched_subs = _fuzzy_match_names( - reference_nodes[reference_node_col], - substation_coordinates.index, - "merging in substation coordinate data", - threshold=85, - ) - reference_node_coordinates = pd.merge( - matched_subs, - substation_coordinates, - how="left", - left_on=reference_node_col, - right_index=True, - ) - reference_nodes = pd.concat( - [ - reference_nodes, - reference_node_coordinates["substation_latitude"], - reference_node_coordinates["substation_longitude"], - ], - axis=1, - ) - return reference_nodes - - def _template_sub_regions( sub_regional_reference_nodes: pd.DataFrame, mapping_only: bool = False ) -> pd.DataFrame: @@ -54,8 +17,8 @@ def _template_sub_regions( sub_regional_reference_nodes: pd.DataFrame specifying the NEM subregional reference nodes. mapping_only: boolean, when doing single region or region modelling this input - is set to True so unnecessary information such sub_region_reference_node - are latitude and longitude are not returned. + is set to True so unnecessary information such as sub_region_reference_node + is not returned. Returns: `pd.DataFrame`: ISPyPSA sub-regional node template @@ -85,7 +48,6 @@ def _template_sub_regions( "sub_region_reference_node_voltage_kv", ] ] - sub_regions = _get_reference_node_locations(sub_regions) return sub_regions @@ -122,7 +84,6 @@ def _template_regions(regional_reference_nodes: pd.DataFrame) -> pd.DataFrame: "regional_reference_node_voltage_kv", ] ] - regions = _get_reference_node_locations(regions) return regions @@ -165,70 +126,6 @@ def _extract_voltage(data: pd.DataFrame, column: str): return split_node_voltage -def _request_transmission_substation_coordinates() -> pd.DataFrame: - """ - Obtains transmission substation coordinates from a Web Feature Service (WFS) - source hosted as a dataset within the Australian Government's National Map: - - https://www.nationalmap.gov.au/#share=s-403jqUldEkbj6CwWcPZHefSgYeA - - The requested data is in Geography Markup Language (GML) format, which can be parsed - using the same tools that are used to parse XML. - - Returns: - Substation names, latitude and longitude within a :class:`pandas.DataFrame`. - If request error is encountered or the HTTP status of the request is not OK, - then an empty DataFrame will be returned with a warning that network node data - will be templated without coordinate data - - """ - params = dict( - service="WFS", - version="2.0.0", - request="GetFeature", - typeNames="National_Electricity_Infrastructure:Electricity_Transmission_Substations", - maxFeatures=10000, - ) - url = "https://services.ga.gov.au/gis/services/National_Electricity_Infrastructure/MapServer/WFSServer" - substation_coordinates = {} - try: - r = requests.get(url, params=params, timeout=60) - if r.status_code == 200: - data = xmltodict.parse(r.content) - features = data["wfs:FeatureCollection"]["wfs:member"] - for feature in features: - substation = feature["esri:Electricity_Transmission_Substations"] - name = substation.get("esri:SUBSTATION_NAME") - # The new format stores coordinates in X_COORDINATE and Y_COORDINATE fields - # These are in GDA2020 / MGA coordinates, but we also have SHAPE with gml:pos - if ( - "esri:SHAPE" in substation - and "gml:Point" in substation["esri:SHAPE"] - ): - coordinates = substation["esri:SHAPE"]["gml:Point"]["gml:pos"] - lat, long = coordinates.split(" ") - substation_coordinates[name] = { - "substation_latitude": lat, - "substation_longitude": long, - } - else: - logging.warning( - f"Failed to fetch substation coordinates. HTTP Status code: {r.status_code}." - ) - except requests.exceptions.RequestException as e: - logging.error(f"Error requesting substation coordinate data:\n{e}.") - if not substation_coordinates: - logging.warning( - "Could not get substation coordinate data. " - + "Network node data will be templated without coordinate data." - ) - substation_coordinates = pd.DataFrame(substation_coordinates).T - substation_coordinates = substation_coordinates[ - substation_coordinates.index.notna() - ] - return substation_coordinates - - def _capture_just_name(series: pd.Series) -> pd.DataFrame: """ Capture the name (plain English) and not the ID in parentheses (capitalised letters) diff --git a/tests/test_templater/test_nodes.py b/tests/test_templater/test_nodes.py index 59ce7797..d2c2b560 100644 --- a/tests/test_templater/test_nodes.py +++ b/tests/test_templater/test_nodes.py @@ -1,13 +1,8 @@ -import logging from pathlib import Path import pandas as pd -import pytest -import requests from ispypsa.templater.nodes import ( - _get_reference_node_locations, - _request_transmission_substation_coordinates, _template_regions, _template_sub_regions, ) @@ -23,8 +18,7 @@ def test_node_templater_nem_regions(workbook_table_cache_test_path: Path): ("South Pine", "Sydney West") ) assert set(regional_template.regional_reference_node_voltage_kv) == set((275, 330)) - # When coordinates are available, we have 6 columns, otherwise 4 - assert len(regional_template.columns) in (4, 6) + assert len(regional_template.columns) == 4 def test_templater_sub_regions(workbook_table_cache_test_path: Path): @@ -39,8 +33,7 @@ def test_templater_sub_regions(workbook_table_cache_test_path: Path): assert set(sub_regions_template.sub_region_reference_node_voltage_kv) == set( (275, 330) ) - # When coordinates are available, we have 6 columns, otherwise 4 - assert len(sub_regions_template.columns) == 6 + assert len(sub_regions_template.columns) == 4 def test_templater_sub_regions_mapping_only(workbook_table_cache_test_path: Path): @@ -52,133 +45,3 @@ def test_templater_sub_regions_mapping_only(workbook_table_cache_test_path: Path assert set(sub_regions_template.isp_sub_region_id) == set(("SQ", "NNSW")) assert set(sub_regions_template.nem_region_id) == set(("QLD", "NSW")) assert len(sub_regions_template.columns) == 2 - - -def test_no_substation_coordinates(workbook_table_cache_test_path: Path, mocker): - mocker.patch( - # api_call is from slow.py but imported to main.py - "ispypsa.templater.nodes._request_transmission_substation_coordinates", - return_value=pd.DataFrame(({})).T, - ) - filepath = workbook_table_cache_test_path / Path("sub_regional_reference_nodes.csv") - sub_regional_reference_nodes = pd.read_csv(filepath) - sub_regions_template = _template_sub_regions(sub_regional_reference_nodes) - assert set(sub_regions_template.isp_sub_region_id) == set(("NNSW", "SQ")) - assert set(sub_regions_template.nem_region_id) == set(("NSW", "QLD")) - assert set(sub_regions_template.sub_region_reference_node) == set( - ("South Pine", "Armidale") - ) - assert set(sub_regions_template.sub_region_reference_node_voltage_kv) == set( - (275, 330) - ) - assert len(sub_regions_template.columns) == 4 - - -def test_substation_coordinate_http_error(requests_mock, caplog): - url = "https://services.ga.gov.au/gis/services/National_Electricity_Infrastructure/MapServer/WFSServer" - requests_mock.get(url, status_code=404) - - with caplog.at_level(logging.WARNING): - result = _request_transmission_substation_coordinates() - - assert ( - "Failed to fetch substation coordinates. HTTP Status code: 404." in caplog.text - ) - assert result.empty - - -def test_substation_coordinate_request_exception(mocker, caplog): - mocker.patch( - "requests.get", - side_effect=requests.exceptions.RequestException("Connection error"), - ) - - with caplog.at_level(logging.ERROR): - result = _request_transmission_substation_coordinates() - - assert ( - "Error requesting substation coordinate data:\nConnection error." in caplog.text - ) - assert result.empty - - -def test_substation_coordinate_empty_result_warning(mocker, caplog): - # Mock a successful request that returns data with no valid coordinates - mock_response = mocker.Mock() - mock_response.status_code = 200 - # Multiple members to ensure xmltodict returns a list - mock_response.content = b""" - - - - Test1 - - - - - - - - """ - mocker.patch("requests.get", return_value=mock_response) - - with caplog.at_level(logging.WARNING): - result = _request_transmission_substation_coordinates() - - assert "Could not get substation coordinate data." in caplog.text - assert "Network node data will be templated without coordinate data." in caplog.text - assert result.empty - - -def test_get_reference_node_locations_with_coordinates(mocker): - # Test the function with valid substation coordinates - reference_nodes = pd.DataFrame( - { - "sub_region_reference_node": ["Test Node 1", "Test Node 2"], - "other_column": ["A", "B"], - } - ) - - substation_coords = pd.DataFrame( - { - "substation_latitude": [-35.0, -34.0], - "substation_longitude": [150.0, 151.0], - }, - index=["Test Node 1", "Test Node 2"], - ) - - mocker.patch( - "ispypsa.templater.nodes._request_transmission_substation_coordinates", - return_value=substation_coords, - ) - - result = _get_reference_node_locations(reference_nodes) - - assert "substation_latitude" in result.columns - assert "substation_longitude" in result.columns - assert len(result) == 2 - assert result["substation_latitude"].iloc[0] == -35.0 - assert result["substation_longitude"].iloc[0] == 150.0 - - -def test_get_reference_node_locations_without_coordinates(mocker): - # Test the function when no coordinates are available - reference_nodes = pd.DataFrame( - { - "reference_node": ["Test Node 1", "Test Node 2"], - "other_column": ["A", "B"], - } - ) - - mocker.patch( - "ispypsa.templater.nodes._request_transmission_substation_coordinates", - return_value=pd.DataFrame(), - ) - - result = _get_reference_node_locations(reference_nodes) - - # Should return the same dataframe without coordinate columns - assert "substation_latitude" not in result.columns - assert "substation_longitude" not in result.columns - assert len(result) == 2 - assert list(result.columns) == list(reference_nodes.columns)