107 changes: 2 additions & 105 deletions src/ispypsa/templater/nodes.py
@@ -1,10 +1,4 @@
import logging
from pathlib import Path

import pandas as pd
import requests
import xmltodict
from thefuzz import process

from ispypsa.templater.mappings import _NEM_REGION_IDS, _NEM_SUB_REGION_IDS

@@ -14,37 +8,6 @@
)


def _get_reference_node_locations(reference_nodes):
# request and merge in substation coordinates for reference nodes
substation_coordinates = _request_transmission_substation_coordinates()
if not substation_coordinates.empty:
reference_node_col = process.extractOne(
"reference_node", reference_nodes.columns
)[0]
matched_subs = _fuzzy_match_names(
reference_nodes[reference_node_col],
substation_coordinates.index,
"merging in substation coordinate data",
threshold=85,
)
reference_node_coordinates = pd.merge(
matched_subs,
substation_coordinates,
how="left",
left_on=reference_node_col,
right_index=True,
)
reference_nodes = pd.concat(
[
reference_nodes,
reference_node_coordinates["substation_latitude"],
reference_node_coordinates["substation_longitude"],
],
axis=1,
)
return reference_nodes


def _template_sub_regions(
sub_regional_reference_nodes: pd.DataFrame, mapping_only: bool = False
) -> pd.DataFrame:
@@ -54,8 +17,8 @@ def _template_sub_regions(
sub_regional_reference_nodes: pd.DataFrame specifying the NEM subregional
reference nodes.
mapping_only: boolean, when doing single region or region modelling this input
is set to True so unnecessary information such sub_region_reference_node
are latitude and longitude are not returned.
is set to True so unnecessary information such as sub_region_reference_node
is not returned.
Returns:
`pd.DataFrame`: ISPyPSA sub-regional node template

@@ -85,7 +48,6 @@ def _template_sub_regions(
"sub_region_reference_node_voltage_kv",
]
]
sub_regions = _get_reference_node_locations(sub_regions)
return sub_regions


@@ -122,7 +84,6 @@ def _template_regions(regional_reference_nodes: pd.DataFrame) -> pd.DataFrame:
"regional_reference_node_voltage_kv",
]
]
regions = _get_reference_node_locations(regions)
return regions


@@ -165,70 +126,6 @@ def _extract_voltage(data: pd.DataFrame, column: str):
return split_node_voltage


def _request_transmission_substation_coordinates() -> pd.DataFrame:
"""
Obtains transmission substation coordinates from a Web Feature Service (WFS)
source hosted as a dataset within the Australian Government's National Map:

https://www.nationalmap.gov.au/#share=s-403jqUldEkbj6CwWcPZHefSgYeA

The requested data is in Geography Markup Language (GML) format, which can be parsed
using the same tools that are used to parse XML.

Returns:
Substation names, latitude and longitude within a :class:`pandas.DataFrame`.
If request error is encountered or the HTTP status of the request is not OK,
then an empty DataFrame will be returned with a warning that network node data
will be templated without coordinate data

"""
params = dict(
service="WFS",
version="2.0.0",
request="GetFeature",
typeNames="National_Electricity_Infrastructure:Electricity_Transmission_Substations",
maxFeatures=10000,
)
url = "https://services.ga.gov.au/gis/services/National_Electricity_Infrastructure/MapServer/WFSServer"
substation_coordinates = {}
try:
r = requests.get(url, params=params, timeout=60)
if r.status_code == 200:
data = xmltodict.parse(r.content)
features = data["wfs:FeatureCollection"]["wfs:member"]
for feature in features:
substation = feature["esri:Electricity_Transmission_Substations"]
name = substation.get("esri:SUBSTATION_NAME")
# The new format stores coordinates in X_COORDINATE and Y_COORDINATE fields
# These are in GDA2020 / MGA coordinates, but we also have SHAPE with gml:pos
if (
"esri:SHAPE" in substation
and "gml:Point" in substation["esri:SHAPE"]
):
coordinates = substation["esri:SHAPE"]["gml:Point"]["gml:pos"]
lat, long = coordinates.split(" ")
substation_coordinates[name] = {
"substation_latitude": lat,
"substation_longitude": long,
}
else:
logging.warning(
f"Failed to fetch substation coordinates. HTTP Status code: {r.status_code}."
)
except requests.exceptions.RequestException as e:
logging.error(f"Error requesting substation coordinate data:\n{e}.")
if not substation_coordinates:
logging.warning(
"Could not get substation coordinate data. "
+ "Network node data will be templated without coordinate data."
)
substation_coordinates = pd.DataFrame(substation_coordinates).T
substation_coordinates = substation_coordinates[
substation_coordinates.index.notna()
]
return substation_coordinates


def _capture_just_name(series: pd.Series) -> pd.DataFrame:
"""
Capture the name (plain English) and not the ID in parentheses (capitalised letters)
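For reference, the WFS lookup deleted above boils down to the following standalone sketch. The URL, query parameters, and GML parsing are copied from the removed helper; the fuzzy matching, logging, and error handling are omitted, so treat it as an illustration rather than a drop-in replacement.

import pandas as pd
import requests
import xmltodict

url = (
    "https://services.ga.gov.au/gis/services/"
    "National_Electricity_Infrastructure/MapServer/WFSServer"
)
params = dict(
    service="WFS",
    version="2.0.0",
    request="GetFeature",
    typeNames="National_Electricity_Infrastructure:Electricity_Transmission_Substations",
    maxFeatures=10000,
)
# GML is XML, so xmltodict can parse the response content directly.
response = requests.get(url, params=params, timeout=60)
members = xmltodict.parse(response.content)["wfs:FeatureCollection"]["wfs:member"]

coordinates = {}
for feature in members:
    substation = feature["esri:Electricity_Transmission_Substations"]
    shape = substation.get("esri:SHAPE", {})
    if "gml:Point" in shape:
        latitude, longitude = shape["gml:Point"]["gml:pos"].split(" ")
        coordinates[substation.get("esri:SUBSTATION_NAME")] = {
            "substation_latitude": latitude,
            "substation_longitude": longitude,
        }

coordinates = pd.DataFrame(coordinates).T  # indexed by substation name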
141 changes: 2 additions & 139 deletions tests/test_templater/test_nodes.py
@@ -1,13 +1,8 @@
import logging
from pathlib import Path

import pandas as pd
import pytest
import requests

from ispypsa.templater.nodes import (
_get_reference_node_locations,
_request_transmission_substation_coordinates,
_template_regions,
_template_sub_regions,
)
@@ -23,8 +18,7 @@ def test_node_templater_nem_regions(workbook_table_cache_test_path: Path):
("South Pine", "Sydney West")
)
assert set(regional_template.regional_reference_node_voltage_kv) == set((275, 330))
# When coordinates are available, we have 6 columns, otherwise 4
assert len(regional_template.columns) in (4, 6)
assert len(regional_template.columns) == 4


def test_templater_sub_regions(workbook_table_cache_test_path: Path):
@@ -39,8 +33,7 @@ def test_templater_sub_regions(workbook_table_cache_test_path: Path):
assert set(sub_regions_template.sub_region_reference_node_voltage_kv) == set(
(275, 330)
)
# When coordinates are available, we have 6 columns, otherwise 4
assert len(sub_regions_template.columns) == 6
assert len(sub_regions_template.columns) == 4


def test_templater_sub_regions_mapping_only(workbook_table_cache_test_path: Path):
@@ -52,133 +45,3 @@ def test_templater_sub_regions_mapping_only(workbook_table_cache_test_path: Path
assert set(sub_regions_template.isp_sub_region_id) == set(("SQ", "NNSW"))
assert set(sub_regions_template.nem_region_id) == set(("QLD", "NSW"))
assert len(sub_regions_template.columns) == 2


def test_no_substation_coordinates(workbook_table_cache_test_path: Path, mocker):
mocker.patch(
# api_call is from slow.py but imported to main.py
"ispypsa.templater.nodes._request_transmission_substation_coordinates",
return_value=pd.DataFrame(({})).T,
)
filepath = workbook_table_cache_test_path / Path("sub_regional_reference_nodes.csv")
sub_regional_reference_nodes = pd.read_csv(filepath)
sub_regions_template = _template_sub_regions(sub_regional_reference_nodes)
assert set(sub_regions_template.isp_sub_region_id) == set(("NNSW", "SQ"))
assert set(sub_regions_template.nem_region_id) == set(("NSW", "QLD"))
assert set(sub_regions_template.sub_region_reference_node) == set(
("South Pine", "Armidale")
)
assert set(sub_regions_template.sub_region_reference_node_voltage_kv) == set(
(275, 330)
)
assert len(sub_regions_template.columns) == 4


def test_substation_coordinate_http_error(requests_mock, caplog):
url = "https://services.ga.gov.au/gis/services/National_Electricity_Infrastructure/MapServer/WFSServer"
requests_mock.get(url, status_code=404)

with caplog.at_level(logging.WARNING):
result = _request_transmission_substation_coordinates()

assert (
"Failed to fetch substation coordinates. HTTP Status code: 404." in caplog.text
)
assert result.empty


def test_substation_coordinate_request_exception(mocker, caplog):
mocker.patch(
"requests.get",
side_effect=requests.exceptions.RequestException("Connection error"),
)

with caplog.at_level(logging.ERROR):
result = _request_transmission_substation_coordinates()

assert (
"Error requesting substation coordinate data:\nConnection error." in caplog.text
)
assert result.empty


def test_substation_coordinate_empty_result_warning(mocker, caplog):
# Mock a successful request that returns data with no valid coordinates
mock_response = mocker.Mock()
mock_response.status_code = 200
# Multiple members to ensure xmltodict returns a list
mock_response.content = b"""<?xml version="1.0" encoding="UTF-8"?>
<wfs:FeatureCollection xmlns:wfs="http://www.opengis.net/wfs" xmlns:esri="http://www.esri.com" xmlns:gml="http://www.opengis.net/gml">
<wfs:member>
<esri:Electricity_Transmission_Substations>
<esri:SUBSTATION_NAME>Test1</esri:SUBSTATION_NAME>
</esri:Electricity_Transmission_Substations>
</wfs:member>
<wfs:member>
<esri:Electricity_Transmission_Substations>
<esri:SUBSTATION_NAME></esri:SUBSTATION_NAME>
</esri:Electricity_Transmission_Substations>
</wfs:member>
</wfs:FeatureCollection>"""
mocker.patch("requests.get", return_value=mock_response)

with caplog.at_level(logging.WARNING):
result = _request_transmission_substation_coordinates()

assert "Could not get substation coordinate data." in caplog.text
assert "Network node data will be templated without coordinate data." in caplog.text
assert result.empty


def test_get_reference_node_locations_with_coordinates(mocker):
# Test the function with valid substation coordinates
reference_nodes = pd.DataFrame(
{
"sub_region_reference_node": ["Test Node 1", "Test Node 2"],
"other_column": ["A", "B"],
}
)

substation_coords = pd.DataFrame(
{
"substation_latitude": [-35.0, -34.0],
"substation_longitude": [150.0, 151.0],
},
index=["Test Node 1", "Test Node 2"],
)

mocker.patch(
"ispypsa.templater.nodes._request_transmission_substation_coordinates",
return_value=substation_coords,
)

result = _get_reference_node_locations(reference_nodes)

assert "substation_latitude" in result.columns
assert "substation_longitude" in result.columns
assert len(result) == 2
assert result["substation_latitude"].iloc[0] == -35.0
assert result["substation_longitude"].iloc[0] == 150.0


def test_get_reference_node_locations_without_coordinates(mocker):
# Test the function when no coordinates are available
reference_nodes = pd.DataFrame(
{
"reference_node": ["Test Node 1", "Test Node 2"],
"other_column": ["A", "B"],
}
)

mocker.patch(
"ispypsa.templater.nodes._request_transmission_substation_coordinates",
return_value=pd.DataFrame(),
)

result = _get_reference_node_locations(reference_nodes)

# Should return the same dataframe without coordinate columns
assert "substation_latitude" not in result.columns
assert "substation_longitude" not in result.columns
assert len(result) == 2
assert list(result.columns) == list(reference_nodes.columns)
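For orientation, a minimal usage sketch of the templater after this change, pieced together from the retained tests above; the CSV path mirrors the test fixture name and is illustrative only.

from pathlib import Path

import pandas as pd

from ispypsa.templater.nodes import _template_sub_regions

# Illustrative fixture path; the tests read the same file from a workbook cache directory.
sub_regional_reference_nodes = pd.read_csv(Path("sub_regional_reference_nodes.csv"))

# Full template: four columns (sub-region id, region id, reference node, voltage),
# with no latitude/longitude columns now that coordinate fetching is removed.
sub_regions = _template_sub_regions(sub_regional_reference_nodes)

# Mapping-only template: just isp_sub_region_id and nem_region_id.
mapping = _template_sub_regions(sub_regional_reference_nodes, mapping_only=True)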