diff --git a/docs/.gitignore b/docs/.gitignore index 3a2f37237..2dc69af33 100644 --- a/docs/.gitignore +++ b/docs/.gitignore @@ -1,3 +1,5 @@ /.quarto/ _sidebar.yml objects.json + +**/*.quarto_ipynb diff --git a/docs/_quarto.yml b/docs/_quarto.yml index b113355c0..7ed22d01e 100644 --- a/docs/_quarto.yml +++ b/docs/_quarto.yml @@ -82,6 +82,7 @@ website: - api/PointObservation.qmd - api/TrackObservation.qmd - api/NodeObservation.qmd + - api/ReachObservation.qmd - section: "Model Result" href: api/model.qmd contents: @@ -170,6 +171,7 @@ quartodoc: - PointObservation - TrackObservation - NodeObservation + - ReachObservation - title: Model Result desc: "" contents: diff --git a/docs/user-guide/network.qmd b/docs/user-guide/network.qmd index a2ff74478..84f735a0e 100644 --- a/docs/user-guide/network.qmd +++ b/docs/user-guide/network.qmd @@ -28,7 +28,7 @@ uv add modelskill[networks] import pandas as pd import numpy as np from typing import Any -from modelskill.network import Network, NetworkNode, NetworkEdge, EdgeBreakPoint +from modelskill.network import Network, NetworkNode, NetworkReach, ReachBreakPoint class ExampleNode(NetworkNode): @@ -51,18 +51,18 @@ class ExampleNode(NetworkNode): return {} -class ExampleEdge(NetworkEdge): - """Edge connecting two nodes with a given length.""" +class ExampleReach(NetworkReach): + """Reach connecting two nodes with a given length.""" def __init__( self, - edge_id: str, + reach_id: str, start: NetworkNode, end: NetworkNode, length: float, breakpoints: list | None = None, ): - self._id = edge_id + self._id = reach_id self._start = start self._end = end self._length = length @@ -89,9 +89,9 @@ class ExampleEdge(NetworkEdge): return self._breakpoints -class ExampleBreakPoint(EdgeBreakPoint): - def __init__(self, edge_id: str, distance: float, data: pd.DataFrame): - self._id = (edge_id, distance) +class ExampleBreakPoint(ReachBreakPoint): + def __init__(self, reach_id: str, distance: float, data: pd.DataFrame): + self._id = (reach_id, distance) self._data = data @property @@ -117,12 +117,12 @@ node_s2 = ExampleNode("sensor_2", df2) node_s3 = ExampleNode("sensor_3", df3) bp = ExampleBreakPoint("r1", 200.0, df4) -edge1 = ExampleEdge("r1", node_s1, node_s2, length=500.0, breakpoints=[bp]) -edge2 = ExampleEdge("r2", node_s2, node_s3, length=300.0) -network = Network(edges=[edge1, edge2]) +reach1 = ExampleReach("r1", node_s1, node_s2, length=500.0, breakpoints=[bp]) +reach2 = ExampleReach("r2", node_s2, node_s3, length=300.0) +network = Network(reaches=[reach1, reach2]) ``` -A **Network** represents a 1D pipe or river network as a directed graph: nodes hold timeseries data (e.g. water level at a junction) and edges carry the topology and reach length between them. Break points along an edge (e.g. cross-section chainages) are supported as observation locations too. +A **Network** represents a 1D pipe or river network as a directed graph: nodes hold timeseries data (e.g. water level at a junction) and reaches carry the topology and reach length between them. Break points along a reach (e.g. cross-section chainages) are supported as observation locations too. The typical workflow is: @@ -168,6 +168,48 @@ A `Res1D` network contains multiple levels that are unified into a generic netwo ![How a Res1D file maps to a Network object. Reaches and nodes are re-indexed as integers; boundary nodes expose `find()`/`recall()` round-trip lookups.](../images/res1d_network_mapping.png) +#### Selective loading + +Large Res1D files can contain thousands of nodes and gridpoints. Loading all of that data into memory is slow and may cause memory issues — especially when you only need the timeseries at a handful of nodes where observations exist. + +`from_res1d` accepts two optional arguments to restrict what gets loaded: + +| Argument | Type | Effect | +|---|---|---| +| `nodes` | `None` \| `str` \| `list[str]` | Control which nodes have timeseries data loaded. `None` (default) loads all nodes; `[]` skips all node data; a name or list loads only those nodes. | +| `reaches` | `None` \| `str` \| `list[str]` | Control which reaches have intermediate gridpoint data populated. `None` (default) loads everything; `[]` skips all gridpoints; a name or list of names loads only those reaches. | + +::: {.callout-note} +Selective loading only controls **which timeseries are held in memory**. The full network topology (nodes, reaches, lengths) is always constructed so that `find()`, `recall()`, and graph algorithms still work on the complete network. +::: + +The most memory-efficient setup — useful when you only care about specific junction nodes — is to pass the node IDs you need and skip all intermediate gridpoints with `reaches=[]`: + +```{python} +network_subset = Network.from_res1d( + path_to_res1d, + nodes=["78", "46"], + reaches=[], +) +network_subset +``` + +If you also need gridpoint data along a particular reach, pass its name (or a list of names): + +```{python} +network_subset = Network.from_res1d( + path_to_res1d, + nodes=["78", "46"], + reaches=["94l1"], +) +network_subset +``` + +When only some nodes are loaded, `to_dataframe()` and `to_dataset()` only contain columns for those nodes — the rest are graph-connected but data-free: + +```{python} +network_subset.to_dataframe(sel="WaterLevel").head() +``` ## Inspecting the Network @@ -219,6 +261,10 @@ network.to_dataframe(sel="WaterLevel").head() After construction, nodes are re-labelled as integers. Use `find()` to go from original coordinates to the integer ID and `recall()` to go back. +::: {.callout-tip} +When creating `NodeObservation` objects for skill assessment you generally do **not** need to call `find()`. You can pass the original string ID as `node=`, and `NetworkModelResult` will resolve it for you during matching. For breakpoints, use `at=(reach, distance)` rather than `node=`. See [Skill assessment workflow](#skill-assessment-workflow) for details. +::: + ```{python} # Look up a named node by its original id node_id = network.find(node="117") @@ -230,7 +276,7 @@ print(network.recall(node_id)) ```{python} # Look up a break point by reach + chainage -bp_id = network.find(edge="94l1", distance=21.285) +bp_id = network.find(reach="94l1", distance=21.285) print(f"Break point (94l1, 21.285) → integer id {bp_id}") print(network.recall(bp_id)) ``` @@ -242,12 +288,12 @@ print(ids) ``` ```{python} -# Edge lookup -ids = network.find(edge="58l1", distance="start") +# Reach lookup +ids = network.find(reach="58l1", distance="start") print(ids) -ids = network.find(edge="58l1", distance=[51.456, 77.185]) +ids = network.find(reach="58l1", distance=[51.456, 77.185]) print(ids) -ids = network.find(edge="58l1", distance=["start", 77.185]) +ids = network.find(reach="58l1", distance=["start", 77.185]) print(ids) ``` @@ -267,23 +313,74 @@ mr `NodeObservation` accepts a file path directly; the observation name is taken from the filename. +The `node=` argument can be specified in three ways, depending on what information you have at hand. + +#### Option A — original string alias + +Pass the original node identifier from the source format (e.g. the Res1D node name) as a plain string. The `NetworkModelResult` resolves it to the correct integer ID at match time, so you do not need to call `network.find()` yourself: ```{python} -obs_1 = ms.NodeObservation(path_to_sensor_data_1, node=network.find(node="78")) -obs_2 = ms.NodeObservation(path_to_sensor_data_2, node=network.find(node="46")) +obs_1 = ms.NodeObservation(path_to_sensor_data_1, node="78") +obs_2 = ms.NodeObservation(path_to_sensor_data_2, node="46") cc = ms.match(obs=[obs_1, obs_2], mod=mr) cc.skill() ``` +::: {.callout-note} +Resolution happens inside `ms.match()`. If the string is not found in the network's alias map a `ValueError` is raised with a clear message indicating which alias could not be resolved. +::: + +#### Option B — breakpoint by `(reach, distance)` tuple + +When your observation sits at a chainage along a reach rather than at a named junction node, you can use the `at` argument and pass a `(reach_id, distance)` tuple. The `NetworkModelResult` looks up the corresponding breakpoint at match time: + +```python +obs_bp = ms.NodeObservation(path_to_sensor_data_1, at=("94l1", 21.285)) + +cc = ms.match(obs=obs_bp, mod=mr) +cc.skill() +``` + +The tuple form is equivalent to calling `network.find(reach="94l1", distance=21.285)` beforehand and is resolved during matching. + +::: {.callout-note} +## Chainage tolerance + +Breakpoint distances are matched with a tolerance of **1 × 10⁻³** (i.e. ±0.001 in whatever distance units the network uses). This means that small floating-point discrepancies between the distance you type and the value stored in the network are handled gracefully. If no breakpoint falls within that tolerance a `ValueError` is raised. +::: + +### 3. Using ReachObservation for reach-uniform quantities + +Some physical quantities — such as **discharge in a pipe** — are conceptually uniform across the whole reach, even though the model stores values at individual breakpoints along the reach. `ReachObservation` lets you associate a timeseries with a named reach without having to identify a specific breakpoint. + +When matched against a `NetworkModelResult`, modelskill automatically extracts model data from an arbitrary breakpoint that belongs to the given reach and verifies that all breakpoints on the reach carry equivalent values. + +```{python} +obs_q = ms.ReachObservation(path_to_sensor_data_1, reach="94l1", name="Q_94l1") +obs_q +``` + +Pass the observation to `ms.match()` exactly as you would a `NodeObservation`. modelskill resolves which breakpoint to use automatically: + +```{python} +mr_q = NetworkModelResult(network, name="MyModel", item="Discharge") +cc_q = ms.match(obs=obs_q, mod=mr_q) +cc_q.skill() +``` + +::: {.callout-note} +Use `ReachObservation` when your measured quantity is representative of the whole reach (e.g. discharge, which is constant along a reach in steady flow). If you need to compare a quantity that varies spatially along the reach (e.g. water level at a specific chainage), use a `NodeObservation` with a `(reach, distance)` tuple instead (see [Option B](#option-b-breakpoint-by-reach-distance-tuple) above). +::: + ## Development ### Custom network formats -In case you have your network data in a format that is not included in [Building a Network](#building-a-network), you can assemble a `Network` object by subclassing the abstract base classes `NetworkNode` and `NetworkEdge`. +In case you have your network data in a format that is not included in [Building a Network](#building-a-network), you can assemble a `Network` object by subclassing the abstract base classes `NetworkNode` and `NetworkReach`. `NetworkNode` requires three properties: `id`, `data`, and `boundary`. -`NetworkEdge` requires five: `id`, `start`, `end`, `length`, and `breakpoints`. +`NetworkReach` requires five: `id`, `start`, `end`, `length`, and `breakpoints`. The following is a simple implementation example: @@ -292,7 +389,7 @@ The following is a simple implementation example: import pandas as pd import numpy as np from typing import Any -from modelskill.network import NetworkNode, NetworkEdge, Network +from modelskill.network import NetworkNode, NetworkReach, Network class ExampleNode(NetworkNode): @@ -315,14 +412,14 @@ class ExampleNode(NetworkNode): return {} -class ExampleEdge(NetworkEdge): - """Edge connecting two nodes with a given length.""" +class ExampleReach(NetworkReach): + """Reach connecting two nodes with a given length.""" def __init__( - self, edge_id: str, start: NetworkNode, end: NetworkNode, length: float, + self, reach_id: str, start: NetworkNode, end: NetworkNode, length: float, breakpoints: list | None = None, ): - self._id = edge_id + self._id = reach_id self._start = start self._end = end self._length = length @@ -350,7 +447,7 @@ class ExampleEdge(NetworkEdge): ``` ::: {.callout-tip} -The three abstract properties that **every** `NetworkNode` subclass must implement are `id`, `data` and `boundary`. If `boundary` is not relevant for your use case, define the property to return an empty dictionary, as in the example above. Similarly, a `NetworkEdge` with no intermediate points can return an empty `breakpoints` list. +The three abstract properties that **every** `NetworkNode` subclass must implement are `id`, `data` and `boundary`. If `boundary` is not relevant for your use case, define the property to return an empty dictionary, as in the example above. Similarly, a `NetworkReach` with no intermediate points can return an empty `breakpoints` list. ::: @@ -362,24 +459,24 @@ node_s1 = ExampleNode("sensor_1", df1) node_s2 = ExampleNode("sensor_2", df2) node_s3 = ExampleNode("sensor_3", df3) -edge1 = ExampleEdge("r1", node_s1, node_s2, length=500.0) -edge2 = ExampleEdge("r2", node_s2, node_s3, length=300.0) +reach1 = ExampleReach("r1", node_s1, node_s2, length=500.0) +reach2 = ExampleReach("r2", node_s2, node_s3, length=300.0) -network = Network(edges=[edge1, edge2]) +network = Network(reaches=[reach1, reach2]) network ``` ### Adding break points along a reach -Break points represent intermediate chainage locations on a reach (e.g. cross-sections). Subclass `EdgeBreakPoint` the same way — implement `id` (a `(edge_id, distance)` tuple) and `data`: +Break points represent intermediate chainage locations on a reach (e.g. cross-sections). Subclass `ReachBreakPoint` the same way — implement `id` (a `(reach_id, distance)` tuple) and `data`: ```python -from modelskill.network import EdgeBreakPoint +from modelskill.network import ReachBreakPoint -class ExampleBreakPoint(EdgeBreakPoint): - def __init__(self, edge_id: str, distance: float, data: pd.DataFrame): - self._id = (edge_id, distance) +class ExampleBreakPoint(ReachBreakPoint): + def __init__(self, reach_id: str, distance: float, data: pd.DataFrame): + self._id = (reach_id, distance) self._data = data @property @@ -393,13 +490,14 @@ class ExampleBreakPoint(EdgeBreakPoint): # df4 is a DataFrame object that has been loaded in memory bp = ExampleBreakPoint("r1", 200.0, df4) -edge1 = ExampleEdge("r1", node_s1, node_s2, length=500.0, breakpoints=[bp]) -edge2 = ExampleEdge("r2", node_s2, node_s3, length=300.0) -network = Network(edges=[edge1, edge2]) +reach1 = ExampleReach("r1", node_s1, node_s2, length=500.0, breakpoints=[bp]) +reach2 = ExampleReach("r2", node_s2, node_s3, length=300.0) +network = Network(reaches=[reach1, reach2]) ``` ## See also * [API reference — NetworkModelResult](../api/NetworkModelResult.qmd) -* [API reference — NodeObservation](../api/NodeObservation.qmd) \ No newline at end of file +* [API reference — NodeObservation](../api/NodeObservation.qmd) +* [API reference — ReachObservation](../api/ReachObservation.qmd) \ No newline at end of file diff --git a/notebooks/Collection_systems_network.ipynb b/notebooks/Collection_systems_network.ipynb index 1df12e66c..eb8d1df0a 100644 --- a/notebooks/Collection_systems_network.ipynb +++ b/notebooks/Collection_systems_network.ipynb @@ -659,7 +659,7 @@ " * node (node) int64 2kB 0 1 2 3 4 5 6 7 ... 252 253 254 255 256 257 258\n", "Data variables:\n", " WaterLevel (time, node) float32 114kB 195.4 194.7 nan ... 188.5 nan nan\n", - " Discharge (time, node) float32 114kB nan nan 5.72e-06 ... nan 0.01692 0.0