diff --git a/.gitignore b/.gitignore index 5fe83c2a9..f0f7da2f9 100644 --- a/.gitignore +++ b/.gitignore @@ -138,3 +138,6 @@ dmypy.json # NFS synchronisation files .nfs* + +#MacOS temp files +.DS_Store \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index e23672040..b5716c1b5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,6 +33,7 @@ dependencies = [ "scipy", "scikit-image", "dask", + "simple-track", ] [project.urls] diff --git a/requirements/environment.yml b/requirements/environment.yml index 4485c1e61..2d6f9157b 100644 --- a/requirements/environment.yml +++ b/requirements/environment.yml @@ -17,6 +17,7 @@ dependencies: - scikit-image # For image processing techniques. - dask-core # Dask with minimal dependencies. - proj = 9.7.1 # Newer versions break plotting, see issue #2052. + - simple-track # For feature tracking and cell stats operators # Build dependencies - setuptools>=64 diff --git a/src/CSET/operators/__init__.py b/src/CSET/operators/__init__.py index ef40fdf22..44e65cd2b 100644 --- a/src/CSET/operators/__init__.py +++ b/src/CSET/operators/__init__.py @@ -33,6 +33,7 @@ constraints, convection, ensembles, + feature, filters, imageprocessing, mesoscale, @@ -55,6 +56,7 @@ "convection", "ensembles", "execute_recipe", + "feature", "filters", "get_operator", "imageprocessing", @@ -105,6 +107,7 @@ def get_operator(name: str): operator = CSET.operators for section in name_sections: operator = getattr(operator, section) + if callable(operator): return operator else: diff --git a/src/CSET/operators/_colorbar_definition.json b/src/CSET/operators/_colorbar_definition.json index 757219d00..a8778301a 100644 --- a/src/CSET/operators/_colorbar_definition.json +++ b/src/CSET/operators/_colorbar_definition.json @@ -178,6 +178,50 @@ "max": 1.1, "min": 0.5 }, + "feature_id": { + "cmap": "viridis", + "levels": [ + 1, + 50, + 100, + 150, + 200, + 250, + 300, + 350, + 400 + ], + "ymax": 1.0, + "ymin": 0.0 + }, + "feature_lifetime": { + "cmap": "YlGnBu", + "levels": [ + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12 + ], + "ymax": 1.0, + "ymin": 0.0 + }, + "feature_init": { + "cmap": "Blues", + "levels": [ + 0.5, + 1 + ], + "ymax": 1.0, + "ymin": 0.0 + }, "fog_fraction_at_screen_level": { "cmap": "viridis", "max": 1, diff --git a/src/CSET/operators/feature.py b/src/CSET/operators/feature.py new file mode 100755 index 000000000..a3cfe0c72 --- /dev/null +++ b/src/CSET/operators/feature.py @@ -0,0 +1,186 @@ +# © Crown copyright, Met Office (2022-2025) and CSET contributors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Operators for identifying and tracking features.""" + +import logging +import os + +import iris +import numpy as np +from simpletrack.track import Tracker + + +def track( + cube: iris.cube.Cube, + threshold: float, + under_threshold: bool = False, + min_size: int = 4, + retain_lifetime_on_split: bool = True, + tracking_nbhood: int = 5, + overlap_threshold: float = 0.3, + save_data: bool = False, +): + """Track features between subsequent timesteps. + + Parameters + ---------- + threshold: float + The threshold value for feature detection. + under_threshold: bool, optional + If set to True, features are identified where the data is below the threshold. + If set to False, features are identified where the data is above the threshold. + Default is False. + min_size: int, optional + The minimum number of contiguous grid points required for a feature to be tracked. + Default is 4. + retain_lifetime_on_split: bool, optional + If set to True, the lifetime of a feature is retained when it splits into + multiple features. If set to False, the lifetime is reset when a feature splits. + Default is True. + tracking_nbhood: int, optional + The size of the neighbourhood used for tracking features between timesteps. + This dictates the maximum pixel radius from a feature centroid at which new features could + reasonably be spawned. + Default is 5. + overlap_threshold: float, optional + The minimum overlap required between features in consecutive timesteps for + them to be considered the same feature. + Default is 0.3. + save_data: bool, optional + If set to True, all tracking data is saved to disk for further analysis (including csv + and txt files containing feature properties that are not returned in output cubes). + Default is False. + + Returns + ------- + tracking_cubes: iris.cube.CubeList + A list of iris cubes containing tracking data, including feauture ID, lifetime, + and locations of initiating features. + + Notes + ----- + This operator uses the Simple-Track package to track features between timesteps. Simple-Track is a + data-agnostic, threshold-based object tracking algorithm for 2D data. Features are tracked between + consecutive frames of data by projecting feature fields onto common timeframes and matching + between them based on the degree of overlap. Matched features retain the same identification + between all tracked fields, while new features are assigned a unique label. + Thus, Simple-Track compiles comprehensive information about feature merging, splitting, accretion, + initiation and dissipation. + + Currently outputs three cubes containing the following data: + "feature_id": + A 2D field containing the unique label assigned to each feature, which is retained + if the feature is tracked across multiple timesteps. This cube can be used as a mask + to identify the location of the tracked feature throughout the evaluation period. + "feature_lifetime": + A 2D field containing the lifetime of each feature in terms of the number of + timesteps it has been tracked for. This cube can be used to distinguish between + mature and fresh features. + "feature_init": + A 2D binary field indicating the location of newly initiated features at each timestep. + These features are identified as having a lifetime of 1 AND have initiated sufficiently + far from other, existing features that they are not considered to have spawed from them. + + Links + ---------- + .. https://github.com/ParaChute-UK/simple-track + + Examples + -------- + >>> tracking_cubes = feature.track(threshold=2) + >>> lifetime_cube = tracking_cubes.extract_cube("feature_lifetime") + # Plot the final timestep of lifetime cube. This will show + # the lifetime of features that have been tracked for multiple previous + # timesteps, as well as new features that have just been initiated. + >>> iplt.pcolormesh(lifetime_cube[-1,:,:],cmap=mpl.cm.bwr) + >>> plt.gca().coastlines('10m') + >>> plt.clim(-5,5) + >>> plt.colorbar() + >>> plt.show() + + """ + # Setup config + tracker_config = { + "FEATURE": { + "threshold": threshold, + "under_threshold": under_threshold, + "min_size": min_size, + }, + "TRACKING": { + "retain_lifetime_on_split": retain_lifetime_on_split, + "overlap_nbhood": tracking_nbhood, + "overlap_threshold": overlap_threshold, + }, + "OUTPUT": { + "save_data": save_data, + "experiment_name": "feature_tracking", + "path": f"{os.getcwd()}/tracking_data", + }, + } + logging.debug(f"Tracker config: {tracker_config}") + + # Get cube data into a dict to pass to Tracker + times = cube.coord("time").points + time_units = cube.coord("time").units + times_dt = [time_units.num2pydate(t) for t in times] + cube_dict = { + time: cube_slice.data + for time, cube_slice in zip(times_dt, cube.slices_over("time"), strict=True) + } + + # Run tracking, returning Timeline object + timeline = Tracker(tracker_config).run(cube_dict) + logging.debug("Tracking completed") + + # Use input cube as template to make returned cube + # By iterating over all cube times, this will ensure all data is present + # If a Frame at the given time is not contained in the timeline, error is raised + output_type_and_methods = { + "lifetime": { + "getter": "lifetime_field", + "cube_name": "feature_lifetime", + }, + "feature": { + "getter": "feature_field", + "cube_name": "feature_id", + }, + "init": { + "getter": "get_init_field", + "cube_name": "feature_init", + }, + } + + tracking_cubelist = iris.cube.CubeList() + for output_type in output_type_and_methods: + tracking_data = [] + for time in times_dt: + frame = timeline.get_frame(time) + getter = getattr(frame, output_type_and_methods[output_type]["getter"]) + if callable(getter): + tracking_data.append(getter()) + else: + tracking_data.append(getter) + + # Convert to numpy arrays + tracking_data = np.stack(tracking_data, axis=0) + + # Create cubes + tracking_cube = cube.copy(data=tracking_data) + tracking_cube.long_name = output_type_and_methods[output_type]["cube_name"] + tracking_cube.standard_name = None + tracking_cube.var_name = None + tracking_cube.units = "1" + tracking_cubelist.append(tracking_cube) + + return tracking_cubelist diff --git a/src/CSET/operators/plot.py b/src/CSET/operators/plot.py index c6a44e09b..48a62582e 100644 --- a/src/CSET/operators/plot.py +++ b/src/CSET/operators/plot.py @@ -647,6 +647,9 @@ def _plot_and_save_spatial_plot( # Specify the color bar cmap, levels, norm = _colorbar_map_levels(cube) + if "feature" in cube.long_name: + cmap.set_under("white") + # If overplotting, set required colorbars if overlay_cube: over_cmap, over_levels, over_norm = _colorbar_map_levels(overlay_cube) diff --git a/src/CSET/recipes/example_recipes/example_feature_track.yaml b/src/CSET/recipes/example_recipes/example_feature_track.yaml new file mode 100755 index 000000000..67d67a029 --- /dev/null +++ b/src/CSET/recipes/example_recipes/example_feature_track.yaml @@ -0,0 +1,26 @@ +category: Quick Look +title: Example running cell tracking and plotting spatial plots +description: | + Uses the feature.track operator to identify and track features in a cube with "time" series coordinate, + and then plots the lifetime of the identified features as a spatial plot. + +steps: + - operator: read.read_cubes + file_paths: $INPUT_PATHS + + - operator: filters.filter_cubes + constraint: + operator: constraints.generate_var_constraint + varname: precipitation_flux + + - operator: feature.track + threshold: 3 + save_data: False # Whether to save raw tracking data for further analysis + + # Filter tracking cubelist to just one of "feature_lifetime", "feature_id" or "feature_init" + - operator: filters.filter_cubes + constraint: + operator: constraints.generate_var_constraint + varname: feature_lifetime + + - operator: plot.spatial_pcolormesh_plot diff --git a/tests/operators/test_feature.py b/tests/operators/test_feature.py new file mode 100644 index 000000000..4d2565a7c --- /dev/null +++ b/tests/operators/test_feature.py @@ -0,0 +1,129 @@ +# © Crown copyright, Met Office (2022-2025) and CSET contributors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Tests for feature operators.""" + +import datetime as dt +import os + +import cf_units +import iris +import iris.coords +import iris.cube +import numpy as np +import pytest + +from CSET.operators import feature + + +@pytest.fixture +def feature_cube() -> iris.cube.Cube: + """Set up three timesteps of data and place into cube.""" + data_arr = np.zeros((3, 10, 10)) + data_arr[0, 2:6, 2:6] = 1 + data_arr[1, 3:7, 3:7] = 1 + data_arr[2, 4:8, 4:8] = 1 + + time_units = cf_units.Unit("days since 2000-01-01 00:00:00", calendar="gregorian") + time_start = dt.datetime(2010, 1, 1, 0, 0, 0) + time_dt_points = [time_start + dt.timedelta(minutes=5 * idx) for idx in range(3)] + time_points = [time_units.date2num(time_point) for time_point in time_dt_points] + time_coord = iris.coords.DimCoord( + points=time_points, standard_name="time", units=time_units + ) + + coord_system = iris.coord_systems.TransverseMercator( + latitude_of_projection_origin=55, longitude_of_central_meridian=0 + ) + coord_range = np.arange(0, 100, 10) + proj_y_coord = iris.coords.DimCoord( + points=coord_range, + standard_name="projection_y_coordinate", + var_name="projection_y_coordinate", + units="m", + coord_system=coord_system, + ) + proj_x_coord = iris.coords.DimCoord( + points=coord_range, + standard_name="projection_x_coordinate", + var_name="projection_x_coordinate", + units="m", + coord_system=coord_system, + ) + + proj_y_coord.guess_bounds() + proj_x_coord.guess_bounds() + + coords = (time_coord, proj_y_coord, proj_x_coord) + dim_coords_and_dims = [(coord, dim) for dim, coord in enumerate(coords)] + cube = iris.cube.Cube( + data=data_arr, + dim_coords_and_dims=dim_coords_and_dims, + long_name="Precipitation test", + ) + return cube + + +def test_tracking_valid(feature_cube) -> None: + """ + Test feature tracking returns same cube shape as input cube. + + Further tracking tests handled by Simple-Track dependency + """ + test_threshold = 0.5 + min_size = 1 + tracking_cubelist = feature.track( + feature_cube, threshold=test_threshold, min_size=min_size + ) + outputs = ["feature_lifetime", "feature_id", "feature_init"] + for output in outputs: + tracking_cube = tracking_cubelist.extract_cube(output) + assert tracking_cube.shape == feature_cube.shape + + +def test_tracking_lifetime_values(feature_cube) -> None: + """Test feature tracking returns expected lifetime values.""" + test_threshold = 0.5 + min_size = 1 + tracking_cubelist = feature.track( + feature_cube, threshold=test_threshold, min_size=min_size + ) + tracking_cube = tracking_cubelist.extract_cube("feature_lifetime") + # Check lifetime field values are expected, based on feature_cube data + for time_slice_idx in range(3): + expected_lifetime_field = np.where( + feature_cube.data[time_slice_idx] > test_threshold, time_slice_idx + 1, 0 + ) + actual_lifetime_field = tracking_cube.data[time_slice_idx] + np.testing.assert_array_equal(actual_lifetime_field, expected_lifetime_field) + + +def test_save_data(feature_cube, tmp_path) -> None: + """Test that tracking data is saved when save_data is True.""" + os.chdir(tmp_path) + test_threshold = 0.5 + min_size = 1 + feature.track( + feature_cube, + threshold=test_threshold, + min_size=min_size, + save_data=True, + ) + # Check expected lifetime field is created in output directory + output_directory = f"{tmp_path}/tracking_data" + expected_file = f"{output_directory}/lifetime_20100101_0000.field" + assert os.path.isfile(expected_file) + + # Check expected csv file is created in output directory + expected_file = f"{output_directory}/frame_20100101_0000.csv" + assert os.path.isfile(expected_file)