diff --git a/CHANGES.md b/CHANGES.md index 840b0b1..140ee62 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -4,6 +4,9 @@ ### Adjustments and Enhancements +- Added a new core rule `var-missing-data` that checks for the recommended + use of a variable's missing data. + - Added a new core rule `access-latency` that can be used to check the time it takes to open a dataset. diff --git a/docs/todo.md b/docs/todo.md index 4463bb4..2826c9e 100644 --- a/docs/todo.md +++ b/docs/todo.md @@ -13,7 +13,6 @@ ## Desired - project logo -- add `core` rule checks recommended use of fill value - add `xcube` rule that helps to identify chunking issues - apply rule op args/kwargs validation schema - allow outputting suggestions, if any, that are emitted by some rules diff --git a/tests/plugins/core/rules/test_var_missing_data.py b/tests/plugins/core/rules/test_var_missing_data.py new file mode 100644 index 0000000..0099efc --- /dev/null +++ b/tests/plugins/core/rules/test_var_missing_data.py @@ -0,0 +1,65 @@ +# Copyright © 2025 Brockmann Consult GmbH. +# This software is distributed under the terms and conditions of the +# MIT license (https://mit-license.org/). +import numpy as np +import xarray as xr + +from xrlint.plugins.core.rules.var_missing_data import VarMissingData +from xrlint.testing import RuleTest, RuleTester + +valid_dataset_0 = xr.Dataset() +valid_dataset_1 = xr.Dataset( + attrs=dict(title="v-data"), + coords={"t": xr.DataArray([0, 1, 2], dims="t", attrs={"units": "seconds"})}, + data_vars={"v": xr.DataArray([10, 20, 30], dims="t", attrs={"units": "m/s"})}, +) + +invalid_dataset_0 = valid_dataset_1.copy(deep=True) +invalid_dataset_0.t.attrs["_FillValue"] = -999 + +invalid_dataset_1 = valid_dataset_1.copy(deep=True) +invalid_dataset_1.t.encoding["_FillValue"] = -999 + +invalid_dataset_2 = valid_dataset_1.copy(deep=True) +invalid_dataset_2.v.attrs["scaling_factor"] = 0.01 + +invalid_dataset_3 = valid_dataset_1.copy(deep=True) +invalid_dataset_3.v.encoding["dtype"] = np.dtype(np.float64) + +invalid_dataset_4 = valid_dataset_1.copy(deep=True) +invalid_dataset_4.v.attrs["valid_range"] = [0, 1] + +VarMissingDataTest = RuleTester.define_test( + "var-missing-data", + VarMissingData, + valid=[ + RuleTest(dataset=valid_dataset_0), + RuleTest(dataset=valid_dataset_1), + ], + invalid=[ + RuleTest( + dataset=invalid_dataset_0, + expected=[ + "Unexpected attribute '_FillValue', coordinates must not have missing data." + ], + ), + RuleTest( + dataset=invalid_dataset_1, + expected=[ + "Unexpected encoding '_FillValue', coordinates must not have missing data." + ], + ), + RuleTest( + dataset=invalid_dataset_2, + expected=["Missing attribute '_FillValue' since data packing is used."], + ), + RuleTest( + dataset=invalid_dataset_3, + expected=["Missing attribute '_FillValue', which should be NaN."], + ), + RuleTest( + dataset=invalid_dataset_4, + expected=["Valid ranges are not recognized by xarray (as of Feb 2025)."], + ), + ], +) diff --git a/tests/plugins/core/test_plugin.py b/tests/plugins/core/test_plugin.py index a8a6097..f82546d 100644 --- a/tests/plugins/core/test_plugin.py +++ b/tests/plugins/core/test_plugin.py @@ -24,6 +24,7 @@ def test_rules_complete(self): "time-coordinate", "var-desc", "var-flags", + "var-missing-data", "var-units", }, set(plugin.rules.keys()), diff --git a/xrlint/plugins/core/__init__.py b/xrlint/plugins/core/__init__.py index cb0ce3e..f48004b 100644 --- a/xrlint/plugins/core/__init__.py +++ b/xrlint/plugins/core/__init__.py @@ -28,6 +28,7 @@ def export_plugin() -> Plugin: "time-coordinate": "error", "var-desc": "warn", "var-flags": "error", + "var-missing-data": "warn", "var-units": "warn", }, }, diff --git a/xrlint/plugins/core/rules/var_missing_data.py b/xrlint/plugins/core/rules/var_missing_data.py new file mode 100644 index 0000000..a8afbed --- /dev/null +++ b/xrlint/plugins/core/rules/var_missing_data.py @@ -0,0 +1,53 @@ +# Copyright © 2025 Brockmann Consult GmbH. +# This software is distributed under the terms and conditions of the +# MIT license (https://mit-license.org/). + +import numpy as np + +from xrlint.node import VariableNode +from xrlint.plugins.core.plugin import plugin +from xrlint.rule import RuleContext, RuleOp + + +@plugin.define_rule( + "var-missing-data", + version="1.0.0", + type="suggestion", + description=( + "Checks the recommended use of missing data, i.e., coordinate variables" + " should not define missing data, but packed data should." + " Notifies about the use of valid ranges to indicate missing data, which" + " is currently not supported by xarray." + ), + docs_url="https://cfconventions.org/cf-conventions/cf-conventions.html#units", +) +class VarMissingData(RuleOp): + def validate_variable(self, ctx: RuleContext, node: VariableNode): + array = node.array + encoding = array.encoding + attrs = array.attrs + + fill_value_source = None + if "_FillValue" in encoding: + fill_value_source = "encoding" + elif "_FillValue" in attrs: + fill_value_source = "attribute" + + if fill_value_source is not None and node.name in ctx.dataset.coords: + ctx.report( + f"Unexpected {fill_value_source} '_FillValue'," + f" coordinates must not have missing data." + ) + elif fill_value_source is None and node.name in ctx.dataset.data_vars: + scaling_factor = encoding.get("scaling_factor", attrs.get("scaling_factor")) + add_offset = encoding.get("add_offset", attrs.get("add_offset")) + raw_dtype = encoding.get("dtype") + if add_offset is not None or scaling_factor is not None: + ctx.report("Missing attribute '_FillValue' since data packing is used.") + elif isinstance(raw_dtype, np.dtype) and np.issubdtype( + raw_dtype, np.floating + ): + ctx.report("Missing attribute '_FillValue', which should be NaN.") + + if any((name in attrs) for name in ("valid_min", "valid_max", "valid_range")): + ctx.report("Valid ranges are not recognized by xarray (as of Feb 2025).")