From c27c26ec5821776108d59511aff5ccc373c3c315 Mon Sep 17 00:00:00 2001 From: Norman Fomferra Date: Mon, 3 Feb 2025 18:33:17 +0100 Subject: [PATCH 1/3] started --- CHANGES.md | 3 +++ docs/todo.md | 1 - 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGES.md b/CHANGES.md index 840b0b1..ff7fcf7 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -4,6 +4,9 @@ ### Adjustments and Enhancements +- Added a new core rule `var-fill-value` that checks for the recommended + use of a variable's fill value. + - Added a new core rule `access-latency` that can be used to check the time it takes to open a dataset. diff --git a/docs/todo.md b/docs/todo.md index 6990301..c0f1387 100644 --- a/docs/todo.md +++ b/docs/todo.md @@ -16,7 +16,6 @@ ## Desired - project logo -- add `core` rule checks recommended use of fill value - add `xcube` rule that helps to identify chunking issues - apply rule op args/kwargs validation schema - allow outputting suggestions, if any, that are emitted by some rules From cacd12c726fae29143cd9cef1fd91262a28a2670 Mon Sep 17 00:00:00 2001 From: Norman Fomferra Date: Tue, 4 Feb 2025 09:17:32 +0100 Subject: [PATCH 2/3] new rule `var-missing-data` --- CHANGES.md | 4 +- .../core/rules/test_var_missing_data.py | 49 +++++++++++++++++ tests/plugins/core/test_plugin.py | 1 + xrlint/plugins/core/__init__.py | 1 + xrlint/plugins/core/rules/var_missing_data.py | 53 +++++++++++++++++++ 5 files changed, 106 insertions(+), 2 deletions(-) create mode 100644 tests/plugins/core/rules/test_var_missing_data.py create mode 100644 xrlint/plugins/core/rules/var_missing_data.py diff --git a/CHANGES.md b/CHANGES.md index ff7fcf7..140ee62 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -4,8 +4,8 @@ ### Adjustments and Enhancements -- Added a new core rule `var-fill-value` that checks for the recommended - use of a variable's fill value. +- Added a new core rule `var-missing-data` that checks for the recommended + use of a variable's missing data. - Added a new core rule `access-latency` that can be used to check the time it takes to open a dataset. diff --git a/tests/plugins/core/rules/test_var_missing_data.py b/tests/plugins/core/rules/test_var_missing_data.py new file mode 100644 index 0000000..fe4c0dd --- /dev/null +++ b/tests/plugins/core/rules/test_var_missing_data.py @@ -0,0 +1,49 @@ +# Copyright © 2025 Brockmann Consult GmbH. +# This software is distributed under the terms and conditions of the +# MIT license (https://mit-license.org/). + +import xarray as xr + +from xrlint.plugins.core.rules.var_missing_data import VarMissingData +from xrlint.testing import RuleTest, RuleTester + +# TODO: adjust datasets to rule + +valid_dataset_0 = xr.Dataset() +valid_dataset_1 = xr.Dataset( + attrs=dict(title="v-data"), + coords={"t": xr.DataArray([0, 1, 2], dims="t", attrs={"units": "seconds"})}, + data_vars={"v": xr.DataArray([10, 20, 30], dims="t", attrs={"units": "m/s"})}, +) +valid_dataset_2 = valid_dataset_1.copy() +valid_dataset_2.t.encoding["units"] = "seconds since 2025-02-01 12:15:00" +del valid_dataset_2.t.attrs["units"] + +valid_dataset_3 = valid_dataset_1.copy() +valid_dataset_3.t.attrs["grid_mapping_name"] = "latitude_longitude" + +invalid_dataset_0 = valid_dataset_1.copy() +invalid_dataset_0.t.attrs = {} + +invalid_dataset_1 = valid_dataset_1.copy() +invalid_dataset_1.t.attrs = {"units": 1} + +invalid_dataset_2 = valid_dataset_1.copy() +invalid_dataset_2.t.attrs = {"units": ""} + + +VarMissingDataTest = RuleTester.define_test( + "var-missing-data", + VarMissingData, + valid=[ + RuleTest(dataset=valid_dataset_0), + RuleTest(dataset=valid_dataset_1), + RuleTest(dataset=valid_dataset_2), + RuleTest(dataset=valid_dataset_3), + ], + invalid=[ + RuleTest(dataset=invalid_dataset_0, expected=["Missing attribute 'units'."]), + RuleTest(dataset=invalid_dataset_1, expected=["Invalid attribute 'units': 1"]), + RuleTest(dataset=invalid_dataset_2, expected=["Empty attribute 'units'."]), + ], +) diff --git a/tests/plugins/core/test_plugin.py b/tests/plugins/core/test_plugin.py index a8a6097..f82546d 100644 --- a/tests/plugins/core/test_plugin.py +++ b/tests/plugins/core/test_plugin.py @@ -24,6 +24,7 @@ def test_rules_complete(self): "time-coordinate", "var-desc", "var-flags", + "var-missing-data", "var-units", }, set(plugin.rules.keys()), diff --git a/xrlint/plugins/core/__init__.py b/xrlint/plugins/core/__init__.py index cb0ce3e..f48004b 100644 --- a/xrlint/plugins/core/__init__.py +++ b/xrlint/plugins/core/__init__.py @@ -28,6 +28,7 @@ def export_plugin() -> Plugin: "time-coordinate": "error", "var-desc": "warn", "var-flags": "error", + "var-missing-data": "warn", "var-units": "warn", }, }, diff --git a/xrlint/plugins/core/rules/var_missing_data.py b/xrlint/plugins/core/rules/var_missing_data.py new file mode 100644 index 0000000..06c07f5 --- /dev/null +++ b/xrlint/plugins/core/rules/var_missing_data.py @@ -0,0 +1,53 @@ +# Copyright © 2025 Brockmann Consult GmbH. +# This software is distributed under the terms and conditions of the +# MIT license (https://mit-license.org/). + +import numpy as np + +from xrlint.node import VariableNode +from xrlint.plugins.core.plugin import plugin +from xrlint.rule import RuleContext, RuleOp + + +@plugin.define_rule( + "var-missing-data", + version="1.0.0", + type="suggestion", + description=( + "Checks the recommended use of missing data, i.e., coordinate variables" + " should not define missing data, but packed data should." + " Notifies about the use of valid ranges to indicate missing data, which" + " is currently not supported by xarray." + ), + docs_url="https://cfconventions.org/cf-conventions/cf-conventions.html#units", +) +class VarMissingData(RuleOp): + def validate_variable(self, ctx: RuleContext, node: VariableNode): + array = node.array + encoding = array.encoding + attrs = array.attrs + + fill_value_source = None + if "_FillValue" in encoding: + fill_value_source = "encoding" + elif "_FillValue" in attrs: + fill_value_source = "attribute" + + if fill_value_source is not None and node.name in ctx.dataset.coords: + ctx.report( + f"Unexpected {fill_value_source} '_FillValue'," + f" coordinates must not have missing data." + ) + elif fill_value_source is None and node.name in ctx.dataset.data_vars: + scaling_factor = encoding.get("scaling_factor", attrs.get("scaling_factor")) + add_offset = encoding.get("add_offset", attrs.get("add_offset")) + raw_dtype = encoding.get("dtype") + if add_offset is not None or scaling_factor is not None: + ctx.report("Missing attribute '_FillValue' since data is packed.") + elif isinstance(raw_dtype, np.dtype) and np.issubdtype( + raw_dtype, np.floating + ): + ctx.report("Missing attribute '_FillValue', which should be NaN.") + + if any((name in attrs) for name in ("valid_min", "valid_max", "valid_range")): + ctx.report("Valid ranges are not recognized by xarray (as of Feb 2025).") From 25af807de572e608d418955c9ab8d6af517c25f2 Mon Sep 17 00:00:00 2001 From: Norman Fomferra Date: Wed, 12 Feb 2025 20:51:53 +0100 Subject: [PATCH 3/3] Added tests --- .../core/rules/test_var_missing_data.py | 54 ++++++++++++------- xrlint/plugins/core/rules/var_missing_data.py | 2 +- 2 files changed, 36 insertions(+), 20 deletions(-) diff --git a/tests/plugins/core/rules/test_var_missing_data.py b/tests/plugins/core/rules/test_var_missing_data.py index fe4c0dd..0099efc 100644 --- a/tests/plugins/core/rules/test_var_missing_data.py +++ b/tests/plugins/core/rules/test_var_missing_data.py @@ -1,36 +1,33 @@ # Copyright © 2025 Brockmann Consult GmbH. # This software is distributed under the terms and conditions of the # MIT license (https://mit-license.org/). - +import numpy as np import xarray as xr from xrlint.plugins.core.rules.var_missing_data import VarMissingData from xrlint.testing import RuleTest, RuleTester -# TODO: adjust datasets to rule - valid_dataset_0 = xr.Dataset() valid_dataset_1 = xr.Dataset( attrs=dict(title="v-data"), coords={"t": xr.DataArray([0, 1, 2], dims="t", attrs={"units": "seconds"})}, data_vars={"v": xr.DataArray([10, 20, 30], dims="t", attrs={"units": "m/s"})}, ) -valid_dataset_2 = valid_dataset_1.copy() -valid_dataset_2.t.encoding["units"] = "seconds since 2025-02-01 12:15:00" -del valid_dataset_2.t.attrs["units"] -valid_dataset_3 = valid_dataset_1.copy() -valid_dataset_3.t.attrs["grid_mapping_name"] = "latitude_longitude" +invalid_dataset_0 = valid_dataset_1.copy(deep=True) +invalid_dataset_0.t.attrs["_FillValue"] = -999 -invalid_dataset_0 = valid_dataset_1.copy() -invalid_dataset_0.t.attrs = {} +invalid_dataset_1 = valid_dataset_1.copy(deep=True) +invalid_dataset_1.t.encoding["_FillValue"] = -999 -invalid_dataset_1 = valid_dataset_1.copy() -invalid_dataset_1.t.attrs = {"units": 1} +invalid_dataset_2 = valid_dataset_1.copy(deep=True) +invalid_dataset_2.v.attrs["scaling_factor"] = 0.01 -invalid_dataset_2 = valid_dataset_1.copy() -invalid_dataset_2.t.attrs = {"units": ""} +invalid_dataset_3 = valid_dataset_1.copy(deep=True) +invalid_dataset_3.v.encoding["dtype"] = np.dtype(np.float64) +invalid_dataset_4 = valid_dataset_1.copy(deep=True) +invalid_dataset_4.v.attrs["valid_range"] = [0, 1] VarMissingDataTest = RuleTester.define_test( "var-missing-data", @@ -38,12 +35,31 @@ valid=[ RuleTest(dataset=valid_dataset_0), RuleTest(dataset=valid_dataset_1), - RuleTest(dataset=valid_dataset_2), - RuleTest(dataset=valid_dataset_3), ], invalid=[ - RuleTest(dataset=invalid_dataset_0, expected=["Missing attribute 'units'."]), - RuleTest(dataset=invalid_dataset_1, expected=["Invalid attribute 'units': 1"]), - RuleTest(dataset=invalid_dataset_2, expected=["Empty attribute 'units'."]), + RuleTest( + dataset=invalid_dataset_0, + expected=[ + "Unexpected attribute '_FillValue', coordinates must not have missing data." + ], + ), + RuleTest( + dataset=invalid_dataset_1, + expected=[ + "Unexpected encoding '_FillValue', coordinates must not have missing data." + ], + ), + RuleTest( + dataset=invalid_dataset_2, + expected=["Missing attribute '_FillValue' since data packing is used."], + ), + RuleTest( + dataset=invalid_dataset_3, + expected=["Missing attribute '_FillValue', which should be NaN."], + ), + RuleTest( + dataset=invalid_dataset_4, + expected=["Valid ranges are not recognized by xarray (as of Feb 2025)."], + ), ], ) diff --git a/xrlint/plugins/core/rules/var_missing_data.py b/xrlint/plugins/core/rules/var_missing_data.py index 06c07f5..a8afbed 100644 --- a/xrlint/plugins/core/rules/var_missing_data.py +++ b/xrlint/plugins/core/rules/var_missing_data.py @@ -43,7 +43,7 @@ def validate_variable(self, ctx: RuleContext, node: VariableNode): add_offset = encoding.get("add_offset", attrs.get("add_offset")) raw_dtype = encoding.get("dtype") if add_offset is not None or scaling_factor is not None: - ctx.report("Missing attribute '_FillValue' since data is packed.") + ctx.report("Missing attribute '_FillValue' since data packing is used.") elif isinstance(raw_dtype, np.dtype) and np.issubdtype( raw_dtype, np.floating ):