class DerivedDataset:
    """A dataset representing a variable that can be derived from other variables.

    Parameters
    ----------
    **facets
        Facets describing the dataset. See :ref:`facets` for the mapping between
        the facet names used by ESMValCore and those used on ESGF and
        :ref:`cmor_tables` to find out which variables are available.

    Attributes
    ----------
    facets:
        The facets describing the variable to be derived. This should at least
        include the ``'short_name'``, ``'mip'``, and ``'project'`` facets, but
        may include other facets as well.

    """

    def __init__(self, **facets: FacetValue) -> None:
        self.facets: Facets = facets
        # Created lazily by the ``session`` property on first access.
        self._session: Session | None = None

    def __repr__(self) -> str:
        """Create a string representation."""
        facets = ", ".join(f"{k}={v}" for k, v in self.facets.items())
        return f"{self.__class__.__name__}({facets})"

    @property
    def session(self) -> Session:
        """A :obj:`esmvalcore.config.Session` associated with the dataset.

        If no session has been assigned yet, a new one with a unique name is
        started from the global configuration and remembered.
        """
        if self._session is None:
            session_name = f"session-{uuid.uuid4()}"
            self._session = CFG.start_session(session_name)
        return self._session

    @session.setter
    def session(self, session: Session | None) -> None:
        self._session = session

    @property
    def required(self) -> list[Facets]:
        """The facets of the datasets required to derive the variable.

        Looked up from the ``'short_name'`` and ``'project'`` facets. Subclasses
        may override this property to describe ad-hoc derived variables.
        """
        return esmvalcore.preprocessor._derive.get_required(  # noqa: SLF001
            short_name=self.facets["short_name"],  # type: ignore
            project=self.facets["project"],  # type: ignore
        )

    def derive(self, cubes: Iterable[Cube]) -> Cube:
        """Derive the variable from the input cubes.

        Parameters
        ----------
        cubes
            The cubes to derive the variable from. These should correspond to
            the datasets described by :obj:`DerivedDataset.required`.

        Returns
        -------
        :
            The cube containing the derived variable.
        """
        # Use the session of this dataset (not the global configuration) so
        # the tables are looked up with the same settings that ``from_files``
        # and ``load`` use.
        var_info = get_tables(
            session=self.session,
            project=self.facets["project"],  # type: ignore
        ).get_variable(
            table_name=self.facets["mip"],  # type: ignore
            short_name=self.facets["short_name"],  # type: ignore
            derived=True,
        )
        return esmvalcore.preprocessor.derive(
            cubes=CubeList(cubes),
            short_name=var_info.short_name,  # type: ignore[union-attr]
            long_name=var_info.long_name,  # type: ignore[union-attr]
            units=var_info.units,  # type: ignore[union-attr]
            standard_name=var_info.standard_name,  # type: ignore[union-attr]
        )

    def from_files(self) -> Iterator[Self]:
        """Create derived datasets based on the available files.

        Candidates are found by searching for the first required input; a
        candidate is kept only if files are found for every other required
        input as well. Facets given as wildcards on this dataset are replaced
        by the values found for the candidate.

        Yields
        ------
        :
            Derived datasets representing the available files. They share the
            session of this dataset.
        """
        # ``required`` is a property: evaluate it once instead of once per
        # candidate dataset.
        required = self.required
        if not required:
            # Without input datasets there are no files to search for.
            return

        template = Dataset(**self.facets)
        template.session = self.session

        # NOTE(review): entries of ``required`` are applied verbatim as facets;
        # if they can carry keys that are not search facets, confirm that this
        # is intended.
        first, *others = required
        for dataset in template.copy(**first).from_files():
            if not all(dataset.copy(**facets).files for facets in others):
                continue
            # Only take over facets that were wildcards in this dataset and
            # have been resolved to a concrete value in the candidate.
            resolved = {
                k: v
                for k, v in dataset.facets.items()
                if k in self.facets
                and _isglob(self.facets[k])
                and not _isglob(v)
            }
            derived = self.__class__(**(self.facets | resolved))
            derived.session = self.session
            yield derived

    def load(self) -> Cube:
        """Load the derived variable as a cube.

        Loads one cube per entry in :obj:`DerivedDataset.required` and passes
        them to :meth:`DerivedDataset.derive`.
        """
        template = Dataset(**self.facets)
        template.session = self.session
        datasets = (template.copy(**facets) for facets in self.required)
        cubes = (d.load() for d in datasets)
        return self.derive(cubes)
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "f0ccfe7f-c535-4606-99ce-be24960aece1", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import yaml\n", + "\n", + "import esmvalcore.preprocessor\n", + "from esmvalcore.cmor.table import get_tables\n", + "from esmvalcore.config import CFG\n", + "from esmvalcore.dataset import Dataset, DerivedDataset, datasets_to_recipe\n", + "\n", + "pd.set_option(\"display.max_colwidth\", None)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "f4374495-19c4-4c3b-9fac-d929a5e595ad", + "metadata": {}, + "source": [ + "First, we configure ESMValCore so it searches the ESGF for data:" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "5d2711ea-6738-4a82-97b1-bc7d1212098a", + "metadata": {}, + "outputs": [], + "source": [ + "CFG[\"projects\"][\"CMIP6\"].pop(\n", + " \"data\",\n", + " None,\n", + ") # Clear existing CMIP6 configuration for finding input data\n", + "CFG.nested_update(\n", + " {\n", + " \"projects\": {\n", + " \"CMIP6\": {\n", + " \"data\": {\n", + " \"intake-esgf\": {\n", + " \"type\": \"esmvalcore.io.intake_esgf.IntakeESGFDataSource\",\n", + " \"priority\": 2,\n", + " \"facets\": {\n", + " \"activity\": \"activity_drs\",\n", + " \"dataset\": \"source_id\",\n", + " \"ensemble\": \"member_id\",\n", + " \"exp\": \"experiment_id\",\n", + " \"institute\": \"institution_id\",\n", + " \"grid\": \"grid_label\",\n", + " \"mip\": \"table_id\",\n", + " \"project\": \"project\",\n", + " \"short_name\": \"variable_id\",\n", + " },\n", + " },\n", + " },\n", + " },\n", + " },\n", + " },\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "d5f03519-3580-4baa-93f0-71cb406bf29a", + "metadata": {}, + "source": [ + "## Which variables can be derived?\n", + "\n", + "The interface for working with derived variables from Python is not very polished yet. 
To list all available derived variables, we can run:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "57610048-42ca-4451-96bd-e787cf0eab33", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
short_nameunitslong_name
29alb1albedo at the surface
38amockg s-1Atlantic Meridional Overturning Circulation
44asrW m-2Absorbed shortwave radiation
32chlorakg m-3chlorophyll concentration
46clhmtisccp%ISCCP High Level Medium-Thickness Cloud Area Fraction
2clhtkisccp%ISCCP high level thick cloud area fraction
7cllmtisccp%ISCCP Low Level Medium-Thickness Cloud Area Fraction
11clltkisccp%ISCCP low level thick cloud area fraction
0clmmtisccp%ISCCP Middle Level Medium-Thickness Cloud Area Fraction
36clmtkisccp%ISCCP Middle Level Thick Cloud Area Fraction
40co2s1e-06Atmosphere CO2
42ctotalkg m-2Total Carbon Mass in Ecosystem
47etmm day-1Evapotranspiration
5hfnsW m-2Surface Net Heat Flux
9hurs%Near-Surface Relative Humidity
26lapserateK km-1Lapse Rate
20lvpW m-2Latent Heat Release from Precipitation
8lwcreW m-2TOA Longwave Cloud Radiative Effect
41lwpkg m-2Liquid Water Path
31netcreW m-2TOA Net Cloud Radiative Effect
23ohcJHeat content in grid cell
43qepkg m-2 s-1Net moisture flux into atmosphere
39rlnsW m-2Surface Net downward Longwave Radiation
13rlnstW m-2Net Atmospheric Longwave Cooling
33rlnstcsW m-2Net Atmospheric Longwave Cooling assuming clear sky
12rlntcsW m-2TOA Net downward Longwave Radiation assuming clear sky
45rlusW m-2Surface Upwelling Longwave Radiation
28rsnsW m-2Surface Net downward Shortwave Radiation
25rsnstW m-2Heating from Shortwave Absorption
34rsnstcsW m-2Heating from Shortwave Absorption assuming clear sky
22rsnstcsnorm%Heating from Shortwave Absorption assuming clear sky normalized by incoming solar radiation
27rsntW m-2TOA Net downward Shortwave Radiation
3rsntcsW m-2TOA Net downward Shortwave Radiation assuming clear sky
10rsusW m-2Surface Upwelling Shortwave Radiation
17rtntW m-2TOA Net downward Total Radiation
1sfcwindNaNNaN
30siextent1Sea Ice Extent
14sispeedm s-1Sea-Ice Speed
37sithickmSea Ice Thickness
15smm3 m-3Volumetric Moisture in Upper Portion of Soil Column
16sozmStratospheric Ozone Column (O3 mole fraction >= 125 ppb)
4swcreW m-2TOA Shortwave Cloud Radiative Effect
21tozmTotal Column Ozone
6trozmTropospheric Ozone Column (O3 mole fraction < 125 ppb)
35uajetdegreesJet position expressed as latitude of maximum meridional wind speed
19vegfrac%Vegetation Fraction
24xch41Column-average Dry-air Mole Fraction of Atmospheric Methane
18xco21Column-average Dry-air Mole Fraction of Atmospheric Carbon Dioxide
\n", + "
" + ], + "text/plain": [ + " short_name units \\\n", + "29 alb 1 \n", + "38 amoc kg s-1 \n", + "44 asr W m-2 \n", + "32 chlora kg m-3 \n", + "46 clhmtisccp % \n", + "2 clhtkisccp % \n", + "7 cllmtisccp % \n", + "11 clltkisccp % \n", + "0 clmmtisccp % \n", + "36 clmtkisccp % \n", + "40 co2s 1e-06 \n", + "42 ctotal kg m-2 \n", + "47 et mm day-1 \n", + "5 hfns W m-2 \n", + "9 hurs % \n", + "26 lapserate K km-1 \n", + "20 lvp W m-2 \n", + "8 lwcre W m-2 \n", + "41 lwp kg m-2 \n", + "31 netcre W m-2 \n", + "23 ohc J \n", + "43 qep kg m-2 s-1 \n", + "39 rlns W m-2 \n", + "13 rlnst W m-2 \n", + "33 rlnstcs W m-2 \n", + "12 rlntcs W m-2 \n", + "45 rlus W m-2 \n", + "28 rsns W m-2 \n", + "25 rsnst W m-2 \n", + "34 rsnstcs W m-2 \n", + "22 rsnstcsnorm % \n", + "27 rsnt W m-2 \n", + "3 rsntcs W m-2 \n", + "10 rsus W m-2 \n", + "17 rtnt W m-2 \n", + "1 sfcwind NaN \n", + "30 siextent 1 \n", + "14 sispeed m s-1 \n", + "37 sithick m \n", + "15 sm m3 m-3 \n", + "16 soz m \n", + "4 swcre W m-2 \n", + "21 toz m \n", + "6 troz m \n", + "35 uajet degrees \n", + "19 vegfrac % \n", + "24 xch4 1 \n", + "18 xco2 1 \n", + "\n", + " long_name \n", + "29 albedo at the surface \n", + "38 Atlantic Meridional Overturning Circulation \n", + "44 Absorbed shortwave radiation \n", + "32 chlorophyll concentration \n", + "46 ISCCP High Level Medium-Thickness Cloud Area Fraction \n", + "2 ISCCP high level thick cloud area fraction \n", + "7 ISCCP Low Level Medium-Thickness Cloud Area Fraction \n", + "11 ISCCP low level thick cloud area fraction \n", + "0 ISCCP Middle Level Medium-Thickness Cloud Area Fraction \n", + "36 ISCCP Middle Level Thick Cloud Area Fraction \n", + "40 Atmosphere CO2 \n", + "42 Total Carbon Mass in Ecosystem \n", + "47 Evapotranspiration \n", + "5 Surface Net Heat Flux \n", + "9 Near-Surface Relative Humidity \n", + "26 Lapse Rate \n", + "20 Latent Heat Release from Precipitation \n", + "8 TOA Longwave Cloud Radiative Effect \n", + "41 Liquid Water Path \n", + "31 TOA Net Cloud 
Radiative Effect \n", + "23 Heat content in grid cell \n", + "43 Net moisture flux into atmosphere \n", + "39 Surface Net downward Longwave Radiation \n", + "13 Net Atmospheric Longwave Cooling \n", + "33 Net Atmospheric Longwave Cooling assuming clear sky \n", + "12 TOA Net downward Longwave Radiation assuming clear sky \n", + "45 Surface Upwelling Longwave Radiation \n", + "28 Surface Net downward Shortwave Radiation \n", + "25 Heating from Shortwave Absorption \n", + "34 Heating from Shortwave Absorption assuming clear sky \n", + "22 Heating from Shortwave Absorption assuming clear sky normalized by incoming solar radiation \n", + "27 TOA Net downward Shortwave Radiation \n", + "3 TOA Net downward Shortwave Radiation assuming clear sky \n", + "10 Surface Upwelling Shortwave Radiation \n", + "17 TOA Net downward Total Radiation \n", + "1 NaN \n", + "30 Sea Ice Extent \n", + "14 Sea-Ice Speed \n", + "37 Sea Ice Thickness \n", + "15 Volumetric Moisture in Upper Portion of Soil Column \n", + "16 Stratospheric Ozone Column (O3 mole fraction >= 125 ppb) \n", + "4 TOA Shortwave Cloud Radiative Effect \n", + "21 Total Column Ozone \n", + "6 Tropospheric Ozone Column (O3 mole fraction < 125 ppb) \n", + "35 Jet position expressed as latitude of maximum meridional wind speed \n", + "19 Vegetation Fraction \n", + "24 Column-average Dry-air Mole Fraction of Atmospheric Methane \n", + "18 Column-average Dry-air Mole Fraction of Atmospheric Carbon Dioxide " + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.DataFrame.from_dict(\n", + " [\n", + " {\n", + " \"short_name\": short_name,\n", + " }\n", + " | {\n", + " k: getattr(\n", + " get_tables(CFG, project=\"CMIP6\").get_variable(\n", + " table_name=\"x\",\n", + " short_name=short_name,\n", + " derived=True,\n", + " ),\n", + " k,\n", + " None,\n", + " )\n", + " for k in [\"units\", \"long_name\"]\n", + " }\n", + " for short_name in 
esmvalcore.preprocessor._derive.ALL_DERIVED_VARIABLES # noqa: SLF001\n", + " ],\n", + ").sort_values(\"short_name\")" + ] + }, + { + "cell_type": "markdown", + "id": "f0294f89-fa3d-43da-a370-d2e4613fbdda", + "metadata": {}, + "source": [ + "Note that [modules, functions, and variables starting with a single `_` character should be considered internal](https://peps.python.org/pep-0008/#descriptive-naming-styles), so there are no guarantees about the stability of this interface." + ] + }, + { + "cell_type": "markdown", + "id": "d1f58094-65d1-4d55-bf02-3a14d4cdea1c", + "metadata": {}, + "source": [ + "## Finding available datasets" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "aea7a272-7d26-44d9-8766-379379e5d152", + "metadata": {}, + "source": [ + "We define a dataset template to search for all CMIP6 models that provide all required input datasets to derive `lwcre` or longwave cloud radiative effect at the top of atmosphere on a monthly resolution for the historical experiment. Note that ESMValCore uses its own names for the facets for a more uniform naming across different CMIP phases and other projects. The mapping to the facet names used on ESGF can be found in [Facets](https://docs.esmvaltool.org/projects/ESMValCore/en/latest/reference/facets.html)." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "23c26e29-ea87-40d7-a962-85a06fc77221", + "metadata": {}, + "outputs": [], + "source": [ + "dataset_template = DerivedDataset(\n", + " short_name=\"lwcre\",\n", + " mip=\"Amon\",\n", + " project=\"CMIP6\",\n", + " exp=\"historical\",\n", + " dataset=\"*\",\n", + " institute=\"*\",\n", + " ensemble=\"r1i1p1f1\",\n", + " grid=\"gn\",\n", + ")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "baf29fbb-eed5-47bd-8805-c27ad34b0539", + "metadata": {}, + "source": [ + "Next, we use the `DerivedDataset.from_files` method to build a list of datasets from the available files. 
This may take a while as searching the ESGF for many files may be a bit slow. Because the search results are cached, subsequent searches will be faster." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "d657320b-25c7-48f3-bfe1-5f3b94d7b789", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Found 37 datasets, showing the first 10:\n" + ] + }, + { + "data": { + "text/plain": [ + "[DerivedDataset(short_name=lwcre, mip=Amon, project=CMIP6, exp=historical, dataset=TaiESM1, institute=AS-RCEC, ensemble=r1i1p1f1, grid=gn),\n", + " DerivedDataset(short_name=lwcre, mip=Amon, project=CMIP6, exp=historical, dataset=AWI-CM-1-1-MR, institute=AWI, ensemble=r1i1p1f1, grid=gn),\n", + " DerivedDataset(short_name=lwcre, mip=Amon, project=CMIP6, exp=historical, dataset=AWI-ESM-1-1-LR, institute=AWI, ensemble=r1i1p1f1, grid=gn),\n", + " DerivedDataset(short_name=lwcre, mip=Amon, project=CMIP6, exp=historical, dataset=BCC-CSM2-MR, institute=BCC, ensemble=r1i1p1f1, grid=gn),\n", + " DerivedDataset(short_name=lwcre, mip=Amon, project=CMIP6, exp=historical, dataset=BCC-ESM1, institute=BCC, ensemble=r1i1p1f1, grid=gn),\n", + " DerivedDataset(short_name=lwcre, mip=Amon, project=CMIP6, exp=historical, dataset=CAMS-CSM1-0, institute=CAMS, ensemble=r1i1p1f1, grid=gn),\n", + " DerivedDataset(short_name=lwcre, mip=Amon, project=CMIP6, exp=historical, dataset=CAS-ESM2-0, institute=CAS, ensemble=r1i1p1f1, grid=gn),\n", + " DerivedDataset(short_name=lwcre, mip=Amon, project=CMIP6, exp=historical, dataset=FGOALS-g3, institute=CAS, ensemble=r1i1p1f1, grid=gn),\n", + " DerivedDataset(short_name=lwcre, mip=Amon, project=CMIP6, exp=historical, dataset=IITM-ESM, institute=CCCR-IITM, ensemble=r1i1p1f1, grid=gn),\n", + " DerivedDataset(short_name=lwcre, mip=Amon, project=CMIP6, exp=historical, dataset=CanESM5-1, institute=CCCma, ensemble=r1i1p1f1, grid=gn)]" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": 
"execute_result" + } + ], + "source": [ + "datasets = list(dataset_template.from_files())\n", + "print(f\"Found {len(datasets)} datasets, showing the first 10:\")\n", + "datasets[:10]" + ] + }, + { + "cell_type": "markdown", + "id": "d79ebf03-08bf-42ae-a756-1417566e3be8", + "metadata": {}, + "source": [ + "## Composing a recipe with derived variables" + ] + }, + { + "cell_type": "markdown", + "id": "3f88a30e-9dcd-431d-b469-3efd367795de", + "metadata": {}, + "source": [ + "To use the datasets found above in a recipe, we will want to use the name of the variable that needs to be derived, along with the `derive: true` option:" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "f7f13430-b359-4ef6-a06b-82af64857cfa", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "datasets:\n", + "- dataset: ACCESS-CM2\n", + " institute: CSIRO-ARCCSS\n", + "- dataset: ACCESS-ESM1-5\n", + " institute: CSIRO\n", + "- dataset: AWI-CM-1-1-MR\n", + " institute: AWI\n", + "- dataset: AWI-ESM-1-1-LR\n", + " institute: AWI\n", + "- dataset: BCC-CSM2-MR\n", + " institute: BCC\n", + "- dataset: BCC-ESM1\n", + " institute: BCC\n", + "- dataset: CAMS-CSM1-0\n", + " institute: CAMS\n", + "- dataset: CAS-ESM2-0\n", + " institute: CAS\n", + "- dataset: CESM2\n", + " institute: NCAR\n", + "- dataset: CESM2-FV2\n", + " institute: NCAR\n", + "- dataset: CESM2-WACCM\n", + " institute: NCAR\n", + "- dataset: CESM2-WACCM-FV2\n", + " institute: NCAR\n", + "- dataset: CMCC-CM2-HR4\n", + " institute: CMCC\n", + "- dataset: CMCC-CM2-SR5\n", + " institute: CMCC\n", + "- dataset: CMCC-ESM2\n", + " institute: CMCC\n", + "- dataset: CanESM5\n", + " institute: CCCma\n", + "- dataset: CanESM5-1\n", + " institute: CCCma\n", + "- dataset: FGOALS-g3\n", + " institute: CAS\n", + "- dataset: FIO-ESM-2-0\n", + " institute: FIO-QLNM\n", + "- dataset: GISS-E2-1-G\n", + " institute: NASA-GISS\n", + "- dataset: GISS-E2-1-G-CC\n", + " institute: NASA-GISS\n", 
+ "- dataset: GISS-E2-1-H\n", + " institute: NASA-GISS\n", + "- dataset: GISS-E2-2-G\n", + " institute: NASA-GISS\n", + "- dataset: GISS-E2-2-H\n", + " institute: NASA-GISS\n", + "- dataset: ICON-ESM-LR\n", + " institute: MPI-M\n", + "- dataset: IITM-ESM\n", + " institute: CCCR-IITM\n", + "- dataset: MIROC6\n", + " institute: MIROC\n", + "- dataset: MPI-ESM-1-2-HAM\n", + " institute: HAMMOZ-Consortium\n", + "- dataset: MPI-ESM1-2-HR\n", + " institute: MPI-M\n", + "- dataset: MPI-ESM1-2-LR\n", + " institute: MPI-M\n", + "- dataset: MRI-ESM2-0\n", + " institute: MRI\n", + "- dataset: NESM3\n", + " institute: NUIST\n", + "- dataset: NorCPM1\n", + " institute: NCC\n", + "- dataset: NorESM2-LM\n", + " institute: NCC\n", + "- dataset: NorESM2-MM\n", + " institute: NCC\n", + "- dataset: SAM0-UNICON\n", + " institute: SNU\n", + "- dataset: TaiESM1\n", + " institute: AS-RCEC\n", + "diagnostics:\n", + " diagnostic_name:\n", + " variables:\n", + " lwcre:\n", + " derive: true\n", + " ensemble: r1i1p1f1\n", + " exp: historical\n", + " grid: gn\n", + " mip: Amon\n", + " project: CMIP6\n", + "\n" + ] + } + ], + "source": [ + "recipe_datasets = [\n", + " Dataset(\n", + " diagnostic=\"diagnostic_name\",\n", + " derive=True,\n", + " **dataset.facets,\n", + " )\n", + " for dataset in datasets\n", + "]\n", + "print(yaml.safe_dump(datasets_to_recipe(recipe_datasets)))" + ] + }, + { + "cell_type": "markdown", + "id": "265a0d2e-2541-4171-8d0b-406a42a519e1", + "metadata": {}, + "source": [ + "There is also a `force_derivation` option available for use in the recipe; when set to `true`, it causes the variable to be derived even if it is already available as a dataset." 
+ ] + }, + { + "cell_type": "markdown", + "id": "4d7ea302-57ac-4054-9130-13860827bfc2", + "metadata": {}, + "source": [ + "## Computing the derived variable" + ] + }, + { + "cell_type": "markdown", + "id": "79d9d439-f95e-4ae8-8585-0d2b506d338c", + "metadata": {}, + "source": [ + "Let's load the data to derive the first dataset:" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "0509e9bb-782b-4cae-a37e-757de0f482c5", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "DerivedDataset(short_name=lwcre, mip=Amon, project=CMIP6, exp=historical, dataset=TaiESM1, institute=AS-RCEC, ensemble=r1i1p1f1, grid=gn)" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dataset = datasets[0]\n", + "dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "22a1bd2d-f329-4610-8076-3c109dade67e", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:esmvalcore.cmor.check:There were warnings in variable rlut:\n", + " rlut: attribute positive not present\n", + "loaded from file \n", + "WARNING:esmvalcore.cmor.check:There were warnings in variable rlutcs:\n", + " rlutcs: attribute positive not present\n", + "loaded from file \n" + ] + }, + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + "\n", 
+ " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + "
Toa Longwave Cloud Radiative Effect (W m-2)timelatitudelongitude
Shape1980192288
Dimension coordinates
\ttimex--
\tlatitude-x-
\tlongitude--x
Attributes
\tConventions'CF-1.7 CMIP-6.2'
\tactivity_drs'CMIP'
\tactivity_id'CMIP'
\tbranch_method'Hybrid-restart from year 0671-01-01 of piControl'
\tbranch_time0.0
\tbranch_time_in_child-674885
\tbranch_time_in_parent171550.0
\tcmor_version'3.5.0'
\tcontact'Dr. Wei-Liang Lee (leelupin@gate.sinica.edu.tw)'
\tdata_specs_version'01.00.31'
\texperiment'all-forcing simulation of the recent past'
\texperiment_id'historical'
\texternal_variables'areacella'
\tforcing_index1
\tfrequency'mon'
\tfurther_info_url'https://furtherinfo.es-doc.org/CMIP6.AS-RCEC.TaiESM1.historical.none.r ...'
\tgrid'finite-volume grid with 0.9x1.25 degree lat/lon resolution'
\tgrid_label'gn'
\tinitialization_index1
\tinstitution'Research Center for Environmental Changes, Academia Sinica, Nankang, Taipei ...'
\tinstitution_id'AS-RCEC'
\tlicense'CMIP6 model data produced by NCC is licensed under a Creative Commons Attribution ...'
\tmember_id'r1i1p1f1'
\tmip_era'CMIP6'
\tmodel_id'TaiESM1'
\tnominal_resolution'100 km'
\toriginal_units'W/m2'
\tparent_activity_id'CMIP'
\tparent_experiment_id'piControl'
\tparent_mip_era'CMIP6'
\tparent_source_id'TaiESM1'
\tparent_sub_experiment_id'none'
\tparent_time_units'days since 1850-1-1 00:00:00'
\tparent_variant_label'r1i1p1f1'
\tphysics_index1
\tpositive'down'
\tproduct'model-output'
\trealization_index1
\trealm'atmos'
\treferences'10.5194/gmd-2019-377'
\trun_variant'N/A'
\tsource'TaiESM 1.0 (2018): \\naerosol: SNAP (same grid as atmos)\\natmos: TaiAM1 ...'
\tsource_id'TaiESM1'
\tsource_type'AOGCM AER BGC'
\tsub_experiment'none'
\tsub_experiment_id'none'
\ttable_id'Amon'
\ttable_info'Creation Date:(24 July 2019) MD5:0bb394a356ef9d214d027f1aca45853e'
\ttitle'TaiESM1 output prepared for CMIP6'
\tvariant_label'r1i1p1f1'
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cubes = dataset.load()\n", + "cubes" + ] + }, + { + "cell_type": "markdown", + "id": "90f46e4b-e4ab-4cf9-bde1-cf6dc074d5ae", + "metadata": {}, + "source": [ + "## Implementing your own derived variables\n", + "\n", + "Guidance on adding new built-in derived variables to ESMValCore is available in [Deriving a variable](https://docs.esmvaltool.org/projects/ESMValCore/en/latest/develop/derivation.html). However, if you are only using the Python interface, you can define an ad-hoc derived variable by subclassing the `DerivedDataset` class and implementing a custom `required` attribute and `derive` method. The `required` attribute defines the facets that describe the input data:" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "16f4d1f7-bec9-4657-bfd8-ed40d8a3a475", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[{'short_name': 'rlut'}, {'short_name': 'rlutcs'}]" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dataset.required" + ] + }, + { + "cell_type": "markdown", + "id": "d3efe990-eccc-4170-acdc-83e8ac719128", + "metadata": {}, + "source": [ + "In this case we see that `lwcre` is derived from the variables `rlut` and `rlutcs`. The `derive` method is a function that takes the iris cubes resulting from loading the datasets described by the `facets` and `required` attributes as an argument, and computes the derived variable." 
+ ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}