diff --git a/climanet/dataset.py b/climanet/dataset.py index 7976297..95842da 100644 --- a/climanet/dataset.py +++ b/climanet/dataset.py @@ -20,6 +20,23 @@ def __init__( spatial_dims: Tuple[str, str] = ("lat", "lon"), patch_size: Tuple[int, int] = (16, 16), # (lat, lon) ): + """Initialize the dataset with daily and monthly data, land mask, and patching parameters. + + Parameters + ---------- + daily_da : xr.DataArray + Daily data array. + monthly_da : xr.DataArray + Monthly data array. + land_mask : xr.DataArray, optional + Land mask array, by default None + time_dim : str, optional + Name of the time dimension, by default "time" + spatial_dims : Tuple[str, str], optional + Names of the spatial dimensions, by default ("lat", "lon") + patch_size : Tuple[int, int], optional + Size of the patches, by default (16, 16) + """ self.spatial_dims = spatial_dims self.patch_size = patch_size self.daily_da = daily_da diff --git a/scripts/README.md b/scripts/README.md new file mode 100644 index 0000000..44dde10 --- /dev/null +++ b/scripts/README.md @@ -0,0 +1,84 @@ +# Example of training a SpatioTemporalModel on HPC + +## Folder structure + +- example_training.py: example training script +- example.slurm: example SLURM script to execute the training script on a SLURM system +- eso4clima_24438134_subset.out: example SLURM job output file of an execution on a subset of the global dataset. The dataset has two years of data (2020-2021) and the spatial coverage is from 30S to 30N and from 30W to 30E. +- eso4clima_24449471_full.out: example SLURM job output file of an execution on the full dataset, two years of data (2020-2021) and almost global coverage (from 80S to 80N and from 179.99W to 179.99E). The training ran for only 1 hour before it was cut off by the SLURM time limit. + +## Execute training tasks on SLURM system + +1. Make a working directory + +```sh +mkdir training +cd training +``` + +2. Clone this repo +```sh +git clone git@github.com:ESMValGroup/ClimaNet.git +``` + +3. 
Install uv for dependency management. See the [uv doc](https://docs.astral.sh/uv/getting-started/installation/). + +4. Create a venv and install Python dependencies using uv +```sh +cd ClimaNet +``` + +``` +uv sync +``` + +A `.venv` dir will appear. + +5. Back in the working dir (`training`), copy the Python script and SLURM script into it: + +```sh +cp ClimaNet/scripts/example* . +``` + +6. Configure `example.slurm`: in the `source ...` line, make sure the venv just created is activated. + Note that the account is the ESO4CLIMA project account, which is shared by multiple users. + +7. Configure `example_training.py`: make sure the paths of the input data and the land mask data are correct. + +8. Execute the SLURM job +```sh +sbatch example.slurm +``` + +## Check the efficiency of resource usage + +In the SLURM job output, you can find lines like these: + +``` +==== Slurm accounting summary 23743544 ==== +JobID|NTasks|AveCPU|AveRSS|MaxRSS|MaxVMSize|TRESUsageInAve|TRESUsageInMax +23743544.extern|1|00:00:00|856K|3752K|641376K|cpu=00:00:00,energy=0,fs/disk=2332,mem=856K,pages=2,vmem=217160K|cpu=00:00:00,energy=0,fs/disk=2332,mem=3752K,pages=2,vmem=641376K +23743544.batch|1|04:21:01|11964K|4102096K|37743716K|cpu=04:21:01,energy=0,fs/disk=22293117907,mem=11964K,pages=19,vmem=356724K|cpu=04:21:01,energy=0,fs/disk=22293117907,mem=4102096K,pages=7711,vmem=37743716K +``` + +These give some information about the resource usage at the end of the job. + +To have a better understanding of the efficiency of resource usage, you can run the following command after the job is finished: + +```sh +sacct -j <jobid> \ + --format=JobID,JobName%30,Partition,AllocCPUS,Elapsed,TotalCPU,MaxRSS,State,ExitCode \ + --parsable2 >> "eso4clima_<jobid>.out" + +``` + +This will output the resource usage information and append it to the SLURM job output file. 
After running this, you can find lines like these in the output file: + +``` +JobID|JobName|Partition|AllocCPUS|Elapsed|TotalCPU|MaxRSS|State|ExitCode +23743544|eso4clima|compute|256|00:02:44|04:21:01||COMPLETED|0:0 +23743544.batch|batch||256|00:02:44|04:21:01|4102096K|COMPLETED|0:0 +23743544.extern|extern||256|00:02:44|00:00.001|3752K|COMPLETED|0:0 +``` + +The efficiency of resource usage can be calculated as `TotalCPU / (AllocCPUS * Elapsed)`. In the example above, the total CPU time is `04:21:01`, the number of allocated CPUs is `256`, and the elapsed time is `00:02:44`, so the efficiency is `04:21:01 / (256 * 00:02:44) = 0.37`. \ No newline at end of file diff --git a/scripts/eso4clima_24438134_subset.out b/scripts/eso4clima_24438134_subset.out new file mode 100644 index 0000000..377cb42 --- /dev/null +++ b/scripts/eso4clima_24438134_subset.out @@ -0,0 +1,263 @@ +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. 
+ daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. 
Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. 
This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. 
+ daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. 
Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. 
This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. 
+ daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. 
Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. 
+ monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. 
Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. 
+ monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. 
Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. 
+ monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. 
Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +2026-04-23 12:05:45,831 - INFO - Creating the model... +2026-04-23 12:05:46,026 - INFO - Creating the dataset... +2026-04-23 12:06:16,938 - INFO - Starting training... 
+Epoch 0: best_loss = 3.379575 +Epoch 20: best_loss = 2.331017 +Epoch 40: best_loss = 2.142740 +Epoch 60: best_loss = 1.710370 +Epoch 80: best_loss = 1.189252 +Epoch 100: best_loss = 0.894883 +Epoch 120: best_loss = 0.688254 +Epoch 140: best_loss = 0.571773 +Epoch 160: best_loss = 0.505098 +Epoch 180: best_loss = 0.445357 +Epoch 200: best_loss = 0.412255 +Epoch 220: best_loss = 0.381430 +Epoch 240: best_loss = 0.361015 +Epoch 260: best_loss = 0.346520 +Epoch 280: best_loss = 0.325091 +Epoch 300: best_loss = 0.317928 +Epoch 320: best_loss = 0.310767 +Epoch 340: best_loss = 0.303774 +Epoch 360: best_loss = 0.296891 +Epoch 380: best_loss = 0.290118 +Epoch 400: best_loss = 0.285634 +Epoch 420: best_loss = 0.281635 +Epoch 440: best_loss = 0.278483 +Epoch 460: best_loss = 0.275278 +Epoch 480: best_loss = 0.272091 +Epoch 500: best_loss = 0.268902 +Training complete. Best loss: 0.268902 +Model saved to runs/best_model.pth +==== Slurm accounting summary 24438134 ==== +JobID|NTasks|AveCPU|AveRSS|MaxRSS|MaxVMSize|TRESUsageInAve|TRESUsageInMax +24438134.extern|1|00:00:00|856K|3752K|575840K|cpu=00:00:00,energy=0,fs/disk=2332,mem=856K,pages=2,vmem=217160K|cpu=00:00:00,energy=0,fs/disk=2332,mem=3752K,pages=2,vmem=575840K +24438134.batch|1|6-04:57:46|11960K|21279432K|53545480K|cpu=6-04:57:46,energy=0,fs/disk=6648661618,mem=11960K,pages=19,vmem=356728K|cpu=6-04:57:46,energy=0,fs/disk=6648661618,mem=21279432K,pages=8555,vmem=53545480K + +******************************************************************************** +* * +* This is the automated job summary provided by DKRZ. * +* If you encounter problems, need assistance or have any suggestion, please * +* write an email to * +* * +* -- support@dkrz.de -- * +* * +* We hope you enjoyed the DKRZ supercomputer LEVANTE ... 
* +* +* JobID : 24438134 +* JobName : eso4clima +* Account : bd0854 +* User : b383704 (202985), bd0854 (1473) +* Partition : compute +* QOS : normal +* Nodelist : l40346 (1) +* Submit date : 2026-04-23T12:03:09 +* Start time : 2026-04-23T12:04:45 +* End time : 2026-04-23T14:04:54 +* Elapsed time : 02:00:09 (Timelimit=04:00:00) +* Command : /home/b/b383704/eso4clima/train_twoyears/ +* example_subset.slurm +* WorkDir : /home/b/b383704/eso4clima/train_twoyears +* +* StepID | JobName NodeHours MaxRSS [Byte] (@task) +* ------------------------------------------------------------------------------ +* batch | batch 2.0 +* extern | extern 2.0 3752K (0) +* ------------------------------------------------------------------------------ + +JobID|JobName|Partition|AllocCPUS|Elapsed|TotalCPU|MaxRSS|State|ExitCode +24438134|eso4clima|compute|256|02:00:09|6-04:57:47||COMPLETED|0:0 +24438134.batch|batch||256|02:00:09|6-04:57:47|21279432K|COMPLETED|0:0 +24438134.extern|extern||256|02:00:09|00:00.001|3752K|COMPLETED|0:0 diff --git a/scripts/eso4clima_24449471_full.out b/scripts/eso4clima_24449471_full.out new file mode 100644 index 0000000..c5b28bb --- /dev/null +++ b/scripts/eso4clima_24449471_full.out @@ -0,0 +1,232 @@ +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. 
This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. 
+ daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. 
Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. 
This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. 
+ daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. 
Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. 
This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. 
+ monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. 
Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. 
+ monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. 
Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. 
+ monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. 
Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. 
+ monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. 
Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +2026-04-23 17:37:53,532 - INFO - Creating the model... +2026-04-23 17:37:53,750 - INFO - Creating the dataset... +/home/b/b383704/eso4clima/ClimaNet/climanet/dataset.py:112: UserWarning: Patch size (120, 120) does not evenly divide image dimensions (H=720, W=640). Uncovered pixels: 0 in height, 40 in width. Consider adjusting patch_size or image dimensions for full coverage. + warnings.warn( +2026-04-23 17:38:52,233 - INFO - Starting training... +Epoch 0: best_loss = 13.395518 +Epoch 20: best_loss = 5.020292 +slurmstepd: error: *** JOB 24449471 ON l10543 CANCELLED AT 2026-04-23T18:37:47 DUE TO TIME LIMIT *** + +******************************************************************************** +* * +* This is the automated job summary provided by DKRZ. * +* If you encounter problems, need assistance or have any suggestion, please * +* write an email to * +* * +* -- support@dkrz.de -- * +* * +* We hope you enjoyed the DKRZ supercomputer LEVANTE ... 
def _open_chunked(files, chunk_edge):
    """Open several ``.nc`` files as one dataset with square spatial chunks.

    Parameters
    ----------
    files : list of Path
        Files to combine by coordinates.
    chunk_edge : int
        Chunk length used along both ``lat`` and ``lon``. The ``time``
        dimension is chunked one step at a time.

    Returns
    -------
    xr.Dataset
        The combined dataset returned by ``xr.open_mfdataset``.
    """
    return xr.open_mfdataset(
        files,
        combine="by_coords",
        chunks={"time": 1, "lat": chunk_edge, "lon": chunk_edge},
        data_vars="minimal",
        coords="minimal",
        compat="override",
        parallel=False,
    )


def main():
    """Train a SpatioTemporalModel on the daily and monthly ``ts`` files.

    The function collects the input files, opens them together with the
    land-sea mask, trims the longitude range, builds the model and the
    dataset, and hands both to ``train_monthly_model``. All settings are
    hard-coded below and have to be adapted to the local experiment setup.

    Raises
    ------
    FileNotFoundError
        If no daily or no monthly input file is found under the data folder.
    """
    # --- Data settings ---
    data_folder = Path("/work/bd0854/b380103/eso4clima/output/v1.0/concatenated/")
    # Path to land-sea mask file (needs to be set up in the experiment directory)
    lsm_file = "/home/b/b383704/eso4clima/train_twoyears/data/era5_lsm_bool.nc"
    # Spatial patch size of the training samples, used for both lat and lon.
    # Must be divisible by the model patch size.
    # Default input data has 720x1440 spatial dimensions.
    patch_size_training = 120

    # --- Training settings ---
    patch_size_model = (1, 4, 4)  # Size of model encoder (time, lat, lon).
    overlap = 1  # Overlap between patches (in pixels).
    num_months = 24  # Number of months to predict (model output channels)
    batch_size = 10  # Number of samples per batch in training
    num_epoch = 501  # Maximum number of epochs to train
    patience = 10  # Number of epochs to wait for improvement before early stopping
    accumulation_steps = 2  # Number of batches to accumulate gradients over
    run_dir = "./runs"  # Directory to save logs and model checkpoints

    # Collect the daily and monthly files; sorting the paths orders them by
    # their leading year.
    daily_pattern = "20*_day_ERA5_masked_ts.nc"
    monthly_pattern = "20*_mon_ERA5_full_ts.nc"
    daily_files = sorted(data_folder.rglob(daily_pattern))
    monthly_files = sorted(data_folder.rglob(monthly_pattern))

    # Fail early with an actionable message instead of a generic error from
    # xarray when the data folder or the file patterns are wrong.
    for label, pattern, files in (
        ("daily", daily_pattern, daily_files),
        ("monthly", monthly_pattern, monthly_files),
    ):
        if not files:
            raise FileNotFoundError(
                f"No {label} files matching '{pattern}' found under {data_folder}"
            )

    # Open datasets with chunks.
    # The chunk sizes are chosen as twice the sample patch size.
    chunk_edge = patch_size_training * 2
    daily_data = _open_chunked(daily_files, chunk_edge)
    monthly_data = _open_chunked(monthly_files, chunk_edge)
    lsm_mask = xr.open_dataset(lsm_file)

    # Keep only longitudes between -179.8 and 179.8. The excluded edge
    # columns hold NaN values, and experiments found that they cause
    # loss=inf during training.
    lon_subset = slice(-179.8, 179.8)
    daily_data = daily_data.sel(lon=lon_subset)
    monthly_data = monthly_data.sel(lon=lon_subset)
    lsm_mask = lsm_mask.sel(lon=lon_subset)  # True=Land

    # Create the model
    print("Creating the model...")
    model = SpatioTemporalModel(
        patch_size=patch_size_model,
        overlap=overlap,
        max_months=num_months,
        num_months=num_months,
    )

    # Make a dataset
    print("Creating the dataset...")
    dataset = STDataset(
        daily_da=daily_data["ts"],
        monthly_da=monthly_data["ts"],
        land_mask=lsm_mask["lsm"],
        patch_size=(patch_size_training, patch_size_training),
    )

    # Train the model.
    # Results will be saved to runs/best_model.pth
    print("Starting training...")
    train_monthly_model(
        model,
        dataset,
        batch_size=batch_size,
        num_epoch=num_epoch,
        patience=patience,
        accumulation_steps=accumulation_steps,
        run_dir=run_dir,
    )