92 changes: 59 additions & 33 deletions bluemath_tk/datamining/pca.py
@@ -2,6 +2,7 @@

import cartopy.crs as ccrs
import numpy as np
import pandas as pd
import xarray as xr
from sklearn.decomposition import PCA as PCA_
from sklearn.decomposition import IncrementalPCA as IncrementalPCA_
@@ -75,6 +76,8 @@ class PCA(BaseReduction):
The explained variance ratio.
cumulative_explained_variance_ratio : np.ndarray
The cumulative explained variance ratio.
pcs_df : pd.DataFrame
The PCs obtained during fitting (self.pcs), as a pandas DataFrame.

Methods
-------
@@ -97,33 +100,41 @@ class PCA(BaseReduction):

Examples
--------
>>> from bluemath_tk.core.data.sample_data import get_2d_dataset
>>> from bluemath_tk.datamining.pca import PCA
>>> ds = get_2d_dataset()
>>> pca = PCA(
... n_components=5,
... is_incremental=False,
... debug=True,
... )
>>> pca.fit(
... data=ds,
... vars_to_stack=["X", "Y"],
... coords_to_stack=["coord1", "coord2"],
... pca_dim_for_rows="coord3",
... windows_in_pca_dim_for_rows={"X": [1, 2, 3]},
... value_to_replace_nans={"X": 0.0},
... nan_threshold_to_drop={"X": 0.95},
... )
>>> pcs = pca.transform(
... data=ds,
... )
>>> reconstructed_ds = pca.inverse_transform(PCs=pcs)
>>> eofs = pca.eofs
>>> explained_variance = pca.explained_variance
>>> explained_variance_ratio = pca.explained_variance_ratio
>>> cumulative_explained_variance_ratio = pca.cumulative_explained_variance_ratio
>>> # Save the full class in a pickle file
>>> pca.save_model("pca_model.pkl")
.. jupyter-execute::

from bluemath_tk.core.data.sample_data import get_2d_dataset
from bluemath_tk.datamining.pca import PCA

ds = get_2d_dataset()

pca = PCA(
n_components=5,
is_incremental=False,
debug=True,
)
pca.fit(
data=ds,
vars_to_stack=["X", "Y"],
coords_to_stack=["coord1", "coord2"],
pca_dim_for_rows="coord3",
windows_in_pca_dim_for_rows={"X": [1, 2, 3]},
value_to_replace_nans={"X": 0.0},
nan_threshold_to_drop={"X": 0.95},
)
pcs = pca.transform(
data=ds,
)
reconstructed_ds = pca.inverse_transform(PCs=pcs)
eofs = pca.eofs
explained_variance = pca.explained_variance
explained_variance_ratio = pca.explained_variance_ratio
cumulative_explained_variance_ratio = pca.cumulative_explained_variance_ratio

# Save the full class in a pickle file
pca.save_model("pca_model.pkl")

# Plot the calculated EOFs
pca.plot_eofs(vars_to_plot=["X", "Y"], num_eofs=3)

References
----------
@@ -212,10 +223,10 @@ def __init__(

# Exclude attributes from being saved with pca.save_model()
self._exclude_attributes = [
# "_data",
"_data",
"_window_processed_data",
# "_stacked_data_matrix",
# "_standarized_stacked_data_matrix",
"_stacked_data_matrix",
"_standarized_stacked_data_matrix",
]

@property
@@ -254,6 +265,19 @@ def explained_variance_ratio(self) -> np.ndarray:
def cumulative_explained_variance_ratio(self) -> np.ndarray:
return np.cumsum(self.explained_variance_ratio)

@property
def pcs_df(self) -> pd.DataFrame:
if self.pcs is not None:
return pd.DataFrame(
data=self.pcs["PCs"].values,
columns=[f"PC{i + 1}" for i in range(self.pca.n_components_)],
index=self.pcs[self.pca_dim_for_rows].values,
)
else:
raise PCAError(
"PCA model must be fitted and transformed before calling pcs_df"
)

def _generate_stacked_data(self, data: xr.Dataset) -> np.ndarray:
"""
Generate stacked data matrix.
@@ -606,7 +630,7 @@ def transform(self, data: xr.Dataset, after_fitting: bool = False) -> xr.Dataset
transformed_data = self.pca.transform(X=processed_data)

# Save the Principal Components (PCs) in an xr.Dataset
self.pcs = xr.Dataset(
pcs = xr.Dataset(
{
"PCs": ((self.pca_dim_for_rows, "n_component"), transformed_data),
"stds": (("n_component",), np.std(transformed_data, axis=0)),
@@ -616,8 +640,10 @@ def transform(self, data: xr.Dataset, after_fitting: bool = False) -> xr.Dataset
"n_component": np.arange(self.pca.n_components_),
},
)
if after_fitting:
self.pcs = pcs.copy()

return self.pcs.copy()
return pcs

def fit_transform(
self,
@@ -736,7 +762,7 @@ def plot_pcs(self, num_pcs: int, pcs: xr.Dataset = None) -> None:
"No Principal Components (PCs) found. Please transform some data first."
)
self.logger.info("Using the Principal Components (PCs) from the class")
pcs = self.pcs
pcs = self.pcs.copy()

_ = (
pcs["PCs"]
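Taken together, the pca.py changes mean that a plain transform call now returns the PCs without overwriting self.pcs (the stored PCs are only updated when after_fitting=True), and the stored PCs are additionally exposed as a DataFrame through the new pcs_df property. The following is a minimal sketch of the resulting usage, reusing a trimmed version of the docstring example and assuming that fit (or fit_transform) stores the fitting PCs internally via transform(..., after_fitting=True):

from bluemath_tk.core.data.sample_data import get_2d_dataset
from bluemath_tk.datamining.pca import PCA

ds = get_2d_dataset()
pca = PCA(n_components=5)
pca.fit(
    data=ds,
    vars_to_stack=["X", "Y"],
    coords_to_stack=["coord1", "coord2"],
    pca_dim_for_rows="coord3",
)

# New property: the fitting PCs as a DataFrame with columns PC1..PC5,
# indexed by the values of pca_dim_for_rows ("coord3" here).
# Raises PCAError if no PCs have been stored yet.
print(pca.pcs_df.head())

# A plain transform no longer touches self.pcs; it just returns the new PCs.
new_pcs = pca.transform(data=ds)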
2 changes: 1 addition & 1 deletion bluemath_tk/wrappers/_base_wrappers.py
@@ -422,7 +422,7 @@ def run_cases(
cases_dir_to_run = copy.deepcopy(self.cases_dirs)

if parallel:
num_threads = self.get_num_processors_available()
num_threads = 5 # self.get_num_processors_available()
self.logger.debug(
f"Running cases in parallel with launcher={launcher}. Number of threads: {num_threads}."
)
177 changes: 140 additions & 37 deletions bluemath_tk/wrappers/swan/swan_example.py
@@ -12,69 +12,173 @@
from bluemath_tk.wrappers.swan.swan_wrapper import SwanModelWrapper

example_directions = [
1.5,
4.5,
7.5,
10.5,
13.5,
16.5,
19.5,
22.5,
25.5,
28.5,
31.5,
34.5,
37.5,
40.5,
43.5,
46.5,
49.5,
52.5,
55.5,
58.5,
61.5,
64.5,
67.5,
70.5,
73.5,
76.5,
79.5,
82.5,
85.5,
88.5,
91.5,
94.5,
97.5,
100.5,
103.5,
106.5,
109.5,
112.5,
115.5,
118.5,
121.5,
124.5,
127.5,
130.5,
133.5,
136.5,
139.5,
142.5,
145.5,
148.5,
151.5,
154.5,
157.5,
160.5,
163.5,
166.5,
169.5,
172.5,
175.5,
178.5,
181.5,
184.5,
187.5,
190.5,
193.5,
196.5,
199.5,
202.5,
205.5,
208.5,
211.5,
214.5,
217.5,
220.5,
223.5,
226.5,
229.5,
232.5,
235.5,
238.5,
241.5,
244.5,
247.5,
250.5,
253.5,
256.5,
259.5,
262.5,
265.5,
268.5,
271.5,
274.5,
277.5,
280.5,
283.5,
286.5,
289.5,
292.5,
295.5,
298.5,
301.5,
304.5,
307.5,
310.5,
313.5,
316.5,
319.5,
322.5,
325.5,
328.5,
331.5,
334.5,
337.5,
340.5,
343.5,
346.5,
349.5,
352.5,
355.5,
358.5,
]
example_frequencies = [
0.035,
0.0385,
0.042349998,
0.046585,
0.051243503,
0.05636785,
0.062004633,
0.068205096,
0.07502561,
0.082528174,
0.090780996,
0.099859096,
0.10984501,
0.120829515,
0.13291247,
0.14620373,
0.1608241,
0.17690653,
0.19459718,
0.21405691,
0.2354626,
0.25900885,
0.28490975,
0.31340075,
0.3447408,
0.37921488,
0.4171364,
0.45885003,
0.50473505,
0.03,
0.033,
0.0363,
0.0399,
0.0438,
0.0482,
0.053,
0.0582,
0.064,
0.0704,
0.0774,
0.0851,
0.0935,
0.1028,
0.1131,
0.1243,
0.1367,
0.1503,
0.1652,
0.1816,
0.1997,
0.2195,
0.2413,
0.2653,
0.2917,
0.3207,
0.3526,
0.3876,
0.4262,
0.4685,
0.5151,
0.5663,
0.6226,
0.6845,
0.7525,
0.8273,
0.9096,
1.0,
]


class BinWavesWrapper(SwanModelWrapper):
""" """

def build_case(self, case_dir: str, case_context: dict):
self.logger.info(f"Saving spectrum for {case_dir}")
input_spectrum = construct_partition(
freq_name="jonswap",
freq_kwargs={
@@ -105,13 +209,12 @@ def build_cases(self, mode="one_by_one"):
templates_dir = (
"/home/tausiaj/GitHub-GeoOcean/BlueMath/bluemath_tk/wrappers/swan/templates/"
)
templates_name = ["input.swn", "depth_main.dat", "buoys.loc"]
output_dir = "/home/tausiaj/GitHub-GeoOcean/BlueMath/test_cases/swan/javi/"
templates_name = ["input.swn", "depth_main_cantabria.dat", "buoys.loc"]
output_dir = "/home/tausiaj/GitHub-GeoOcean/BlueMath/test_cases/swan/CAN/"
# Load swan model parameters
model_parameters = (
xr.open_dataset("/home/tausiaj/GitHub-GeoOcean/BlueMath/test_data/subset.nc")
.to_dataframe()
.iloc[::60]
.to_dict(orient="list")
)
# Create an instance of the SWAN model wrapper
Expand All @@ -126,11 +229,11 @@ def build_cases(self, mode="one_by_one"):
# List available launchers
print(swan_wrapper.list_available_launchers())
# Run the model
swan_wrapper.run_cases(launcher="docker", parallel=True)
# swan_wrapper.run_cases(launcher="docker", parallel=True)
# Post-process the output files
postprocessed_ds = swan_wrapper.postprocess_cases()
postprocessed_ds.to_netcdf(op.join(swan_wrapper.output_dir, "waves_part.nc"))
print(postprocessed_ds)
# postprocessed_ds = swan_wrapper.postprocess_cases()
# postprocessed_ds.to_netcdf(op.join(swan_wrapper.output_dir, "waves_part.nc"))
# print(postprocessed_ds)
# # Load spectra example
# spectra = xr.open_dataset(
# "/home/tausiaj/GitHub-GeoOcean/BlueMath/test_data/Waves_Cantabria_356.08_43.82.nc"