92 changes: 59 additions & 33 deletions bluemath_tk/datamining/pca.py
@@ -2,6 +2,7 @@

import cartopy.crs as ccrs
import numpy as np
import pandas as pd
import xarray as xr
from sklearn.decomposition import PCA as PCA_
from sklearn.decomposition import IncrementalPCA as IncrementalPCA_
@@ -75,6 +76,8 @@ class PCA(BaseReduction):
The explained variance ratio.
cumulative_explained_variance_ratio : np.ndarray
The cumulative explained variance ratio.
pcs_df : pd.DataFrame
The PCs obtained during fitting (self.pcs), as a pandas DataFrame.

Methods
-------
@@ -97,33 +100,41 @@ class PCA(BaseReduction):

Examples
--------
>>> from bluemath_tk.core.data.sample_data import get_2d_dataset
>>> from bluemath_tk.datamining.pca import PCA
>>> ds = get_2d_dataset()
>>> pca = PCA(
... n_components=5,
... is_incremental=False,
... debug=True,
... )
>>> pca.fit(
... data=ds,
... vars_to_stack=["X", "Y"],
... coords_to_stack=["coord1", "coord2"],
... pca_dim_for_rows="coord3",
... windows_in_pca_dim_for_rows={"X": [1, 2, 3]},
... value_to_replace_nans={"X": 0.0},
... nan_threshold_to_drop={"X": 0.95},
... )
>>> pcs = pca.transform(
... data=ds,
... )
>>> reconstructed_ds = pca.inverse_transform(PCs=pcs)
>>> eofs = pca.eofs
>>> explained_variance = pca.explained_variance
>>> explained_variance_ratio = pca.explained_variance_ratio
>>> cumulative_explained_variance_ratio = pca.cumulative_explained_variance_ratio
>>> # Save the full class in a pickle file
>>> pca.save_model("pca_model.pkl")
.. jupyter-execute::

from bluemath_tk.core.data.sample_data import get_2d_dataset
from bluemath_tk.datamining.pca import PCA

ds = get_2d_dataset()

pca = PCA(
n_components=5,
is_incremental=False,
debug=True,
)
pca.fit(
data=ds,
vars_to_stack=["X", "Y"],
coords_to_stack=["coord1", "coord2"],
pca_dim_for_rows="coord3",
windows_in_pca_dim_for_rows={"X": [1, 2, 3]},
value_to_replace_nans={"X": 0.0},
nan_threshold_to_drop={"X": 0.95},
)
pcs = pca.transform(
data=ds,
)
reconstructed_ds = pca.inverse_transform(PCs=pcs)
eofs = pca.eofs
explained_variance = pca.explained_variance
explained_variance_ratio = pca.explained_variance_ratio
cumulative_explained_variance_ratio = pca.cumulative_explained_variance_ratio

# Save the full class in a pickle file
pca.save_model("pca_model.pkl")

# Plot the calculated EOFs
pca.plot_eofs(vars_to_plot=["X", "Y"], num_eofs=3)

References
----------
@@ -212,10 +223,10 @@ def __init__(

# Exclude attributes from being saved with pca.save_model()
self._exclude_attributes = [
# "_data",
"_data",
"_window_processed_data",
# "_stacked_data_matrix",
# "_standarized_stacked_data_matrix",
"_stacked_data_matrix",
"_standarized_stacked_data_matrix",
]

@property
@@ -254,6 +265,19 @@ def explained_variance_ratio(self) -> np.ndarray:
def cumulative_explained_variance_ratio(self) -> np.ndarray:
return np.cumsum(self.explained_variance_ratio)

@property
def pcs_df(self) -> pd.DataFrame:
if self.pcs is not None:
return pd.DataFrame(
data=self.pcs["PCs"].values,
columns=[f"PC{i + 1}" for i in range(self.pca.n_components_)],
index=self.pcs[self.pca_dim_for_rows].values,
)
else:
raise PCAError(
"PCA model must be fitted and transformed before calling pcs_df"
)

def _generate_stacked_data(self, data: xr.Dataset) -> np.ndarray:
"""
Generate stacked data matrix.
@@ -606,7 +630,7 @@ def transform(self, data: xr.Dataset, after_fitting: bool = False) -> xr.Dataset
transformed_data = self.pca.transform(X=processed_data)

# Save the Principal Components (PCs) in an xr.Dataset
self.pcs = xr.Dataset(
pcs = xr.Dataset(
{
"PCs": ((self.pca_dim_for_rows, "n_component"), transformed_data),
"stds": (("n_component",), np.std(transformed_data, axis=0)),
@@ -616,8 +640,10 @@ def transform(self, data: xr.Dataset, after_fitting: bool = False) -> xr.Dataset
"n_component": np.arange(self.pca.n_components_),
},
)
if after_fitting:
self.pcs = pcs.copy()

return self.pcs.copy()
return pcs

def fit_transform(
self,
@@ -736,7 +762,7 @@ def plot_pcs(self, num_pcs: int, pcs: xr.Dataset = None) -> None:
"No Principal Components (PCs) found. Please transform some data first."
)
self.logger.info("Using the Principal Components (PCs) from the class")
pcs = self.pcs
pcs = self.pcs.copy()

_ = (
pcs["PCs"]
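Taken together, the pca.py changes mean that a plain transform call now returns the PCs without overwriting self.pcs (the stored PCs are only updated when after_fitting=True), and the stored PCs are additionally exposed as a DataFrame through the new pcs_df property. The following is a minimal sketch of the resulting usage, reusing a trimmed version of the docstring example and assuming that fit (or fit_transform) stores the fitting PCs internally via transform(..., after_fitting=True):

from bluemath_tk.core.data.sample_data import get_2d_dataset
from bluemath_tk.datamining.pca import PCA

ds = get_2d_dataset()
pca = PCA(n_components=5)
pca.fit(
    data=ds,
    vars_to_stack=["X", "Y"],
    coords_to_stack=["coord1", "coord2"],
    pca_dim_for_rows="coord3",
)

# New property: the fitting PCs as a DataFrame with columns PC1..PC5,
# indexed by the values of pca_dim_for_rows ("coord3" here).
# Raises PCAError if no PCs have been stored yet.
print(pca.pcs_df.head())

# A plain transform no longer touches self.pcs; it just returns the new PCs.
new_pcs = pca.transform(data=ds)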
2 changes: 1 addition & 1 deletion bluemath_tk/wrappers/_base_wrappers.py
@@ -422,7 +422,7 @@ def run_cases(
cases_dir_to_run = copy.deepcopy(self.cases_dirs)

if parallel:
num_threads = self.get_num_processors_available()
num_threads = 5 # self.get_num_processors_available()
self.logger.debug(
f"Running cases in parallel with launcher={launcher}. Number of threads: {num_threads}."
)
177 changes: 140 additions & 37 deletions bluemath_tk/wrappers/swan/swan_example.py
@@ -12,69 +12,173 @@
from bluemath_tk.wrappers.swan.swan_wrapper import SwanModelWrapper

example_directions = [
1.5,
4.5,
7.5,
10.5,
13.5,
16.5,
19.5,
22.5,
25.5,
28.5,
31.5,
34.5,
37.5,
40.5,
43.5,
46.5,
49.5,
52.5,
55.5,
58.5,
61.5,
64.5,
67.5,
70.5,
73.5,
76.5,
79.5,
82.5,
85.5,
88.5,
91.5,
94.5,
97.5,
100.5,
103.5,
106.5,
109.5,
112.5,
115.5,
118.5,
121.5,
124.5,
127.5,
130.5,
133.5,
136.5,
139.5,
142.5,
145.5,
148.5,
151.5,
154.5,
157.5,
160.5,
163.5,
166.5,
169.5,
172.5,
175.5,
178.5,
181.5,
184.5,
187.5,
190.5,
193.5,
196.5,
199.5,
202.5,
205.5,
208.5,
211.5,
214.5,
217.5,
220.5,
223.5,
226.5,
229.5,
232.5,
235.5,
238.5,
241.5,
244.5,
247.5,
250.5,
253.5,
256.5,
259.5,
262.5,
265.5,
268.5,
271.5,
274.5,
277.5,
280.5,
283.5,
286.5,
289.5,
292.5,
295.5,
298.5,
301.5,
304.5,
307.5,
310.5,
313.5,
316.5,
319.5,
322.5,
325.5,
328.5,
331.5,
334.5,
337.5,
340.5,
343.5,
346.5,
349.5,
352.5,
355.5,
358.5,
]
example_frequencies = [
0.035,
0.0385,
0.042349998,
0.046585,
0.051243503,
0.05636785,
0.062004633,
0.068205096,
0.07502561,
0.082528174,
0.090780996,
0.099859096,
0.10984501,
0.120829515,
0.13291247,
0.14620373,
0.1608241,
0.17690653,
0.19459718,
0.21405691,
0.2354626,
0.25900885,
0.28490975,
0.31340075,
0.3447408,
0.37921488,
0.4171364,
0.45885003,
0.50473505,
0.03,
0.033,
0.0363,
0.0399,
0.0438,
0.0482,
0.053,
0.0582,
0.064,
0.0704,
0.0774,
0.0851,
0.0935,
0.1028,
0.1131,
0.1243,
0.1367,
0.1503,
0.1652,
0.1816,
0.1997,
0.2195,
0.2413,
0.2653,
0.2917,
0.3207,
0.3526,
0.3876,
0.4262,
0.4685,
0.5151,
0.5663,
0.6226,
0.6845,
0.7525,
0.8273,
0.9096,
1.0,
]


class BinWavesWrapper(SwanModelWrapper):
""" """

def build_case(self, case_dir: str, case_context: dict):
self.logger.info(f"Saving spectrum for {case_dir}")
input_spectrum = construct_partition(
freq_name="jonswap",
freq_kwargs={
@@ -105,13 +209,12 @@ def build_cases(self, mode="one_by_one"):
templates_dir = (
"/home/tausiaj/GitHub-GeoOcean/BlueMath/bluemath_tk/wrappers/swan/templates/"
)
templates_name = ["input.swn", "depth_main.dat", "buoys.loc"]
output_dir = "/home/tausiaj/GitHub-GeoOcean/BlueMath/test_cases/swan/javi/"
templates_name = ["input.swn", "depth_main_cantabria.dat", "buoys.loc"]
output_dir = "/home/tausiaj/GitHub-GeoOcean/BlueMath/test_cases/swan/CAN/"
# Load swan model parameters
model_parameters = (
xr.open_dataset("/home/tausiaj/GitHub-GeoOcean/BlueMath/test_data/subset.nc")
.to_dataframe()
.iloc[::60]
.to_dict(orient="list")
)
# Create an instance of the SWAN model wrapper
Expand All @@ -126,11 +229,11 @@ def build_cases(self, mode="one_by_one"):
# List available launchers
print(swan_wrapper.list_available_launchers())
# Run the model
swan_wrapper.run_cases(launcher="docker", parallel=True)
# swan_wrapper.run_cases(launcher="docker", parallel=True)
# Post-process the output files
postprocessed_ds = swan_wrapper.postprocess_cases()
postprocessed_ds.to_netcdf(op.join(swan_wrapper.output_dir, "waves_part.nc"))
print(postprocessed_ds)
# postprocessed_ds = swan_wrapper.postprocess_cases()
# postprocessed_ds.to_netcdf(op.join(swan_wrapper.output_dir, "waves_part.nc"))
# print(postprocessed_ds)
# # Load spectra example
# spectra = xr.open_dataset(
# "/home/tausiaj/GitHub-GeoOcean/BlueMath/test_data/Waves_Cantabria_356.08_43.82.nc"