[pre-commit.ci] auto fixes from pre-commit.com hooks

pre-commit-ci[bot] · pre-commit-ci[bot] · commit 62810707e160 · 2025-10-08T20:07:14.000Z
for more information, see https://pre-commit.ci
diff --git a/machine_learning/decision_tree_pruning.py b/machine_learning/decision_tree_pruning.py
@@ -104,7 +104,7 @@ def _gini(self, y: np.ndarray) -> float:
 
         _, counts = np.unique(y, return_counts=True)
         probabilities = counts / len(y)
-        return 1 - np.sum(probabilities ** 2)
+        return 1 - np.sum(probabilities**2)
 
     def _entropy(self, y: np.ndarray) -> float:
         """
@@ -140,7 +140,7 @@ def _find_best_split(
         """
         best_feature = -1
         best_threshold = 0.0
-        best_impurity = float('inf')
+        best_impurity = float("inf")
 
         n_features = x.shape[1]
         current_impurity = self._mse(y) if task_type == "regression" else self._gini(y)
@@ -194,7 +194,7 @@ def _build_tree(
         x: np.ndarray,
         y: np.ndarray,
         depth: int = 0,
-        task_type: str = "regression"
+        task_type: str = "regression",
     ) -> "TreeNode":
         """
         Recursively build the decision tree.
@@ -211,9 +211,11 @@ def _build_tree(
         node = TreeNode()
 
         # Check stopping criteria
-        if (len(y) < self.min_samples_split or
-            (self.max_depth is not None and depth >= self.max_depth) or
-            len(np.unique(y)) == 1):
+        if (
+            len(y) < self.min_samples_split
+            or (self.max_depth is not None and depth >= self.max_depth)
+            or len(np.unique(y)) == 1
+        ):
             node.is_leaf = True
             node.value = (
                 np.mean(y) if task_type == "regression" else self._most_common(y)
@@ -247,9 +249,7 @@ def _build_tree(
         node.impurity = best_impurity
 
         # Recursively build left and right subtrees
-        node.left = self._build_tree(
-            x[left_mask], y[left_mask], depth + 1, task_type
-        )
+        node.left = self._build_tree(x[left_mask], y[left_mask], depth + 1, task_type)
         node.right = self._build_tree(
             x[right_mask], y[right_mask], depth + 1, task_type
         )
@@ -651,7 +651,7 @@ def compare_pruning_methods() -> None:
             max_depth=10,
             min_samples_leaf=2,
             pruning_method=method,  # type: ignore[arg-type]
-            ccp_alpha=0.01
+            ccp_alpha=0.01,
         )
 
         if method == "reduced_error":
@@ -686,7 +686,7 @@ def main() -> None:
         max_depth=10,
         min_samples_leaf=2,
         pruning_method="cost_complexity",
-        ccp_alpha=0.01
+        ccp_alpha=0.01,
     )
     tree_reg.fit(x_train, y_train)
 
@@ -713,9 +713,7 @@ def main() -> None:
     y_val, y_train = y_train[:val_split], y_train[val_split:]
 
     tree_cls = DecisionTreePruning(
-        max_depth=10,
-        min_samples_leaf=2,
-        pruning_method="reduced_error"
+        max_depth=10, min_samples_leaf=2, pruning_method="reduced_error"
     )
     tree_cls.fit(x_train, y_train, x_val, y_val)
 
@@ -733,4 +731,3 @@ def main() -> None:
 if __name__ == "__main__":
     doctest.testmod()
     main()
-
diff --git a/machine_learning/logistic_regression_vectorized.py b/machine_learning/logistic_regression_vectorized.py
@@ -445,6 +445,7 @@ def generate_sample_data(
     else:
         # Multi-class classification
         from sklearn.datasets import make_classification
+
         x, y = make_classification(
             n_samples=n_samples,
             n_features=n_features,
@@ -544,4 +545,3 @@ def main() -> None:
 if __name__ == "__main__":
     doctest.testmod()
     main()
-
diff --git a/machine_learning/naive_bayes_laplace.py b/machine_learning/naive_bayes_laplace.py
@@ -103,7 +103,8 @@ def _compute_class_prior(self, y: np.ndarray) -> dict[int, float]:
 
         return prior
 
-    def _compute_feature_counts(self, x: np.ndarray, y: np.ndarray
+    def _compute_feature_counts(
+        self, x: np.ndarray, y: np.ndarray
     ) -> dict[int, dict[int, dict[int, int]]]:
         """
         Compute feature counts for each class (for discrete features).
@@ -144,7 +145,8 @@ def _compute_feature_counts(self, x: np.ndarray, y: np.ndarray
 
         return feature_counts
 
-    def _compute_feature_statistics(self, x: np.ndarray, y: np.ndarray
+    def _compute_feature_statistics(
+        self, x: np.ndarray, y: np.ndarray
     ) -> tuple[dict[int, dict[int, float]], dict[int, dict[int, float]]]:
         """
         Compute mean and variance for each feature in each class (continuous features).
@@ -185,7 +187,8 @@ def _compute_feature_statistics(self, x: np.ndarray, y: np.ndarray
 
         return means, variances
 
-    def _compute_log_probabilities_discrete(self, x: np.ndarray, y: np.ndarray
+    def _compute_log_probabilities_discrete(
+        self, x: np.ndarray, y: np.ndarray
     ) -> dict[int, dict[int, dict[int, float]]]:
         """
         Compute log probabilities for discrete features with Laplace smoothing.
@@ -224,9 +227,9 @@ def _compute_log_probabilities_discrete(self, x: np.ndarray, y: np.ndarray
                     )
 
                     # Store log probability
-                    log_probabilities[class_label][feature_idx][
-                        feature_value
-                    ] = np.log(smoothed_prob)
+                    log_probabilities[class_label][feature_idx][feature_value] = np.log(
+                        smoothed_prob
+                    )
 
         return log_probabilities
 
@@ -319,9 +322,9 @@ def _predict_log_proba_discrete(self, x: np.ndarray) -> np.ndarray:
                         feature_value_int
                         in self.feature_log_prob_[class_label][feature_idx]
                     ):
-                        log_prob = self.feature_log_prob_[class_label][
-                            feature_idx
-                        ][feature_value_int]
+                        log_prob = self.feature_log_prob_[class_label][feature_idx][
+                            feature_value_int
+                        ]
                     else:
                         # Unseen feature value: use Laplace smoothing
                         all_values = list(
@@ -370,10 +373,12 @@ def _predict_log_proba_continuous(self, x: np.ndarray) -> np.ndarray:
 
                 # Compute Gaussian log probabilities for all samples
                 feature_values = x[:, feature_idx]
-                log_proba[:, i] += np.array([
-                    self._gaussian_log_probability(val, means, variances)
-                    for val in feature_values
-                ])
+                log_proba[:, i] += np.array(
+                    [
+                        self._gaussian_log_probability(val, means, variances)
+                        for val in feature_values
+                    ]
+                )
 
         return log_proba
 
@@ -660,4 +665,3 @@ def main() -> None:
 if __name__ == "__main__":
     doctest.testmod()
     main()
-
diff --git a/machine_learning/pca_from_scratch.py b/machine_learning/pca_from_scratch.py
@@ -159,9 +159,7 @@ def fit(self, x: np.ndarray) -> "PCAFromScratch":
                 f"n_components={self.n_components} cannot be larger than "
                 f"min(n_samples, n_features)={min(n_samples, n_features)}"
             )
-            raise ValueError(
-                msg
-            )
+            raise ValueError(msg)
 
         # Standardize the data
         x_standardized = self._standardize_data(x)
@@ -173,14 +171,12 @@ def fit(self, x: np.ndarray) -> "PCAFromScratch":
         eigenvalues, eigenvectors = self._eigenvalue_decomposition(covariance_matrix)
 
         # Select the top n_components
-        self.components_ = eigenvectors[:, :self.n_components]
-        self.explained_variance_ = eigenvalues[:self.n_components]
+        self.components_ = eigenvectors[:, : self.n_components]
+        self.explained_variance_ = eigenvalues[: self.n_components]
 
         # Calculate explained variance ratio
         total_variance = np.sum(eigenvalues)
-        self.explained_variance_ratio_ = (
-            self.explained_variance_ / total_variance
-        )
+        self.explained_variance_ratio_ = self.explained_variance_ / total_variance
 
         return self
 
@@ -327,7 +323,7 @@ def main() -> None:
     print(f"\nReconstruction error (MSE): {reconstruction_error:.6f}")
 
     # Compare with sklearn
-    print("\n" + "="*50)
+    print("\n" + "=" * 50)
     print("Comparison with scikit-learn:")
     compare_with_sklearn()