Skip to content

Commit f64f82f

Browse files
[pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
1 parent 8e97c39 commit f64f82f

File tree

4 files changed

+24
-35
lines changed

4 files changed

+24
-35
lines changed

machine_learning/decision_tree_pruning.py

Lines changed: 12 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ def _gini(self, y: np.ndarray) -> float:
104104

105105
_, counts = np.unique(y, return_counts=True)
106106
probabilities = counts / len(y)
107-
return 1 - np.sum(probabilities ** 2)
107+
return 1 - np.sum(probabilities**2)
108108

109109
def _entropy(self, y: np.ndarray) -> float:
110110
"""
@@ -140,7 +140,7 @@ def _find_best_split(
140140
"""
141141
best_feature = -1
142142
best_threshold = 0.0
143-
best_impurity = float('inf')
143+
best_impurity = float("inf")
144144

145145
n_features = x.shape[1]
146146
current_impurity = self._mse(y) if task_type == "regression" else self._gini(y)
@@ -194,7 +194,7 @@ def _build_tree(
194194
x: np.ndarray,
195195
y: np.ndarray,
196196
depth: int = 0,
197-
task_type: str = "regression"
197+
task_type: str = "regression",
198198
) -> "TreeNode":
199199
"""
200200
Recursively build the decision tree.
@@ -211,9 +211,11 @@ def _build_tree(
211211
node = TreeNode()
212212

213213
# Check stopping criteria
214-
if (len(y) < self.min_samples_split or
215-
(self.max_depth is not None and depth >= self.max_depth) or
216-
len(np.unique(y)) == 1):
214+
if (
215+
len(y) < self.min_samples_split
216+
or (self.max_depth is not None and depth >= self.max_depth)
217+
or len(np.unique(y)) == 1
218+
):
217219
node.is_leaf = True
218220
node.value = (
219221
np.mean(y) if task_type == "regression" else self._most_common(y)
@@ -247,9 +249,7 @@ def _build_tree(
247249
node.impurity = best_impurity
248250

249251
# Recursively build left and right subtrees
250-
node.left = self._build_tree(
251-
x[left_mask], y[left_mask], depth + 1, task_type
252-
)
252+
node.left = self._build_tree(x[left_mask], y[left_mask], depth + 1, task_type)
253253
node.right = self._build_tree(
254254
x[right_mask], y[right_mask], depth + 1, task_type
255255
)
@@ -635,10 +635,7 @@ def compare_pruning_methods() -> None:
635635
print(f"\n=== {method_name} ===")
636636

637637
tree = DecisionTreePruning(
638-
max_depth=10,
639-
min_samples_leaf=2,
640-
pruning_method=method,
641-
ccp_alpha=0.01
638+
max_depth=10, min_samples_leaf=2, pruning_method=method, ccp_alpha=0.01
642639
)
643640

644641
if method == "reduced_error":
@@ -673,7 +670,7 @@ def main() -> None:
673670
max_depth=10,
674671
min_samples_leaf=2,
675672
pruning_method="cost_complexity",
676-
ccp_alpha=0.01
673+
ccp_alpha=0.01,
677674
)
678675
tree_reg.fit(x_train, y_train)
679676

@@ -700,9 +697,7 @@ def main() -> None:
700697
y_val, y_train = y_train[:val_split], y_train[val_split:]
701698

702699
tree_cls = DecisionTreePruning(
703-
max_depth=10,
704-
min_samples_leaf=2,
705-
pruning_method="reduced_error"
700+
max_depth=10, min_samples_leaf=2, pruning_method="reduced_error"
706701
)
707702
tree_cls.fit(x_train, y_train, x_val, y_val)
708703

@@ -720,4 +715,3 @@ def main() -> None:
720715
if __name__ == "__main__":
721716
doctest.testmod()
722717
main()
723-

machine_learning/logistic_regression_vectorized.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -436,6 +436,7 @@ def generate_sample_data(
436436
else:
437437
# Multi-class classification
438438
from sklearn.datasets import make_classification
439+
439440
X, y = make_classification(
440441
n_samples=n_samples,
441442
n_features=n_features,
@@ -535,4 +536,3 @@ def main() -> None:
535536
if __name__ == "__main__":
536537
doctest.testmod()
537538
main()
538-

machine_learning/naive_bayes_laplace.py

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -226,9 +226,9 @@ def _compute_log_probabilities_discrete(
226226
)
227227

228228
# Store log probability
229-
log_probabilities[class_label][feature_idx][
230-
feature_value
231-
] = np.log(smoothed_prob)
229+
log_probabilities[class_label][feature_idx][feature_value] = np.log(
230+
smoothed_prob
231+
)
232232

233233
return log_probabilities
234234

@@ -317,9 +317,9 @@ def _predict_log_proba_discrete(self, X: np.ndarray) -> np.ndarray:
317317
feature_value
318318
in self.feature_log_prob_[class_label][feature_idx]
319319
):
320-
log_prob = self.feature_log_prob_[class_label][
321-
feature_idx
322-
][feature_value]
320+
log_prob = self.feature_log_prob_[class_label][feature_idx][
321+
feature_value
322+
]
323323
else:
324324
# Unseen feature value: use Laplace smoothing
325325
all_values = list(
@@ -651,4 +651,3 @@ def main() -> None:
651651
if __name__ == "__main__":
652652
doctest.testmod()
653653
main()
654-

machine_learning/pca_from_scratch.py

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -159,9 +159,7 @@ def fit(self, X: np.ndarray) -> "PCAFromScratch":
159159
f"n_components={self.n_components} cannot be larger than "
160160
f"min(n_samples, n_features)={min(n_samples, n_features)}"
161161
)
162-
raise ValueError(
163-
msg
164-
)
162+
raise ValueError(msg)
165163

166164
# Standardize the data
167165
X_standardized = self._standardize_data(X)
@@ -173,14 +171,12 @@ def fit(self, X: np.ndarray) -> "PCAFromScratch":
173171
eigenvalues, eigenvectors = self._eigenvalue_decomposition(covariance_matrix)
174172

175173
# Select the top n_components
176-
self.components_ = eigenvectors[:, :self.n_components]
177-
self.explained_variance_ = eigenvalues[:self.n_components]
174+
self.components_ = eigenvectors[:, : self.n_components]
175+
self.explained_variance_ = eigenvalues[: self.n_components]
178176

179177
# Calculate explained variance ratio
180178
total_variance = np.sum(eigenvalues)
181-
self.explained_variance_ratio_ = (
182-
self.explained_variance_ / total_variance
183-
)
179+
self.explained_variance_ratio_ = self.explained_variance_ / total_variance
184180

185181
return self
186182

@@ -326,7 +322,7 @@ def main() -> None:
326322
print(f"\nReconstruction error (MSE): {reconstruction_error:.6f}")
327323

328324
# Compare with sklearn
329-
print("\n" + "="*50)
325+
print("\n" + "=" * 50)
330326
print("Comparison with scikit-learn:")
331327
compare_with_sklearn()
332328

0 commit comments

Comments (0)