Skip to content

Commit 6281070

Browse files
[pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
1 parent 540772f commit 6281070

File tree

4 files changed: +36 -39 lines changed

4 files changed: +36 -39 lines changed

machine_learning/decision_tree_pruning.py

Lines changed: 12 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -104,7 +104,7 @@ def _gini(self, y: np.ndarray) -> float:
104104

105105
_, counts = np.unique(y, return_counts=True)
106106
probabilities = counts / len(y)
107-
return 1 - np.sum(probabilities ** 2)
107+
return 1 - np.sum(probabilities**2)
108108

109109
def _entropy(self, y: np.ndarray) -> float:
110110
"""
@@ -140,7 +140,7 @@ def _find_best_split(
140140
"""
141141
best_feature = -1
142142
best_threshold = 0.0
143-
best_impurity = float('inf')
143+
best_impurity = float("inf")
144144

145145
n_features = x.shape[1]
146146
current_impurity = self._mse(y) if task_type == "regression" else self._gini(y)
@@ -194,7 +194,7 @@ def _build_tree(
194194
x: np.ndarray,
195195
y: np.ndarray,
196196
depth: int = 0,
197-
task_type: str = "regression"
197+
task_type: str = "regression",
198198
) -> "TreeNode":
199199
"""
200200
Recursively build the decision tree.
@@ -211,9 +211,11 @@ def _build_tree(
211211
node = TreeNode()
212212

213213
# Check stopping criteria
214-
if (len(y) < self.min_samples_split or
215-
(self.max_depth is not None and depth >= self.max_depth) or
216-
len(np.unique(y)) == 1):
214+
if (
215+
len(y) < self.min_samples_split
216+
or (self.max_depth is not None and depth >= self.max_depth)
217+
or len(np.unique(y)) == 1
218+
):
217219
node.is_leaf = True
218220
node.value = (
219221
np.mean(y) if task_type == "regression" else self._most_common(y)
@@ -247,9 +249,7 @@ def _build_tree(
247249
node.impurity = best_impurity
248250

249251
# Recursively build left and right subtrees
250-
node.left = self._build_tree(
251-
x[left_mask], y[left_mask], depth + 1, task_type
252-
)
252+
node.left = self._build_tree(x[left_mask], y[left_mask], depth + 1, task_type)
253253
node.right = self._build_tree(
254254
x[right_mask], y[right_mask], depth + 1, task_type
255255
)
@@ -651,7 +651,7 @@ def compare_pruning_methods() -> None:
651651
max_depth=10,
652652
min_samples_leaf=2,
653653
pruning_method=method, # type: ignore[arg-type]
654-
ccp_alpha=0.01
654+
ccp_alpha=0.01,
655655
)
656656

657657
if method == "reduced_error":
@@ -686,7 +686,7 @@ def main() -> None:
686686
max_depth=10,
687687
min_samples_leaf=2,
688688
pruning_method="cost_complexity",
689-
ccp_alpha=0.01
689+
ccp_alpha=0.01,
690690
)
691691
tree_reg.fit(x_train, y_train)
692692

@@ -713,9 +713,7 @@ def main() -> None:
713713
y_val, y_train = y_train[:val_split], y_train[val_split:]
714714

715715
tree_cls = DecisionTreePruning(
716-
max_depth=10,
717-
min_samples_leaf=2,
718-
pruning_method="reduced_error"
716+
max_depth=10, min_samples_leaf=2, pruning_method="reduced_error"
719717
)
720718
tree_cls.fit(x_train, y_train, x_val, y_val)
721719

@@ -733,4 +731,3 @@ def main() -> None:
733731
if __name__ == "__main__":
734732
doctest.testmod()
735733
main()
736-

machine_learning/logistic_regression_vectorized.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -445,6 +445,7 @@ def generate_sample_data(
445445
else:
446446
# Multi-class classification
447447
from sklearn.datasets import make_classification
448+
448449
x, y = make_classification(
449450
n_samples=n_samples,
450451
n_features=n_features,
@@ -544,4 +545,3 @@ def main() -> None:
544545
if __name__ == "__main__":
545546
doctest.testmod()
546547
main()
547-

machine_learning/naive_bayes_laplace.py

Lines changed: 18 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -103,7 +103,8 @@ def _compute_class_prior(self, y: np.ndarray) -> dict[int, float]:
103103

104104
return prior
105105

106-
def _compute_feature_counts(self, x: np.ndarray, y: np.ndarray
106+
def _compute_feature_counts(
107+
self, x: np.ndarray, y: np.ndarray
107108
) -> dict[int, dict[int, dict[int, int]]]:
108109
"""
109110
Compute feature counts for each class (for discrete features).
@@ -144,7 +145,8 @@ def _compute_feature_counts(self, x: np.ndarray, y: np.ndarray
144145

145146
return feature_counts
146147

147-
def _compute_feature_statistics(self, x: np.ndarray, y: np.ndarray
148+
def _compute_feature_statistics(
149+
self, x: np.ndarray, y: np.ndarray
148150
) -> tuple[dict[int, dict[int, float]], dict[int, dict[int, float]]]:
149151
"""
150152
Compute mean and variance for each feature in each class (continuous features).
@@ -185,7 +187,8 @@ def _compute_feature_statistics(self, x: np.ndarray, y: np.ndarray
185187

186188
return means, variances
187189

188-
def _compute_log_probabilities_discrete(self, x: np.ndarray, y: np.ndarray
190+
def _compute_log_probabilities_discrete(
191+
self, x: np.ndarray, y: np.ndarray
189192
) -> dict[int, dict[int, dict[int, float]]]:
190193
"""
191194
Compute log probabilities for discrete features with Laplace smoothing.
@@ -224,9 +227,9 @@ def _compute_log_probabilities_discrete(self, x: np.ndarray, y: np.ndarray
224227
)
225228

226229
# Store log probability
227-
log_probabilities[class_label][feature_idx][
228-
feature_value
229-
] = np.log(smoothed_prob)
230+
log_probabilities[class_label][feature_idx][feature_value] = np.log(
231+
smoothed_prob
232+
)
230233

231234
return log_probabilities
232235

@@ -319,9 +322,9 @@ def _predict_log_proba_discrete(self, x: np.ndarray) -> np.ndarray:
319322
feature_value_int
320323
in self.feature_log_prob_[class_label][feature_idx]
321324
):
322-
log_prob = self.feature_log_prob_[class_label][
323-
feature_idx
324-
][feature_value_int]
325+
log_prob = self.feature_log_prob_[class_label][feature_idx][
326+
feature_value_int
327+
]
325328
else:
326329
# Unseen feature value: use Laplace smoothing
327330
all_values = list(
@@ -370,10 +373,12 @@ def _predict_log_proba_continuous(self, x: np.ndarray) -> np.ndarray:
370373

371374
# Compute Gaussian log probabilities for all samples
372375
feature_values = x[:, feature_idx]
373-
log_proba[:, i] += np.array([
374-
self._gaussian_log_probability(val, means, variances)
375-
for val in feature_values
376-
])
376+
log_proba[:, i] += np.array(
377+
[
378+
self._gaussian_log_probability(val, means, variances)
379+
for val in feature_values
380+
]
381+
)
377382

378383
return log_proba
379384

@@ -660,4 +665,3 @@ def main() -> None:
660665
if __name__ == "__main__":
661666
doctest.testmod()
662667
main()
663-

machine_learning/pca_from_scratch.py

Lines changed: 5 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -159,9 +159,7 @@ def fit(self, x: np.ndarray) -> "PCAFromScratch":
159159
f"n_components={self.n_components} cannot be larger than "
160160
f"min(n_samples, n_features)={min(n_samples, n_features)}"
161161
)
162-
raise ValueError(
163-
msg
164-
)
162+
raise ValueError(msg)
165163

166164
# Standardize the data
167165
x_standardized = self._standardize_data(x)
@@ -173,14 +171,12 @@ def fit(self, x: np.ndarray) -> "PCAFromScratch":
173171
eigenvalues, eigenvectors = self._eigenvalue_decomposition(covariance_matrix)
174172

175173
# Select the top n_components
176-
self.components_ = eigenvectors[:, :self.n_components]
177-
self.explained_variance_ = eigenvalues[:self.n_components]
174+
self.components_ = eigenvectors[:, : self.n_components]
175+
self.explained_variance_ = eigenvalues[: self.n_components]
178176

179177
# Calculate explained variance ratio
180178
total_variance = np.sum(eigenvalues)
181-
self.explained_variance_ratio_ = (
182-
self.explained_variance_ / total_variance
183-
)
179+
self.explained_variance_ratio_ = self.explained_variance_ / total_variance
184180

185181
return self
186182

@@ -327,7 +323,7 @@ def main() -> None:
327323
print(f"\nReconstruction error (MSE): {reconstruction_error:.6f}")
328324

329325
# Compare with sklearn
330-
print("\n" + "="*50)
326+
print("\n" + "=" * 50)
331327
print("Comparison with scikit-learn:")
332328
compare_with_sklearn()
333329

0 commit comments

Comments
 (0)