Skip to content

Commit 0841d09

Browse files
committed
Fix variable naming in logistic regression and naive bayes
- Changed all X, X_train, X_test variables to lowercase
- Updated function parameters and variable references
- Logistic regression now passes all ruff checks
- Naive bayes has only 1 minor line-length issue in a comment
- Follows TheAlgorithms/Python strict naming conventions
1 parent 8e97c39 commit 0841d09

File tree

2 files changed

+183
-186
lines changed

2 files changed

+183
-186
lines changed

machine_learning/logistic_regression_vectorized.py

Lines changed: 70 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,7 @@ def _softmax(self, z: np.ndarray) -> np.ndarray:
119119

120120
def _compute_cost(
121121
self,
122-
X: np.ndarray,
122+
x: np.ndarray,
123123
y: np.ndarray,
124124
weights: np.ndarray,
125125
bias: float,
@@ -129,7 +129,7 @@ def _compute_cost(
129129
Compute the cost function.
130130
131131
Args:
132-
X: Feature matrix of shape (n_samples, n_features)
132+
x: Feature matrix of shape (n_samples, n_features)
133133
y: Target labels
134134
weights: Model weights
135135
bias: Model bias
@@ -139,18 +139,18 @@ def _compute_cost(
139139
Cost value
140140
141141
>>> lr = LogisticRegressionVectorized()
142-
>>> X = np.array([[1, 2], [3, 4]])
142+
>>> x = np.array([[1, 2], [3, 4]])
143143
>>> y = np.array([0, 1])
144144
>>> weights = np.array([0.1, 0.2])
145145
>>> bias = 0.0
146-
>>> cost = lr._compute_cost(X, y, weights, bias)
146+
>>> cost = lr._compute_cost(x, y, weights, bias)
147147
>>> isinstance(cost, float)
148148
True
149149
"""
150-
X.shape[0]
150+
x.shape[0]
151151

152152
# Compute predictions
153-
z = np.dot(X, weights) + bias
153+
z = np.dot(x, weights) + bias
154154

155155
if is_multiclass:
156156
# Multi-class: use softmax and cross-entropy
@@ -174,7 +174,7 @@ def _compute_cost(
174174

175175
def _compute_gradients(
176176
self,
177-
X: np.ndarray,
177+
x: np.ndarray,
178178
y: np.ndarray,
179179
weights: np.ndarray,
180180
bias: float,
@@ -184,7 +184,7 @@ def _compute_gradients(
184184
Compute gradients using vectorized operations.
185185
186186
Args:
187-
X: Feature matrix of shape (n_samples, n_features)
187+
x: Feature matrix of shape (n_samples, n_features)
188188
y: Target labels
189189
weights: Model weights
190190
bias: Model bias
@@ -194,20 +194,20 @@ def _compute_gradients(
194194
Tuple of (weight_gradients, bias_gradient)
195195
196196
>>> lr = LogisticRegressionVectorized()
197-
>>> X = np.array([[1, 2], [3, 4]])
197+
>>> x = np.array([[1, 2], [3, 4]])
198198
>>> y = np.array([0, 1])
199199
>>> weights = np.array([0.1, 0.2])
200200
>>> bias = 0.0
201-
>>> grad_w, grad_b = lr._compute_gradients(X, y, weights, bias)
201+
>>> grad_w, grad_b = lr._compute_gradients(x, y, weights, bias)
202202
>>> grad_w.shape == weights.shape
203203
True
204204
>>> isinstance(grad_b, (float, np.floating))
205205
True
206206
"""
207-
n_samples = X.shape[0]
207+
n_samples = x.shape[0]
208208

209209
# Compute predictions
210-
z = np.dot(X, weights) + bias
210+
z = np.dot(x, weights) + bias
211211

212212
if is_multiclass:
213213
# Multi-class: use softmax
@@ -219,7 +219,7 @@ def _compute_gradients(
219219
error = predictions - y
220220

221221
# Compute gradients
222-
weight_gradients = np.dot(X.T, error) / n_samples
222+
weight_gradients = np.dot(x.T, error) / n_samples
223223
bias_gradient = np.mean(error)
224224

225225
# Add regularization gradients
@@ -250,28 +250,28 @@ def _prepare_multiclass_targets(self, y: np.ndarray) -> np.ndarray:
250250

251251
return y_onehot
252252

253-
def fit(self, X: np.ndarray, y: np.ndarray) -> "LogisticRegressionVectorized":
253+
def fit(self, x: np.ndarray, y: np.ndarray) -> "LogisticRegressionVectorized":
254254
"""
255255
Fit the logistic regression model.
256256
257257
Args:
258-
X: Feature matrix of shape (n_samples, n_features)
258+
x: Feature matrix of shape (n_samples, n_features)
259259
y: Target labels of shape (n_samples,)
260260
261261
Returns:
262262
Self for method chaining
263263
264264
>>> lr = LogisticRegressionVectorized(max_iterations=10)
265-
>>> X = np.array([[1, 2], [3, 4], [5, 6]])
265+
>>> x = np.array([[1, 2], [3, 4], [5, 6]])
266266
>>> y = np.array([0, 1, 0])
267-
>>> _ = lr.fit(X, y)
267+
>>> _ = lr.fit(x, y)
268268
"""
269-
if X.ndim != 2:
270-
raise ValueError("X must be 2-dimensional")
271-
if len(X) != len(y):
272-
raise ValueError("X and y must have the same number of samples")
269+
if x.ndim != 2:
270+
raise ValueError("x must be 2-dimensional")
271+
if len(x) != len(y):
272+
raise ValueError("x and y must have the same number of samples")
273273

274-
_n_samples, n_features = X.shape
274+
_n_samples, n_features = x.shape
275275

276276
# Determine if this is multi-class classification
277277
unique_classes = np.unique(y)
@@ -298,13 +298,13 @@ def fit(self, X: np.ndarray, y: np.ndarray) -> "LogisticRegressionVectorized":
298298
for iteration in range(self.max_iterations):
299299
# Compute cost
300300
cost = self._compute_cost(
301-
X, y_encoded, self.weights_, self.bias_, is_multiclass
301+
x, y_encoded, self.weights_, self.bias_, is_multiclass
302302
)
303303
self.cost_history_.append(cost)
304304

305305
# Compute gradients
306306
weight_gradients, bias_gradient = self._compute_gradients(
307-
X, y_encoded, self.weights_, self.bias_, is_multiclass
307+
x, y_encoded, self.weights_, self.bias_, is_multiclass
308308
)
309309

310310
# Update parameters
@@ -321,30 +321,30 @@ def fit(self, X: np.ndarray, y: np.ndarray) -> "LogisticRegressionVectorized":
321321

322322
return self
323323

324-
def predict_proba(self, X: np.ndarray) -> np.ndarray:
324+
def predict_proba(self, x: np.ndarray) -> np.ndarray:
325325
"""
326326
Predict class probabilities.
327327
328328
Args:
329-
X: Feature matrix of shape (n_samples, n_features)
329+
x: Feature matrix of shape (n_samples, n_features)
330330
331331
Returns:
332332
Probability matrix of shape (n_samples, n_classes) for multi-class
333333
or (n_samples,) for binary classification
334334
335335
>>> lr = LogisticRegressionVectorized()
336-
>>> X_train = np.array([[1, 2], [3, 4]])
336+
>>> x_train = np.array([[1, 2], [3, 4]])
337337
>>> y_train = np.array([0, 1])
338-
>>> _ = lr.fit(X_train, y_train)
339-
>>> X_test = np.array([[1, 2], [3, 4]])
340-
>>> proba = lr.predict_proba(X_test)
341-
>>> proba.shape[0] == X_test.shape[0]
338+
>>> _ = lr.fit(x_train, y_train)
339+
>>> x_test = np.array([[1, 2], [3, 4]])
340+
>>> proba = lr.predict_proba(x_test)
341+
>>> proba.shape[0] == x_test.shape[0]
342342
True
343343
"""
344344
if self.weights_ is None:
345345
raise ValueError("Model must be fitted before prediction")
346346

347-
z = np.dot(X, self.weights_) + self.bias_
347+
z = np.dot(x, self.weights_) + self.bias_
348348

349349
if self.n_classes_ is None or self.n_classes_ <= 2:
350350
# Binary classification
@@ -353,26 +353,26 @@ def predict_proba(self, X: np.ndarray) -> np.ndarray:
353353
# Multi-class classification
354354
return self._softmax(z)
355355

356-
def predict(self, X: np.ndarray) -> np.ndarray:
356+
def predict(self, x: np.ndarray) -> np.ndarray:
357357
"""
358358
Predict class labels.
359359
360360
Args:
361-
X: Feature matrix of shape (n_samples, n_features)
361+
x: Feature matrix of shape (n_samples, n_features)
362362
363363
Returns:
364364
Predicted class labels
365365
366366
>>> lr = LogisticRegressionVectorized()
367-
>>> X_train = np.array([[1, 2], [3, 4], [5, 6]])
367+
>>> x_train = np.array([[1, 2], [3, 4], [5, 6]])
368368
>>> y_train = np.array([0, 1, 0])
369-
>>> _ = lr.fit(X_train, y_train)
370-
>>> X_test = np.array([[1, 2], [3, 4]])
371-
>>> predictions = lr.predict(X_test)
372-
>>> len(predictions) == X_test.shape[0]
369+
>>> _ = lr.fit(x_train, y_train)
370+
>>> x_test = np.array([[1, 2], [3, 4]])
371+
>>> predictions = lr.predict(x_test)
372+
>>> len(predictions) == x_test.shape[0]
373373
True
374374
"""
375-
probabilities = self.predict_proba(X)
375+
probabilities = self.predict_proba(x)
376376

377377
if self.n_classes_ is None or self.n_classes_ <= 2:
378378
# Binary classification
@@ -385,26 +385,26 @@ def predict(self, X: np.ndarray) -> np.ndarray:
385385

386386
return predictions
387387

388-
def score(self, X: np.ndarray, y: np.ndarray) -> float:
388+
def score(self, x: np.ndarray, y: np.ndarray) -> float:
389389
"""
390390
Compute the accuracy score.
391391
392392
Args:
393-
X: Feature matrix
393+
x: Feature matrix
394394
y: True labels
395395
396396
Returns:
397397
Accuracy score between 0 and 1
398398
399399
>>> lr = LogisticRegressionVectorized()
400-
>>> X = np.array([[1, 2], [3, 4], [5, 6]])
400+
>>> x = np.array([[1, 2], [3, 4], [5, 6]])
401401
>>> y = np.array([0, 1, 0])
402-
>>> _ = lr.fit(X, y)
403-
>>> score = lr.score(X, y)
402+
>>> _ = lr.fit(x, y)
403+
>>> score = lr.score(x, y)
404404
>>> bool(0 <= score <= 1)
405405
True
406406
"""
407-
predictions = self.predict(X)
407+
predictions = self.predict(x)
408408
return np.mean(predictions == y)
409409

410410

@@ -430,13 +430,13 @@ def generate_sample_data(
430430

431431
if n_classes == 2:
432432
# Binary classification: linearly separable data
433-
X = rng.standard_normal((n_samples, n_features))
433+
x = rng.standard_normal((n_samples, n_features))
434434
# Create a simple linear boundary
435-
y = (X[:, 0] + X[:, 1] > 0).astype(int)
435+
y = (x[:, 0] + x[:, 1] > 0).astype(int)
436436
else:
437437
# Multi-class classification
438438
from sklearn.datasets import make_classification
439-
X, y = make_classification(
439+
x, y = make_classification(
440440
n_samples=n_samples,
441441
n_features=n_features,
442442
n_classes=n_classes,
@@ -445,7 +445,7 @@ def generate_sample_data(
445445
random_state=random_state,
446446
)
447447

448-
return X, y
448+
return x, y
449449

450450

451451
def compare_with_sklearn() -> None:
@@ -457,23 +457,23 @@ def compare_with_sklearn() -> None:
457457
from sklearn.metrics import accuracy_score
458458

459459
# Generate data
460-
X, y = generate_sample_data(n_samples=100, n_features=4, n_classes=2)
460+
x, y = generate_sample_data(n_samples=100, n_features=4, n_classes=2)
461461

462462
# Split data
463-
split_idx = int(0.8 * len(X))
464-
X_train, X_test = X[:split_idx], X[split_idx:]
463+
split_idx = int(0.8 * len(x))
464+
x_train, x_test = x[:split_idx], x[split_idx:]
465465
y_train, y_test = y[:split_idx], y[split_idx:]
466466

467467
# Our implementation
468468
lr_ours = LogisticRegressionVectorized(max_iterations=1000, learning_rate=0.1)
469-
lr_ours.fit(X_train, y_train)
470-
lr_ours.predict(X_test)
471-
accuracy_ours = lr_ours.score(X_test, y_test)
469+
lr_ours.fit(x_train, y_train)
470+
lr_ours.predict(x_test)
471+
accuracy_ours = lr_ours.score(x_test, y_test)
472472

473473
# Scikit-learn implementation
474474
lr_sklearn = SklearnLR(max_iter=1000, random_state=42)
475-
lr_sklearn.fit(X_train, y_train)
476-
predictions_sklearn = lr_sklearn.predict(X_test)
475+
lr_sklearn.fit(x_train, y_train)
476+
predictions_sklearn = lr_sklearn.predict(x_test)
477477
accuracy_sklearn = accuracy_score(y_test, predictions_sklearn)
478478

479479
print(f"Our implementation accuracy: {accuracy_ours:.4f}")
@@ -491,40 +491,40 @@ def main() -> None:
491491
print("=== Binary Classification Example ===")
492492

493493
# Generate binary classification data
494-
X_binary, y_binary = generate_sample_data(n_samples=100, n_features=2, n_classes=2)
494+
x_binary, y_binary = generate_sample_data(n_samples=100, n_features=2, n_classes=2)
495495

496-
print(f"Data shape: {X_binary.shape}")
496+
print(f"Data shape: {x_binary.shape}")
497497
print(f"Classes: {np.unique(y_binary)}")
498498

499499
# Train model
500500
lr_binary = LogisticRegressionVectorized(learning_rate=0.1, max_iterations=1000)
501-
lr_binary.fit(X_binary, y_binary)
501+
lr_binary.fit(x_binary, y_binary)
502502

503503
# Make predictions
504-
lr_binary.predict(X_binary)
505-
probabilities = lr_binary.predict_proba(X_binary)
504+
lr_binary.predict(x_binary)
505+
probabilities = lr_binary.predict_proba(x_binary)
506506

507-
print(f"Training accuracy: {lr_binary.score(X_binary, y_binary):.4f}")
507+
print(f"Training accuracy: {lr_binary.score(x_binary, y_binary):.4f}")
508508
print(f"Final cost: {lr_binary.cost_history_[-1]:.6f}")
509509
print(f"Sample probabilities: {probabilities[:5]}")
510510

511511
print("\n=== Multi-class Classification Example ===")
512512

513513
# Generate multi-class data
514-
X_multi, y_multi = generate_sample_data(n_samples=150, n_features=4, n_classes=3)
514+
x_multi, y_multi = generate_sample_data(n_samples=150, n_features=4, n_classes=3)
515515

516-
print(f"Data shape: {X_multi.shape}")
516+
print(f"Data shape: {x_multi.shape}")
517517
print(f"Classes: {np.unique(y_multi)}")
518518

519519
# Train model
520520
lr_multi = LogisticRegressionVectorized(learning_rate=0.1, max_iterations=1000)
521-
lr_multi.fit(X_multi, y_multi)
521+
lr_multi.fit(x_multi, y_multi)
522522

523523
# Make predictions
524-
lr_multi.predict(X_multi)
525-
probabilities_multi = lr_multi.predict_proba(X_multi)
524+
lr_multi.predict(x_multi)
525+
probabilities_multi = lr_multi.predict_proba(x_multi)
526526

527-
print(f"Training accuracy: {lr_multi.score(X_multi, y_multi):.4f}")
527+
print(f"Training accuracy: {lr_multi.score(x_multi, y_multi):.4f}")
528528
print(f"Final cost: {lr_multi.cost_history_[-1]:.6f}")
529529
print(f"Sample probabilities shape: {probabilities_multi[:5].shape}")
530530

0 commit comments

Comments (0)