Skip to content

Commit fb0fdb4

Browse files
committed
Updated with descriptive variables
1 parent 238f54d commit fb0fdb4

File tree

1 file changed

+96
-76
lines changed

1 file changed

+96
-76
lines changed
Lines changed: 96 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -1,149 +1,168 @@
11
import doctest
2-
32
import numpy as np
43
from numpy import ndarray
54
from sklearn.datasets import load_iris
65

76

87
def collect_dataset() -> tuple[ndarray, ndarray]:
    """
    Load the Iris dataset and return features and labels.

    Returns:
        tuple[ndarray, ndarray]: Feature matrix and target labels.

    Example:
        >>> features, targets = collect_dataset()
        >>> features.shape
        (150, 4)
        >>> targets.shape
        (150,)
    """
    iris_dataset = load_iris()
    # Copy into plain ndarrays so callers get the annotated return types.
    return np.array(iris_dataset.data), np.array(iris_dataset.target)
2223

2324

24-
def compute_pairwise_affinities(data_matrix: ndarray, sigma: float = 1.0) -> ndarray:
    """
    Compute high-dimensional affinities (P matrix) using a Gaussian kernel.

    The kernel matrix is normalized by its global sum and then symmetrized
    with a division by ``2 * n_samples``, so the entries of the returned
    matrix sum to ``1 / n_samples``.

    Args:
        data_matrix: Input data of shape (n_samples, n_features).
        sigma: Gaussian kernel bandwidth; must be strictly positive.

    Returns:
        ndarray: Symmetrized probability matrix with a zero diagonal.

    Raises:
        ValueError: If ``sigma`` is not strictly positive.

    Example:
        >>> x = np.array([[0.0, 0.0], [1.0, 0.0]])
        >>> probabilities = compute_pairwise_affinities(x)
        >>> float(round(probabilities[0, 1], 3))
        0.25
    """
    if sigma <= 0:
        raise ValueError("sigma must be > 0")

    n_samples = data_matrix.shape[0]

    # ||a - b||^2 = ||a||^2 + ||b||^2 - 2 * a.b, evaluated for every pair.
    squared_norms = np.sum(np.square(data_matrix), axis=1)
    squared_distance = (
        squared_norms[:, np.newaxis]
        + squared_norms[np.newaxis, :]
        - 2 * np.dot(data_matrix, data_matrix.T)
    )
    # Round-off can push distances between near-identical points below zero.
    squared_distance = np.maximum(squared_distance, 0)

    affinity_matrix = np.exp(-squared_distance / (2 * sigma**2))
    # A point is never its own neighbour.
    np.fill_diagonal(affinity_matrix, 0)

    affinity_matrix /= np.sum(affinity_matrix)
    return (affinity_matrix + affinity_matrix.T) / (2 * n_samples)
4551

4652

47-
def compute_low_dim_affinities(embedding_matrix: ndarray) -> tuple[ndarray, ndarray]:
    """
    Compute low-dimensional affinities (Q matrix) using a Student-t distribution.

    Args:
        embedding_matrix: Low-dimensional embedding of shape
            (n_samples, n_components).

    Returns:
        tuple[ndarray, ndarray]: (Q probability matrix, numerator matrix).
        The numerator matrix holds the unnormalized kernel values
        ``1 / (1 + squared distance)`` with a zero diagonal; Q is that matrix
        divided by its total sum.

    Example:
        >>> y = np.array([[0.0, 0.0], [1.0, 0.0]])
        >>> q_matrix, numerators = compute_low_dim_affinities(y)
        >>> q_matrix.shape
        (2, 2)
    """
    # ||a - b||^2 = ||a||^2 + ||b||^2 - 2 * a.b, evaluated for every pair.
    squared_norms = np.sum(np.square(embedding_matrix), axis=1)
    squared_distance = (
        squared_norms[:, np.newaxis]
        + squared_norms[np.newaxis, :]
        - 2 * np.dot(embedding_matrix, embedding_matrix.T)
    )
    # Round-off can push distances between near-identical points below zero.
    squared_distance = np.maximum(squared_distance, 0)

    numerator_matrix = 1 / (1 + squared_distance)
    # A point is never its own neighbour.
    np.fill_diagonal(numerator_matrix, 0)

    q_matrix = numerator_matrix / np.sum(numerator_matrix)
    return q_matrix, numerator_matrix
7181

7282

7383
def apply_tsne(
    data_matrix: ndarray,
    n_components: int = 2,
    learning_rate: float = 200.0,
    n_iter: int = 500,
) -> ndarray:
    """
    Apply t-SNE for dimensionality reduction.

    The embedding starts from small Gaussian noise and is refined by gradient
    descent with momentum on the mismatch between the high-dimensional (P)
    and low-dimensional (Q) affinity matrices. The initialization is not
    seeded, so results differ between calls.

    Args:
        data_matrix: Original dataset (features).
        n_components: Target dimension (2D or 3D).
        learning_rate: Step size for gradient descent.
        n_iter: Number of iterations.

    Returns:
        ndarray: Low-dimensional embedding of the data.

    Raises:
        ValueError: If ``n_components`` or ``n_iter`` is smaller than 1.

    Example:
        >>> features, _ = collect_dataset()
        >>> embedding = apply_tsne(features, n_components=2, n_iter=50)
        >>> embedding.shape
        (150, 2)
    """
    if n_components < 1 or n_iter < 1:
        raise ValueError("n_components and n_iter must be >= 1")

    n_samples = data_matrix.shape[0]
    rng = np.random.default_rng()
    embedding = rng.standard_normal((n_samples, n_components)) * 1e-4

    # Floor the probabilities at a tiny positive value.
    high_dim_affinities = compute_pairwise_affinities(data_matrix)
    high_dim_affinities = np.maximum(high_dim_affinities, 1e-12)

    embedding_increment = np.zeros_like(embedding)
    momentum = 0.5
    # Momentum is raised once, a quarter of the way through the run.
    momentum_switch_iteration = n_iter // 4

    for iteration in range(n_iter):
        low_dim_affinities, numerator_matrix = compute_low_dim_affinities(embedding)
        low_dim_affinities = np.maximum(low_dim_affinities, 1e-12)

        weighted_diff = (high_dim_affinities - low_dim_affinities) * numerator_matrix

        # dC/dy_i = 4 * sum_j (p_ij - q_ij) * num_ij * (y_i - y_j), which in
        # matrix form is rowsum(W) * Y - W @ Y. The earlier code had the two
        # terms swapped, so the update below climbed the cost instead of
        # descending it.
        gradient = 4 * (
            np.sum(weighted_diff, axis=1)[:, np.newaxis] * embedding
            - np.dot(weighted_diff, embedding)
        )

        embedding_increment = momentum * embedding_increment - learning_rate * gradient
        embedding += embedding_increment

        if iteration == momentum_switch_iteration:
            momentum = 0.8

    return embedding
124138

125139

126140
def main() -> None:
    """
    Run t-SNE on the Iris dataset and display the first 5 embeddings.

    Prints the first five embedded points and then builds a scatter plot of
    the 2-D embedding coloured by class label.

    Raises:
        TypeError: If the computed embedding is not an ndarray.

    Example:
        >>> main()  # doctest: +ELLIPSIS
        t-SNE embedding (first 5 points):
        [[...
    """
    features, labels = collect_dataset()
    # One name for the embedding throughout: the previous revision assigned
    # ``y_emb`` but then read an undefined ``embedding``.
    embedding = apply_tsne(features, n_components=2, n_iter=300)

    if not isinstance(embedding, np.ndarray):
        raise TypeError("t-SNE embedding must be an ndarray")

    print("t-SNE embedding (first 5 points):")
    print(embedding[:5])

    # Optional visualization; imported here so matplotlib is only needed
    # when main() actually runs.
    import matplotlib.pyplot as plt

    plt.scatter(embedding[:, 0], embedding[:, 1], c=labels, cmap="viridis")
    plt.title("t-SNE Visualization of Iris Dataset")
    plt.xlabel("Dimension 1")
    plt.ylabel("Dimension 2")
    # NOTE(review): the diff hunk ends here; any remaining statements of
    # main() are not shown in this view and must be kept as they are.
@@ -153,3 +172,4 @@ def main() -> None:
153172
if __name__ == "__main__":
    # Run the embedded doctests first, then the demo.
    doctest.testmod()
    main()
175+

0 commit comments

Comments
 (0)