Skip to content

Commit c2122b0

Browse files
committed
refactor: use descriptive parameter names instead of 'x'
- Rename 'x' to 'embeddings' in layer_norm, feedforward_network, and transformer_encoder_block functions - Update all docstring examples to use 'embeddings' - Improves code readability per algorithms-keeper bot feedback - Fix noqa comment placement for unused num_heads parameter - All doctests and ruff checks pass
1 parent 88bfa98 commit c2122b0

1 file changed

Lines changed: 34 additions & 28 deletions

File tree

computer_vision/vision_transformer.py

Lines changed: 34 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -214,63 +214,65 @@ def attention_mechanism(
214214
return output, attention_weights
215215

216216

217-
def layer_norm(x: np.ndarray, epsilon: float = 1e-6) -> np.ndarray:
217+
def layer_norm(embeddings: np.ndarray, epsilon: float = 1e-6) -> np.ndarray:
218218
"""
219219
Apply Layer Normalization.
220220
221221
Args:
222-
x: Input array of shape (seq_len, embedding_dim)
222+
embeddings: Input array of shape (seq_len, embedding_dim)
223223
epsilon: Small constant for numerical stability (default: 1e-6)
224224
225225
Returns:
226226
Normalized array of same shape as input
227227
228228
Examples:
229-
>>> x = np.random.rand(10, 768)
230-
>>> normalized = layer_norm(x)
229+
>>> embeddings = np.random.rand(10, 768)
230+
>>> normalized = layer_norm(embeddings)
231231
>>> normalized.shape
232232
(10, 768)
233233
>>> np.allclose(normalized.mean(axis=1), 0.0, atol=1e-6)
234234
True
235235
>>> np.allclose(normalized.std(axis=1), 1.0, atol=1e-6)
236236
True
237237
"""
238-
mean = x.mean(axis=-1, keepdims=True)
239-
std = x.std(axis=-1, keepdims=True)
240-
return (x - mean) / (std + epsilon)
238+
mean = embeddings.mean(axis=-1, keepdims=True)
239+
std = embeddings.std(axis=-1, keepdims=True)
240+
return (embeddings - mean) / (std + epsilon)
241241

242242

243-
def feedforward_network(x: np.ndarray, hidden_dim: int = 3072) -> np.ndarray:
243+
def feedforward_network(
244+
embeddings: np.ndarray, hidden_dim: int = 3072
245+
) -> np.ndarray:
244246
"""
245247
Apply position-wise feed-forward network.
246248
247249
FFN(x) = GELU(xW1 + b1)W2 + b2  (GELU approximation, not ReLU)
248250
249251
Args:
250-
x: Input array of shape (seq_len, embedding_dim)
252+
embeddings: Input array of shape (seq_len, embedding_dim)
251253
hidden_dim: Hidden dimension size (default: 3072, typically 4x embedding_dim)
252254
253255
Returns:
254256
Output array of shape (seq_len, embedding_dim)
255257
256258
Examples:
257-
>>> x = np.random.rand(10, 768)
258-
>>> output = feedforward_network(x, hidden_dim=3072)
259+
>>> embeddings = np.random.rand(10, 768)
260+
>>> output = feedforward_network(embeddings, hidden_dim=3072)
259261
>>> output.shape
260262
(10, 768)
261263
262-
>>> x = np.random.rand(197, 512)
263-
>>> output = feedforward_network(x, hidden_dim=2048)
264+
>>> embeddings = np.random.rand(197, 512)
265+
>>> output = feedforward_network(embeddings, hidden_dim=2048)
264266
>>> output.shape
265267
(197, 512)
266268
"""
267-
embedding_dim = x.shape[1]
269+
embedding_dim = embeddings.shape[1]
268270
rng = np.random.default_rng()
269271

270272
# First linear layer
271273
w1 = rng.standard_normal((embedding_dim, hidden_dim)) * 0.02
272274
b1 = np.zeros(hidden_dim)
273-
hidden = x @ w1 + b1
275+
hidden = embeddings @ w1 + b1
274276

275277
# GELU activation (approximation)
276278
gelu_factor = np.sqrt(2 / np.pi) * (hidden + 0.044715 * hidden**3)
@@ -285,9 +287,9 @@ def feedforward_network(x: np.ndarray, hidden_dim: int = 3072) -> np.ndarray:
285287

286288

287289
def transformer_encoder_block(
288-
x: np.ndarray,
289-
num_heads: int = 12,
290-
hidden_dim: int = 3072, # noqa: ARG001
290+
embeddings: np.ndarray,
291+
num_heads: int = 12, # noqa: ARG001
292+
hidden_dim: int = 3072,
291293
) -> np.ndarray:
292294
"""
293295
Apply a single Transformer encoder block.
@@ -297,39 +299,43 @@ def transformer_encoder_block(
297299
2. Feed-forward network with residual connection and layer norm
298300
299301
Args:
300-
x: Input array of shape (seq_len, embedding_dim)
302+
embeddings: Input array of shape (seq_len, embedding_dim)
301303
num_heads: Number of attention heads (default: 12, kept for API)
302304
hidden_dim: Hidden dimension for FFN (default: 3072)
303305
304306
Returns:
305307
Output array of shape (seq_len, embedding_dim)
306308
307309
Examples:
308-
>>> x = np.random.rand(197, 768)
309-
>>> output = transformer_encoder_block(x, num_heads=12, hidden_dim=3072)
310+
>>> embeddings = np.random.rand(197, 768)
311+
>>> output = transformer_encoder_block(
312+
... embeddings, num_heads=12, hidden_dim=3072
313+
... )
310314
>>> output.shape
311315
(197, 768)
312316
313-
>>> x = np.random.rand(50, 512)
314-
>>> output = transformer_encoder_block(x, num_heads=8, hidden_dim=2048)
317+
>>> embeddings = np.random.rand(50, 512)
318+
>>> output = transformer_encoder_block(
319+
... embeddings, num_heads=8, hidden_dim=2048
320+
... )
315321
>>> output.shape
316322
(50, 512)
317323
"""
318324
# Multi-head self-attention (simplified - using single head for demonstration)
319325
# In practice, this would split into multiple heads
320326
# num_heads parameter is kept for API compatibility
321-
attention_output, _ = attention_mechanism(x, x, x)
327+
attention_output, _ = attention_mechanism(embeddings, embeddings, embeddings)
322328

323329
# Add residual connection and apply layer norm
324-
x = layer_norm(x + attention_output)
330+
embeddings = layer_norm(embeddings + attention_output)
325331

326332
# Feed-forward network
327-
ffn_output = feedforward_network(x, hidden_dim)
333+
ffn_output = feedforward_network(embeddings, hidden_dim)
328334

329335
# Add residual connection and apply layer norm
330-
x = layer_norm(x + ffn_output)
336+
embeddings = layer_norm(embeddings + ffn_output)
331337

332-
return x
338+
return embeddings
333339

334340

335341
def vision_transformer(

0 commit comments

Comments (0)