-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path: example_test.py
More file actions
117 lines (92 loc) · 4.55 KB
/
example_test.py
File metadata and controls
117 lines (92 loc) · 4.55 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Performance benchmark for NumPy vs. PyTorch LBP implementations.
This script demonstrates the significant speed advantage of batch processing
with PyTorch, especially on a GPU, compared to a traditional, iterative
approach with NumPy.
It measures the time taken to:
1. Process a batch of images one-by-one using a NumPy-based function in a loop.
2. Process the entire batch at once using a vectorized PyTorch function.
The results are verified to be identical, and a performance speedup factor
is calculated.
"""
from lib.lbplib import lbp_py, LbpPytorch
import numpy as np
import time
import torch
# --- Test Parameters ---
BATCH_SIZE = 64
ROWS = 128
COLS = 128
# --- Device Configuration ---
# Prefer the GPU when one is available; otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")
print("-" * 30)
# --- Data Preparation ---
# Generate a batch of random uint8 images covering the full intensity range.
# NOTE: np.random.randint's `high` bound is exclusive, so the previous
# `randint(0, 255, ...)` could never produce the value 255. Use 256 so the
# benchmark exercises the complete uint8 range [0, 255].
print(f"Generating a batch of {BATCH_SIZE} random images ({ROWS}x{COLS})...")
batch_images = np.random.randint(0, 256, (BATCH_SIZE, ROWS, COLS), dtype=np.uint8)
print(f"Batch shape: {batch_images.shape}")
print("-" * 30)
# --- Test NumPy implementation (looping over batch) ---
print("Testing NumPy (processing image by image)...")
start_time = time.time()
# lbp_py handles a single image per call, so each slice of the batch is
# processed sequentially and written into a pre-allocated result buffer.
ypy_batch = np.zeros_like(batch_images, dtype=np.uint8)
for idx, image in enumerate(batch_images):
    ypy_batch[idx] = lbp_py(image)
elapsed_py = time.time() - start_time
print(f'NumPy - elapsed time for {BATCH_SIZE} images = {elapsed_py:.6f} seconds')
print("-" * 30)
# --- Test PyTorch implementation using LbpPytorch module ---
print("Testing PyTorch LbpPytorch module (processing entire batch at once)...")
# Build the LBP module expecting NCHW-shaped input and move it to the
# selected device. eval() changes nothing here (LBP has no learnable
# parameters) but keeps the usual inference convention.
lbp_module = LbpPytorch(input_format='NCHW').to(device)
lbp_module.eval()
start_time = time.time()
# (BATCH_SIZE, ROWS, COLS) -> (BATCH_SIZE, 1, ROWS, COLS): the module
# requires an explicit channel axis.
images_nchw = torch.from_numpy(batch_images).unsqueeze(1).to(device)
# One forward pass covers the whole batch; gradients are not needed.
with torch.no_grad():
    lbp_output = lbp_module(images_nchw)
# Drop the channel axis, bring the result back to host memory, and map the
# module's normalized [0, 1] output onto uint8 [0, 255] so it is directly
# comparable with the NumPy result.
ypt_batch = (lbp_output.squeeze(1).cpu().numpy() * 255).astype(np.uint8)
elapsed_pt = time.time() - start_time
print(f'PyTorch - elapsed time for {BATCH_SIZE} images = {elapsed_pt:.6f} seconds')
print("-" * 30)
# --- Verification and Performance Comparison ---
# Cast to int64 before subtracting so the uint8 difference cannot wrap;
# compute the element-wise absolute difference once and reuse it below.
abs_diff = np.abs(ypt_batch.astype(np.int64) - ypy_batch.astype(np.int64))
error = np.sum(abs_diff)
print(f'Total absolute error between NumPy and PyTorch batches: {error}')
if error == 0:
    print("Success! The results are identical.")
else:
    print("Error! The results are different.")
    # Dump enough context to localize the mismatch.
    print(f"NumPy output shape: {ypy_batch.shape}, dtype: {ypy_batch.dtype}")
    print(f"PyTorch output shape: {ypt_batch.shape}, dtype: {ypt_batch.dtype}")
    print(f"NumPy output range: [{ypy_batch.min()}, {ypy_batch.max()}]")
    print(f"PyTorch output range: [{ypt_batch.min()}, {ypt_batch.max()}]")
    # Locate the single worst-disagreeing pixel.
    max_diff_idx = np.unravel_index(np.argmax(abs_diff), abs_diff.shape)
    print(f"Max difference of {abs_diff[max_diff_idx]} at position {max_diff_idx}")
    print(f"NumPy value: {ypy_batch[max_diff_idx]}, PyTorch value: {ypt_batch[max_diff_idx]}")
# Calculate the speedup factor
# Guard against a zero denominator (timers can report 0.0 on very fast runs).
if elapsed_pt > 0 and elapsed_py > 0:
    speedup = elapsed_py / elapsed_pt
    print(f'PyTorch is approximately {speedup:.2f}x faster than the NumPy loop for a batch of {BATCH_SIZE}.')
# --- Additional Module Information ---
print("-" * 30)
print("LbpPytorch Module Information:")
print(f"Input format: {lbp_module.input_format}")
# Materialize the parameter list once instead of re-iterating the
# parameters() generator three times (once for the count, twice more in the
# old `next(iter(...)) if list(...)` device lookup).
params = list(lbp_module.parameters())
print(f"Module parameters: {sum(p.numel() for p in params)} (should be 0 for LBP)")
print(f"Module is on device: {params[0].device if params else device}")