-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path: example_test.py
More file actions
117 lines (92 loc) · 4.55 KB
/
example_test.py
File metadata and controls
117 lines (92 loc) · 4.55 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Performance benchmark for NumPy vs. PyTorch LBP implementations.
This script demonstrates the significant speed advantage of batch processing
with PyTorch, especially on a GPU, compared to a traditional, iterative
approach with NumPy.
It measures the time taken to:
1. Process a batch of images one-by-one using a NumPy-based function in a loop.
2. Process the entire batch at once using a vectorized PyTorch function.
The results are verified to be identical, and a performance speedup factor
is calculated.
"""
from lib.lbplib import lbp_py, LbpPytorch
import numpy as np
import time
import torch
# --- Test Parameters ---
BATCH_SIZE = 64
ROWS = 128
COLS = 128
# --- Device Configuration ---
# Prefer the GPU when one is available; otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")
print("-" * 30)
# --- Data Preparation ---
# Generate a batch of random uint8 images covering the full intensity range.
# NOTE: np.random.randint's `high` bound is exclusive, so the previous
# `randint(0, 255, ...)` could never produce the value 255. Use 256 so the
# benchmark exercises the complete uint8 range [0, 255].
print(f"Generating a batch of {BATCH_SIZE} random images ({ROWS}x{COLS})...")
batch_images = np.random.randint(0, 256, (BATCH_SIZE, ROWS, COLS), dtype=np.uint8)
print(f"Batch shape: {batch_images.shape}")
print("-" * 30)
# --- Test NumPy implementation (looping over batch) ---
print("Testing NumPy (processing image by image)...")
start_time = time.time()
# lbp_py handles a single image per call, so each slice of the batch is
# processed sequentially and written into a pre-allocated result buffer.
ypy_batch = np.zeros_like(batch_images, dtype=np.uint8)
for idx, image in enumerate(batch_images):
    ypy_batch[idx] = lbp_py(image)
elapsed_py = time.time() - start_time
print(f'NumPy - elapsed time for {BATCH_SIZE} images = {elapsed_py:.6f} seconds')
print("-" * 30)
# --- Test PyTorch implementation using LbpPytorch module ---
print("Testing PyTorch LbpPytorch module (processing entire batch at once)...")
# Build the LBP module expecting NCHW-shaped input and move it to the
# selected device. eval() changes nothing here (LBP has no learnable
# parameters) but keeps the usual inference convention.
lbp_module = LbpPytorch(input_format='NCHW').to(device)
lbp_module.eval()
start_time = time.time()
# (BATCH_SIZE, ROWS, COLS) -> (BATCH_SIZE, 1, ROWS, COLS): the module
# requires an explicit channel axis.
images_nchw = torch.from_numpy(batch_images).unsqueeze(1).to(device)
# One forward pass covers the whole batch; gradients are not needed.
with torch.no_grad():
    lbp_output = lbp_module(images_nchw)
# Drop the channel axis, bring the result back to host memory, and map the
# module's normalized [0, 1] output onto uint8 [0, 255] so it is directly
# comparable with the NumPy result.
ypt_batch = (lbp_output.squeeze(1).cpu().numpy() * 255).astype(np.uint8)
elapsed_pt = time.time() - start_time
print(f'PyTorch - elapsed time for {BATCH_SIZE} images = {elapsed_pt:.6f} seconds')
print("-" * 30)
# --- Verification and Performance Comparison ---
# Cast to int64 before subtracting so the uint8 difference cannot wrap;
# compute the element-wise absolute difference once and reuse it below.
abs_diff = np.abs(ypt_batch.astype(np.int64) - ypy_batch.astype(np.int64))
error = np.sum(abs_diff)
print(f'Total absolute error between NumPy and PyTorch batches: {error}')
if error == 0:
    print("Success! The results are identical.")
else:
    print("Error! The results are different.")
    # Dump enough context to localize the mismatch.
    print(f"NumPy output shape: {ypy_batch.shape}, dtype: {ypy_batch.dtype}")
    print(f"PyTorch output shape: {ypt_batch.shape}, dtype: {ypt_batch.dtype}")
    print(f"NumPy output range: [{ypy_batch.min()}, {ypy_batch.max()}]")
    print(f"PyTorch output range: [{ypt_batch.min()}, {ypt_batch.max()}]")
    # Locate the single worst-disagreeing pixel.
    max_diff_idx = np.unravel_index(np.argmax(abs_diff), abs_diff.shape)
    print(f"Max difference of {abs_diff[max_diff_idx]} at position {max_diff_idx}")
    print(f"NumPy value: {ypy_batch[max_diff_idx]}, PyTorch value: {ypt_batch[max_diff_idx]}")
# Calculate the speedup factor
# Guard against a zero denominator (timers can report 0.0 on very fast runs).
if elapsed_pt > 0 and elapsed_py > 0:
    speedup = elapsed_py / elapsed_pt
    print(f'PyTorch is approximately {speedup:.2f}x faster than the NumPy loop for a batch of {BATCH_SIZE}.')
# --- Additional Module Information ---
print("-" * 30)
print("LbpPytorch Module Information:")
print(f"Input format: {lbp_module.input_format}")
# Materialize the parameter list once instead of re-iterating the
# parameters() generator three times (once for the count, twice more in the
# old `next(iter(...)) if list(...)` device lookup).
params = list(lbp_module.parameters())
print(f"Module parameters: {sum(p.numel() for p in params)} (should be 0 for LBP)")
print(f"Module is on device: {params[0].device if params else device}")