# Zero-One_Classification_Alg/Zero_One_Classification.py at main · jackson-swain/Zero-One_Classification_Alg · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
# Import necessary libraries
import numpy as np                   # For numerical operations and array handling
import tensorflow as tf              # Core TensorFlow library for machine learning
from tensorflow.keras.models import Sequential  # For building neural network models
from tensorflow.keras.layers import Dense       # For fully connected neural network layers
import matplotlib.pyplot as plt      # For creating visualizations
import random                        # For generating random numbers

# Configure logging to suppress TensorFlow warnings
import logging
logging.getLogger("tensorflow").setLevel(logging.ERROR)  # Only show error-level messages from TensorFlow

# Fetch the MNIST handwritten-digit dataset bundled with Keras.
# The images land in x_train/x_test and their digit labels (0-9) in y_train/y_test.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

# Narrow the problem to binary classification: keep only samples labelled 0 or 1.
# np.isin produces one boolean per label; indexing images and labels with the
# same mask keeps the two arrays aligned.
train_mask = np.isin(y_train, (0, 1))
test_mask = np.isin(y_test, (0, 1))

x_train = x_train[train_mask]
y_train = y_train[train_mask]
x_test = x_test[test_mask]
y_test = y_test[test_mask]

# Scale pixel intensities into [0, 1] by dividing by 255.0, for better training
x_train, x_test = x_train / 255.0, x_test / 255.0

# Dump the pixel values of the first image for a quick visual sanity check
first_image = x_train[0]
print('The first element of X is: ', first_image)

# Show the labels at both ends of the training set; each should be 0 or 1
# if only those two digits were kept
first_label, last_label = y_train[0], y_train[-1]
print('The first element of y is: ', first_label)
print('The last element of y is: ', last_label)

# Report the array dimensions:
#   X is expected to be (number_of_filtered_images, 28, 28)
#   y is expected to be (number_of_filtered_images,)
x_shape, y_shape = x_train.shape, y_train.shape
print('The shape of X is: ' + str(x_shape))
print('The shape of y is: ' + str(y_shape))

# Build a 15x15 inch figure holding a 10x10 grid of axes (100 cells)
fig, axes = plt.subplots(10, 10, figsize=(15, 15))

# Collapse the 2D grid of axes into a 1D array so a single loop visits every cell
axes_flat = axes.flatten()

# Fill each cell with a randomly chosen training image
sample_count = len(x_train)
for ax in axes_flat:
    # randint includes both endpoints, hence the "- 1" to stay in bounds
    num = random.randint(0, sample_count - 1)

    # Draw the image in grayscale and label it with its digit (0 or 1)
    ax.imshow(x_train[num], cmap='gray')
    ax.set_title(f"Digit: {y_train[num]}")

    # Hide ticks and axis labels for a cleaner grid
    ax.axis('off')

# Tighten the spacing between cells, then show all 100 images
plt.tight_layout()
plt.show()

# Flatten every 28x28 image into a single 784-element feature vector,
# giving arrays of shape (number_of_images, 784)
pixels_per_image = 28 * 28
x_train_reshaped = x_train.reshape((len(x_train), pixels_per_image))
x_test_reshaped = x_test.reshape((len(x_test), pixels_per_image))

print('Reshaped training data shape:', x_train_reshaped.shape)

# Layers of the 0-vs-1 classifier, listed in forward order
layer_stack = [
    # Input specification: one flattened image, 784 features (28*28 pixels)
    tf.keras.Input(shape=(784,)),

    # Hidden layer 1: 25 sigmoid units; sigmoid maps to (0, 1): f(x) = 1/(1+e^(-x))
    Dense(25, activation='sigmoid'),

    # Hidden layer 2: 15 sigmoid units, narrowing the representation further
    Dense(15, activation='sigmoid'),

    # Output layer: a single sigmoid unit, read as a probability between 0 and 1
    # for the binary (0 or 1) decision
    Dense(1, activation='sigmoid'),
]

# Sequential chains the layers one after another into a single network
model = Sequential(layer_stack, name="my_model")

# Show the architecture with per-layer output shapes and parameter counts
model.summary()

# Pull out the individual layers for inspection.
# The three-way unpacking relies on model.layers listing only the three Dense layers.
layer1, layer2, layer3 = model.layers

# Each Dense layer exposes a weight matrix W (connections from the previous
# layer) and a bias vector b
W1, b1 = layer1.get_weights()  # input -> first hidden layer
W2, b2 = layer2.get_weights()  # first hidden -> second hidden layer
W3, b3 = layer3.get_weights()  # second hidden layer -> output
print(f"W1 shape = {W1.shape}, b1 shape = {b1.shape}")  # Should be (784, 25) and (25,)
print(f"W2 shape = {W2.shape}, b2 shape = {b2.shape}")  # Should be (25, 15) and (15,)
print(f"W3 shape = {W3.shape}, b3 shape = {b3.shape}")  # Should be (15, 1) and (1,)

# Training configuration.
# Binary cross-entropy is the standard loss for two-class problems: it measures
# how far the predicted probabilities are from the true 0/1 labels.
bce_loss = tf.keras.losses.BinaryCrossentropy()

# Adam adapts the step size per parameter; 0.001 is the base learning rate
adam = tf.keras.optimizers.Adam(learning_rate=0.001)

# Accuracy is tracked alongside the loss to monitor progress
model.compile(loss=bce_loss, optimizer=adam, metrics=['accuracy'])

# Fit on the flattened training images
history = model.fit(
    x=x_train_reshaped,    # input features
    y=y_train,             # target labels
    epochs=28,             # complete passes over the training data
    validation_split=0.2,  # hold out 20% of the training data for validation
)

# Measure performance on the held-out test set and report the final accuracy
test_loss, test_acc = model.evaluate(x_test_reshaped, y_test)
print(f"Test accuracy: {test_acc:.4f}")

# Visualize training progress: accuracy on the left, loss on the right
plt.figure(figsize=(12, 4))

# One entry per panel:
# (position in the 1x2 grid, training key, validation key, title, y-axis label, legend location)
panels = (
    (1, 'accuracy', 'val_accuracy', 'Model Accuracy', 'Accuracy', 'lower right'),
    (2, 'loss', 'val_loss', 'Model Loss', 'Loss', 'upper right'),
)

for position, train_key, val_key, panel_title, y_label, legend_loc in panels:
    plt.subplot(1, 2, position)
    plt.plot(history.history[train_key])  # training curve over epochs
    plt.plot(history.history[val_key])    # validation curve over epochs
    plt.title(panel_title)
    plt.ylabel(y_label)
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Validation'], loc=legend_loc)

# Tidy the spacing between the two panels, then display them
plt.tight_layout()
plt.show()

# Run the trained model on one known example of each class.
# The sample positions are looked up from the labels instead of being
# hard-coded, so the "zero" and "one" below always match their labels even if
# the dataset size or ordering changes.
zero_index = np.flatnonzero(y_train == 0)[0]   # first training sample labelled 0
one_index = np.flatnonzero(y_train == 1)[-1]   # last training sample labelled 1

# predict expects a batch, so each 28x28 image is reshaped into a single row of
# 784 features. The output is the sigmoid activation of the final layer:
# values near 0 indicate "zero", values near 1 indicate "one".
prediction = model.predict(x_train[zero_index].reshape(1, 784))  # a zero
print(f" predicting a zero: {prediction}")
prediction = model.predict(x_train[one_index].reshape(1, 784))  # a one
print(f" predicting a one:  {prediction}")