makiisthebes committed on
Commit
61f0100
1 Parent(s): 35d4777

Upload 9 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ mnist_dataset/t10k-images.idx3-ubyte filter=lfs diff=lfs merge=lfs -text
37
+ mnist_dataset/train-images.idx3-ubyte filter=lfs diff=lfs merge=lfs -text
best_model.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ 0.9906
le_net_learning_mnist.py ADDED
@@ -0,0 +1,266 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Rewriting the LeNet model to learn the MNIST dataset and save the model parameters,
2
+ # This is considered something we should do in Week 3 of the Deep Learning and Computer Vision course.
3
+
4
+ # We will implement LeNet-5 architecture to learn the MNIST dataset.
5
+
6
+ from torchvision.transforms import ToTensor
7
+ # from torchvision.transforms import v2
8
+ from torchvision import transforms
9
+ from torch.utils.data import DataLoader
10
+ from torch.utils.data import Dataset
11
+ from torchvision import datasets
12
+ import matplotlib.pyplot as plt
13
+ from PIL import Image
14
+ from time import time
15
+ from torch import nn
16
+ import pandas as pd
17
+ import numpy as np
18
+ import torch, os
19
+ from utils import ApplyEnhancementFilter
20
+
21
# Select the compute device once at import time.
# Preference order: CUDA GPU, then Apple MPS, then plain CPU.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"

print(f"Using {device} device for training/inference.")
if device == "cuda":
    # Report which physical GPU (index 0) will be used.
    print(f"GPU being used: {torch.cuda.get_device_name(0)}")
32
+
33
+
34
# Mean / std of the MNIST training pixels after scaling to [0, 1]
# (the values printed by the __main__ section of utils.py).
_MNIST_MEAN = (0.13066047430038452,)
_MNIST_STD = (0.30810782313346863,)

# Training pipeline: random geometric augmentation on the PIL image, then
# tensor conversion, z-score normalisation and 2-pixel padding (28x28 -> 32x32).
# An ApplyEnhancementFilter(out_channels=1, kernel_size=3, stride=1, padding=1)
# step was tried as the first transform and is currently disabled.
# NOTE(review): Pad runs after Normalize with fill=0, so the border value (0)
# differs from the normalised background value ((0 - mean) / std, about -0.42).
# Confirm whether that frame is intended before changing the order; the saved
# checkpoint was trained with this pipeline.
train_transform = transforms.Compose(
    [
        transforms.RandomAffine(degrees=35, translate=(0.1, 0.1), scale=(0.9, 1.1)),
        transforms.RandomRotation(degrees=35),
        transforms.ToTensor(),
        transforms.Normalize(_MNIST_MEAN, _MNIST_STD),
        transforms.Pad(2, fill=0, padding_mode='constant'),
    ]
)

# Evaluation pipeline: same conversion, normalisation and padding, but no
# random augmentation so that test results are deterministic.
test_transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(_MNIST_MEAN, _MNIST_STD),
        transforms.Pad(2, fill=0, padding_mode='constant'),
    ]
)
52
+
53
+
54
+ # Load the MNIST dataset: 28x28x1 images (black and white ~ 1 channel), padded to 32x32 by the transforms above.
55
+
56
+ # http://yann.lecun.com/exdb/mnist/
57
+ # datasets.MNIST
58
+
59
+ # Loading from Dataset and DataLoader, https://pytorch.org/tutorials/beginner/basics/data_tutorial.html
60
+ # Load using known datasets, but what if we have our own dataset?
61
+ # training_data = datasets.MNIST(
62
+ # root="data",
63
+ # train=True,
64
+ # download=True,
65
+ # transform=ToTensor()
66
+ # )
67
+ #
68
+ # test_data = datasets.MNIST(
69
+ # root="data",
70
+ # train=False,
71
+ # download=True,
72
+ # transform=ToTensor()
73
+ # )
74
+
75
+ # Loading from a custom dataset
76
+ import idx2numpy
77
class CustomImageDataset(Dataset):
    """
    Map-style dataset backed by a pair of IDX files (images and labels).

    This class must inherit from the torch.utils.data.Dataset class
    and contain the functions __init__, __len__, and __getitem__.
    """

    def __init__(self, annotations_file, image_file, transform=None, target_transform=None):
        """
        Load both IDX files fully into memory.

        Args:
            annotations_file: Path to the IDX label file.
            image_file: Path to the IDX image file.
            transform: Optional callable applied to each PIL image.
            target_transform: Optional callable applied to each label.
        """
        self.img_labels = idx2numpy.convert_from_file(annotations_file)
        self.images = idx2numpy.convert_from_file(image_file)
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        """Return the number of samples (one label per image)."""
        return len(self.img_labels)

    def __getitem__(self, idx):
        """
        Get the image and label at the index idx.

        Returns:
            A tuple (img, label). img is the PIL image built from the stored
            array, after `transform` if one was given. label is a plain
            Python int, after `target_transform` if one was given.
        """
        # Convert the numpy scalar to a Python int so that the default
        # DataLoader collation builds an int64 target tensor, which is the
        # class-index dtype documented for nn.CrossEntropyLoss.
        label = int(self.img_labels[idx])
        img = Image.fromarray(self.images[idx])

        if self.transform is not None:
            img = self.transform(img)
        if self.target_transform is not None:
            label = self.target_transform(label)
        return img, label
105
+
106
+
107
+ # Make the LeNet-5 model
108
class LeNet5Model(nn.Module):
    """
    LeNet-5 style network: three 5x5 convolutions with average pooling,
    followed by two fully connected layers producing 10 class scores.

    The last convolution must reduce the feature map to 120x1x1 for the first
    Linear layer to accept it, which requires 1-channel 32x32 inputs.
    """

    def __init__(self):
        super().__init__()
        # A single Tanh module is reused after every learnable layer.
        self.tanh = nn.Tanh()

        # Convolutional feature extractor.
        # Conv2d: https://pytorch.org/docs/stable/generated/torch.nn.Conv2d.html
        # AvgPool2d: https://pytorch.org/docs/stable/generated/torch.nn.AvgPool2d.html
        feature_layers = [
            nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5, stride=1),
            self.tanh,
            nn.AvgPool2d(kernel_size=2, stride=2),
            nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5, stride=1),
            self.tanh,
            nn.AvgPool2d(kernel_size=2, stride=2),
            nn.Conv2d(in_channels=16, out_channels=120, kernel_size=5, stride=1),
            self.tanh,
        ]
        self.le_stack = nn.Sequential(*feature_layers)

        # Classifier head.
        # Linear: https://pytorch.org/docs/stable/generated/torch.nn.Linear.html
        classifier_layers = [
            nn.Linear(in_features=120, out_features=84),
            self.tanh,
            nn.Linear(in_features=84, out_features=10),
        ]
        self.fc_stack = nn.Sequential(*classifier_layers)

    def forward(self, x):
        """Run the forward pass and return the raw class scores (logits)."""
        features = self.le_stack(x)
        # Collapse everything except the batch dimension before the FC layers.
        flat = features.reshape(features.shape[0], -1)
        return self.fc_stack(flat)
138
+
139
+
140
def train_model(model, train_loader, test_loader, epochs=10, learning_rate=0.001, saved_model=None):
    """
    Train `model` on `train_loader`, evaluate on `test_loader` after every
    epoch, and persist the best parameters seen so far.

    Side effects: whenever an epoch's accuracy beats the best known accuracy,
    the model's state dict is written to "lenet_mnist_model.pth" and the
    accuracy to "best_model.txt" (both in the current working directory).
    The starting "best known accuracy" is read from "best_model.txt" if that
    file exists, so a run only overwrites the checkpoint when it improves on
    previous runs.

    Args:
        model: The nn.Module to train; batches are moved to its device.
        train_loader: Iterable of (inputs, targets) training batches.
        test_loader: Iterable of (inputs, targets) evaluation batches.
        epochs: Number of passes over `train_loader`.
        learning_rate: Learning rate for the AdamW optimizer.
        saved_model: Optional path to a state dict to load before training.

    Returns:
        The best accuracy known at the end of training (a float in [0, 1]).
    """
    # Loss function, https://pytorch.org/docs/stable/generated/torch.nn.CrossEntropyLoss.html
    loss_fn = nn.CrossEntropyLoss()  # multi-class classification on raw logits.
    # Optimizer, https://pytorch.org/docs/stable/optim.html
    optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate, weight_decay=1e-6)

    # Send batches to wherever the model's parameters live, instead of relying
    # on a module-level global being in sync with the model.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device("cpu")

    # See if a best accuracy is saved; if so, use it as the bar to beat.
    best_accuracy = 0.0
    if os.path.exists("best_model.txt"):
        with open("best_model.txt", "r") as file:
            stored = file.read().strip()
        try:
            best_accuracy = float(stored)
        except ValueError:
            # An empty or corrupt file must not abort training; start from 0.
            print(f"Ignoring unreadable best_model.txt content: {stored!r}")

    if saved_model is not None:  # Load the model parameters if they exist.
        # map_location lets a checkpoint saved on a GPU be loaded on a
        # machine where only the CPU (or another device) is available.
        model.load_state_dict(torch.load(saved_model, map_location=run_device))

    # Training loop
    for i in range(epochs):
        model.train()
        print("Epoch ", i)
        for batch, (x, y) in enumerate(train_loader):
            x, y = x.to(run_device), y.to(run_device)
            # Forward pass and loss
            y_pred = model(x)
            loss = loss_fn(y_pred, y)
            # Zero gradients, backward pass, and update weights
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            if batch % 250 == 0:
                print(f"Epoch {i} batch {batch} loss: {loss.item()}")

        # Evaluate the model
        model.eval()
        correct, total = 0, 0
        with torch.no_grad():
            for x, y in test_loader:
                x, y = x.to(run_device), y.to(run_device)
                predicted = model(x).argmax(dim=1)
                total += y.size(0)
                correct += (predicted == y).sum().item()

        # Guard against an empty evaluation set (avoids ZeroDivisionError).
        accuracy = correct / total if total else 0.0
        print(f"Epoch {i} accuracy: {accuracy}")
        if accuracy > best_accuracy:
            best_accuracy = accuracy
            torch.save(model.state_dict(), "lenet_mnist_model.pth")
            with open("best_model.txt", "w") as file:
                file.write(f"{best_accuracy}")
    print("Training complete.")
    return best_accuracy
201
+
202
+
203
def init_weights(m):
    """
    Apply Xavier-uniform initialisation to a Conv2d or Linear module.

    Intended for use with `model.apply(init_weights)`. Modules of any other
    type are left untouched. When the module has a bias, it is filled with
    the constant 0.01; modules created with `bias=False` are supported.

    Args:
        m: The module to initialise in place.
    """
    if not isinstance(m, (nn.Conv2d, nn.Linear)):
        return
    nn.init.xavier_uniform_(m.weight)
    # Both layer types may be built without a bias, so check before filling.
    if m.bias is not None:
        nn.init.constant_(m.bias, 0.01)
211
+
212
if __name__ == "__main__":
    # Build the datasets from the raw IDX files shipped in mnist_dataset/.
    # (idx2numpy.convert_from_file on the test images yields 10000 images of 28x28 pixels.)
    test_data = CustomImageDataset(
        "mnist_dataset/t10k-labels.idx1-ubyte",
        "mnist_dataset/t10k-images.idx3-ubyte",
        transform=test_transform,
    )
    first_image, first_label = test_data[0]  # Sanity check on one sample.
    print(first_image.shape, "label value", first_label)
    train_data = CustomImageDataset(
        "mnist_dataset/train-labels.idx1-ubyte",
        "mnist_dataset/train-images.idx3-ubyte",
        transform=train_transform,
    )

    # Create DataLoaders so we can iterate through the datasets in batches.
    # Only the training data needs shuffling; accuracy does not depend on the
    # order in which evaluation batches are visited.
    test_loader = DataLoader(test_data, batch_size=64, shuffle=False)
    train_loader = DataLoader(train_data, batch_size=64, shuffle=True)

    model = LeNet5Model().to(device)
    model.apply(init_weights)  # Apply Xavier initialisation to the model.
    print(model)

    # Training the model. train_model itself writes the best-performing
    # parameters to "lenet_mnist_model.pth" (and the accuracy to best_model.txt).
    train_model(model, train_loader, test_loader, epochs=1000, learning_rate=0.001)

    # Save the last-epoch parameters under a separate name. Writing them to
    # "lenet_mnist_model.pth" would overwrite the best checkpoint with
    # possibly worse weights and leave it inconsistent with best_model.txt.
    torch.save(model.state_dict(), "lenet_mnist_model_last.pth")
251
+
252
+ # Current errors include:
253
+ # - RuntimeError: Input type (unsigned char) and bias type (float) should be the same
254
+ # - I solved this by converting the image from the custom loader to float32 values.
255
+ # - RuntimeError: Calculated padded input size per channel: (4 x 4). Kernel size: (5 x 5). Kernel size can't be greater than actual input size
256
+ # - I solved this by adding padding to make it 32x32, as the model expects this and the dataset is 28x28.
257
+ # - The model also had problems when evaluating, it is important dims are batch x channels x height x width, and labels are int.
258
+
259
+ # Ways to improve accuracy:
260
+ # We will try to normalise the dataset via z-score, so values which are brighter are not given more importance. [98.99% accuracy]
261
+ # We can apply rotations and affine to potentially improve the model by making it learn more abstractly from specific patterns rather than exact same orientation.
262
+ # Xavier initialisation of CNN and FC layers, to prevent vanishing gradients.
263
+ # Increase the angle of rotation and affine transformations to see if it improves the model.
264
+ # We could potentially help the model by applying an enhancement filter from computer vision (negative Laplacian, i.e. a sharpening kernel) to the image.
265
+
266
+ # We do not know whether model is overfitting, as we do not have a graph of the training and validation loss.
lenet_mnist_model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:05ff80605ac574e7e667ec532c8c4b94845e2b11c0c69c06feccd7d86dbab95f
3
+ size 250431
let_net_arch.png ADDED
mnist_dataset/t10k-images.idx3-ubyte ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0fa7898d509279e482958e8ce81c8e77db3f2f8254e26661ceb7762c4d494ce7
3
+ size 7840016
mnist_dataset/t10k-labels.idx1-ubyte ADDED
Binary file (10 kB). View file
 
mnist_dataset/train-images.idx3-ubyte ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba891046e6505d7aadcbbe25680a0738ad16aec93bde7f9b65e87a2fc25776db
3
+ size 47040016
mnist_dataset/train-labels.idx1-ubyte ADDED
Binary file (60 kB). View file
 
utils.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import idx2numpy, torch
2
+ import torch
3
+ import torch.nn as nn
4
+ import torch.nn.functional as F
5
+ from torchvision import transforms, datasets
6
+ from PIL import Image
7
+
8
+
9
class ApplyEnhancementFilter:
    """
    Callable image transform that sharpens a single-channel PIL image with a
    fixed 3x3 kernel and returns the result as a PIL image.

    The kernel is the identity minus a 4-neighbour Laplacian (centre 5,
    direct neighbours -1), i.e. a sharpening filter rather than a pure edge
    detector.

    NOTE(review): the weight is always the fixed 1x1x3x3 kernel, whatever
    `out_channels` and `kernel_size` are set to; those arguments only
    configure the Conv2d before its weight is replaced. Confirm whether
    other values are meant to be supported.
    """

    def __init__(self, out_channels, kernel_size, stride=1, padding=0, bias=False):
        """
        Initialize the convolution parameters.

        Args:
            out_channels: Passed to nn.Conv2d (see class note).
            kernel_size: Passed to nn.Conv2d (see class note).
            stride: Convolution stride.
            padding: Zero padding added on each side; use 1 to keep the
                spatial size with the 3x3 kernel.
            bias: Whether the Conv2d carries a bias term.
        """
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.stride = stride
        self.padding = padding
        self.bias = bias
        # Define the convolutional layer (not trained here).
        self.conv = nn.Conv2d(in_channels=1,  # grayscale input
                              out_channels=out_channels,
                              kernel_size=kernel_size,
                              stride=stride,
                              padding=padding,
                              bias=bias)

        # Fixed sharpening kernel, shaped (out=1, in=1, 3, 3).
        sharpen_kernel = torch.tensor([[0., -1., 0.],
                                       [-1., 5., -1.],
                                       [0., -1., 0.]]).unsqueeze(0).unsqueeze(0)
        # The filter is never trained, so do not track gradients for it.
        self.conv.weight = nn.Parameter(sharpen_kernel.float(), requires_grad=False)

    def _filter_tensor(self, img_tensor):
        """Convolve a (1, 1, H, W) tensor in [0, 1] and clamp the result to [0, 1]."""
        with torch.no_grad():
            # Sharpening can push values below 0 or above 1. Clamp before the
            # float -> uint8 conversion done by to_pil_image, which would
            # otherwise wrap out-of-range values and corrupt the image.
            return self.conv(img_tensor).clamp_(0.0, 1.0)

    def __call__(self, img):
        """
        Apply the convolution transformation.

        Args:
            img: A PIL image (single channel).

        Returns:
            The filtered image as a PIL image.
        """
        # Convert PIL image to tensor and add a batch dimension.
        img_tensor = transforms.functional.to_tensor(img).unsqueeze(0)
        conv_img = self._filter_tensor(img_tensor)
        # Remove batch dimension and convert back to PIL for further transforms.
        return transforms.functional.to_pil_image(conv_img.squeeze(0))
45
+
46
+
47
if __name__ == "__main__":
    # Normalising the dataset keeps any single input from affecting the model
    # more than others purely because of its raw magnitude. Raw pixel values
    # range from 0-255, so a z-score (applied later via transforms) is used;
    # this script computes the mean and standard deviation it needs.
    raw_images = idx2numpy.convert_from_file("mnist_dataset/train-images.idx3-ubyte")

    # Convert the training images to a float tensor scaled to [0, 1].
    scaled_images = torch.tensor(raw_images, dtype=torch.float32) / 255.0

    # Statistics over every pixel of every training image.
    train_mean, train_std = scaled_images.mean(), scaled_images.std()

    print(f"Mean: {train_mean}, Std: {train_std}")
    # Mean: 0.13066047430038452, Std: 0.30810782313346863