Sendeky commited on
Commit
41eee5f
1 Parent(s): c1af00e

Upload 4 files

Browse files
Files changed (4) hide show
  1. CustomCIFAR10Dataset.py +23 -0
  2. cifar10.py +121 -0
  3. model.py +206 -0
  4. requirements.txt +2 -0
CustomCIFAR10Dataset.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from PIL import Image
2
+ from torch.utils.data import Dataset
3
+
4
+ # We have to make a custom dataset class to load them with the torch DataLoader
5
+ # Custom dataset class for CIFAR-10 images
6
class CustomCIFAR10Dataset(Dataset):
    """Map-style dataset pairing images with their labels.

    Wraps two parallel, indexable collections so that a torch ``DataLoader``
    can draw ``(image, label)`` samples from them.

    Args:
        images: Indexable, sized collection of images.
        labels: Indexable collection of labels, parallel to ``images``.
        transform: Optional callable applied to an image each time it is
            fetched. ``None`` returns the stored image untouched.
    """

    def __init__(self, images, labels, transform=None):
        self.images, self.labels = images, labels
        self.transform = transform

    def __len__(self):
        """Return the number of stored images."""
        return len(self.images)

    def __getitem__(self, index):
        """Return the ``(image, label)`` pair stored at ``index``."""
        sample, target = self.images[index], self.labels[index]

        # Without a transform the stored image is handed back as-is.
        if self.transform is None:
            return sample, target

        return self.transform(sample), target
cifar10.py ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # coding=utf-8
2
+ # Copyright 2020 The TensorFlow Datasets Authors and the HuggingFace Datasets Authors.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ # Lint as: python3
17
+ """CIFAR-10 Data Set"""
18
+
19
+
20
+ import pickle
21
+
22
+ import numpy as np
23
+
24
+ import datasets
25
+ from datasets.tasks import ImageClassification
26
+
27
+
28
+ _CITATION = """\
29
+ @TECHREPORT{Krizhevsky09learningmultiple,
30
+ author = {Alex Krizhevsky},
31
+ title = {Learning multiple layers of features from tiny images},
32
+ institution = {},
33
+ year = {2009}
34
+ }
35
+ """
36
+
37
+ _DESCRIPTION = """\
38
+ The CIFAR-10 dataset consists of 60000 32x32 colour images in 10 classes, with 6000 images
39
+ per class. There are 50000 training images and 10000 test images.
40
+ """
41
+
42
+ _DATA_URL = "https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz"
43
+
44
+ _NAMES = [
45
+ "airplane",
46
+ "automobile",
47
+ "bird",
48
+ "cat",
49
+ "deer",
50
+ "dog",
51
+ "frog",
52
+ "horse",
53
+ "ship",
54
+ "truck",
55
+ ]
56
+
57
+
58
class Cifar10(datasets.GeneratorBasedBuilder):
    """CIFAR-10 Data Set"""

    BUILDER_CONFIGS = [
        datasets.BuilderConfig(
            name="plain_text",
            version=datasets.Version("1.0.0", ""),
            description="Plain text import of CIFAR-10 Data Set",
        )
    ]

    def _info(self):
        """Describe the dataset: an ``img`` image column and a ``label`` class column."""
        return datasets.DatasetInfo(
            description=_DESCRIPTION,
            features=datasets.Features(
                {
                    "img": datasets.Image(),
                    "label": datasets.features.ClassLabel(names=_NAMES),
                }
            ),
            supervised_keys=("img", "label"),
            homepage="https://www.cs.toronto.edu/~kriz/cifar.html",
            citation=_CITATION,
            task_templates=ImageClassification(image_column="img", label_column="label"),
        )

    def _split_generators(self, dl_manager):
        """Download the archive and declare the train and test splits.

        Each split gets its own ``iter_archive`` iterator over the same
        downloaded archive; ``_generate_examples`` picks out the members that
        belong to the split.
        """
        archive = dl_manager.download(_DATA_URL)

        return [
            datasets.SplitGenerator(
                name=datasets.Split.TRAIN, gen_kwargs={"files": dl_manager.iter_archive(archive), "split": "train"}
            ),
            datasets.SplitGenerator(
                name=datasets.Split.TEST, gen_kwargs={"files": dl_manager.iter_archive(archive), "split": "test"}
            ),
        ]

    def _generate_examples(self, files, split):
        """Yield ``(key, example)`` pairs for one split.

        Args:
            files: Iterable of ``(path, file_object)`` pairs for the archive
                members.
            split: Either ``"train"`` or ``"test"``.

        Yields:
            A key of the form ``"<archive path>_<index>"`` and a dict with the
            image as a 32x32x3 array under ``"img"`` and its class index
            under ``"label"``.

        Raises:
            ValueError: If ``split`` is neither ``"train"`` nor ``"test"``.
        """
        if split == "train":
            batch_names = ["data_batch_1", "data_batch_2", "data_batch_3", "data_batch_4", "data_batch_5"]
        elif split == "test":
            batch_names = ["test_batch"]
        else:
            # Fail with a clear message instead of an unbound-variable error.
            raise ValueError(f"Unknown split {split!r}; expected 'train' or 'test'.")
        wanted_paths = {f"cifar-10-batches-py/{filename}" for filename in batch_names}

        for path, fo in files:
            if path not in wanted_paths:
                continue

            # NOTE(security): pickle can execute arbitrary code while loading.
            # The archive comes from _DATA_URL; only point it at a trusted source.
            batch = pickle.load(fo, encoding="bytes")

            labels = batch[b"labels"]
            images = batch[b"data"]

            for idx, flat_pixels in enumerate(images):
                # Each row is reshaped to (3, 32, 32) and then moved to
                # height x width x channel order.
                img_reshaped = np.transpose(np.reshape(flat_pixels, (3, 32, 32)), (1, 2, 0))

                yield f"{path}_{idx}", {
                    "img": img_reshaped,
                    "label": labels[idx],
                }
model.py ADDED
@@ -0,0 +1,206 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import datasets
3
+ import torch
4
+ import torch.nn as nn
5
+ import torch.nn.functional as F
6
+ import torch.optim as optim
7
+ import torchvision.transforms as transforms
8
+ from torch.utils.data import DataLoader, Dataset
9
+ from PIL import Image
10
+
11
+ from cifar10 import Cifar10
12
+ from CustomCIFAR10Dataset import CustomCIFAR10Dataset
13
+
14
+
15
+ # create dataset builder instance
16
cifar10_builder = Cifar10()
# downloads the dataset and prepares it for use
cifar10_builder.download_and_prepare()

# generate the dataset ('train', 'test' portion)
train_data = cifar10_builder.as_dataset(split='train')
test_data = cifar10_builder.as_dataset(split='test')

# Read each column exactly once; the same objects are handed to the dataset
# wrappers below instead of reading the columns from the dataset a second time.
train_images = train_data["img"]
train_labels = train_data["label"]

test_images = test_data["img"]
test_labels = test_data["label"]

# Cifar10 classes
classes = ("airplane", "automobile", "bird", "cat", "deer",
           "dog", "frog", "horse", "ship", "truck")

# # we can plot and access the images like this
# from matplotlib import pyplot as plt
#
# # indexing the row first and then "img" is faster because only the chosen
# # image is decoded (index -> decoding is faster than decoding -> index)
# plt.imshow(train_data[0]["img"])
# plt.show()

# PARAMETERS
# batch size during training
batch_size = 128

# image size
img_size = 32

# number of channels in image (3, because RGB in this case)
nc = 3

# output size (10 classes)
output = len(classes)

# Num of GPUs (pick 0 for CPU)
ngpu = 0

# number of DataLoader workers
nw = 0

# number of training epochs
num_epochs = 5

# learning rate
learning_rate = 0.0022

# chooses which device to use: CUDA only when it is available AND ngpu > 0
device = torch.device("cuda:0" if (torch.cuda.is_available()) and (ngpu > 0) else "cpu")

# Transforms for each image. Converting to a tensor is essential, otherwise
# the DataLoader won't accept the image.
transform = transforms.Compose([
    transforms.Resize((32, 32)),  # Resize the image to 32x32 (required for CIFAR-10)
    transforms.ToTensor(),  # Convert PIL Image to a tensor
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),  # Normalize the image to [-1, 1]
])

# We use our custom CIFAR-10 dataset class to convert the dataset to a format
# that the torch DataLoader can use.
train_ds = CustomCIFAR10Dataset(train_images, train_labels, transform=transform)
test_ds = CustomCIFAR10Dataset(test_images, test_labels, transform=transform)

# LOADERS FOR DATASET
train_loader = DataLoader(train_ds, batch_size, shuffle=True, num_workers=nw)
# Evaluation only sums correct predictions, so sample order is irrelevant and
# shuffling the test set is unnecessary.
test_loader = DataLoader(test_ds, batch_size, shuffle=False, num_workers=nw)
84
+
85
+
86
+ # The neural net class
87
class Net(nn.Module):
    """Small convolutional classifier built as a single ``nn.Sequential``.

    Layout: three 3x3 convolutions (``nc`` -> 16 -> 64 -> 32 channels), one
    2x2 max pool, a flatten, then two fully connected layers producing
    ``output`` scores.

    NOTE(review): no activation function sits between the layers, so apart
    from the max pool the stack is purely linear.
    """

    def __init__(self):
        super().__init__()

        # Shared by all three convolutions: kernel size 3, stride 1, padding 1.
        conv_args = dict(kernel_size=3, stride=1, padding=1)

        layers = [
            # three convolutional layers
            nn.Conv2d(nc, 16, **conv_args),
            nn.Conv2d(16, 64, **conv_args),
            nn.Conv2d(64, 32, **conv_args),
            # max pooling with kernel size 2 and stride 2; reduces the
            # spatial dimensions of the feature maps
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Flatten(),
            # 32 channels * 16 * 16 — assumes 32x32 inputs halved by the pool;
            # adjust if the conv/pool stack or the input size changes
            nn.Linear(32 * 16 * 16, 16),
            nn.Linear(16, output),
        ]

        # Author's note: placing all layers in nn.Sequential brought a +4%
        # accuracy improvement.
        self.network = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the sequential stack and return the class scores."""
        return self.network(x)
118
+
119
+ # creates instance of the model
120
model = Net()

# create the optimizer and criterion
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9)
# Maybe use Adam

# moves model to device (ie. cpu/gpu)
model.to(device)

# Report the running loss every `log_interval` mini-batches. The previous
# interval of 2000 was never reached: 50000 training images at a batch size
# of 128 give only ~391 batches per epoch.
log_interval = 100

print("started training")
for epoch in range(num_epochs):
    model.train()  # set model to training mode (important when using dropout or batch normalization)

    running_loss = 0.0
    for batch_idx, (images, labels) in enumerate(train_loader):
        inputs = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()  # reset gradients

        # forward pass
        predictions = model(inputs)

        # compute loss
        loss = criterion(predictions, labels)

        # backpropagation
        loss.backward()

        # update the model's parameters
        optimizer.step()

        # print statistics: mean loss over the last `log_interval` batches
        running_loss += loss.item()
        if (batch_idx + 1) % log_interval == 0:
            print(f'[{epoch + 1}, {batch_idx + 1:5d}] loss: {running_loss / log_interval:.3f}')
            running_loss = 0.0

    # one-based, so the final epoch reads "N/N" rather than "N-1/N"
    print(f"epoch: {epoch + 1}/{num_epochs}")

print("finished training")


# After training, evaluate the model on the test dataset to get final performance metrics
model.eval()  # Set the model to evaluation mode (important when using dropout or batch normalization)
correct = 0
total = 0

with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass
        predictions = model(images)

        # Predicted class = index of the highest score for each image.
        # Gradients are already disabled here, so `.data` is not needed.
        _, predicted = torch.max(predictions, 1)

        # Count the total number of labels in the test dataset
        total += labels.size(0)

        # Count the number of correct predictions
        correct += (predicted == labels).sum().item()

# calculate the accuracy
accuracy = correct / total
print(f"Accuracy on the test dataset: {accuracy:.2%}")
191
+
192
+
193
+ ## IMPROVEMENTS/DEGRADATIONS ##
194
+ # BASELINE: ~51-54%
195
+
196
+ # After AutoAugment(CIFAR10): ~40%
197
+
198
+ # After Dropout: ~51-52%
199
+
200
+ # After adding another fully connected layer (64 in, 16 out): ~50-51%
201
+
202
+ # After adding weight decay to optimizer: (0.01): ~51+%
203
+
204
+ # ADDED: After adding all layers to nn.Sequential: ~55-57%
205
+
206
+ # After adding a 3rd Conv2d layer (64, 32)
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ datasets
2
+ Pillow