Upload 4 files
Browse files- CustomCIFAR10Dataset.py +23 -0
- cifar10.py +121 -0
- model.py +206 -0
- requirements.txt +2 -0
CustomCIFAR10Dataset.py
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from PIL import Image
|
2 |
+
from torch.utils.data import Dataset
|
3 |
+
|
4 |
+
# A custom Dataset class is needed so the CIFAR-10 images and labels can be loaded with the torch DataLoader
|
5 |
+
# Custom dataset class for CIFAR-10 images
|
6 |
+
class CustomCIFAR10Dataset(Dataset):
    """Map-style dataset that pairs each image with its label.

    Args:
        images: Indexable, sized collection of images.
        labels: Indexable collection of labels, addressed with the same
            index as ``images``.
        transform: Optional callable applied to an image each time it is
            fetched; ``None`` returns the stored image unchanged.
    """

    def __init__(self, images, labels, transform=None):
        self.images, self.labels, self.transform = images, labels, transform

    def __len__(self):
        """Return the number of stored images."""
        return len(self.images)

    def __getitem__(self, index):
        """Return the ``(image, label)`` pair stored at ``index``."""
        sample, target = self.images[index], self.labels[index]

        # Without a transform the stored image is handed back as-is.
        if self.transform is None:
            return sample, target

        return self.transform(sample), target
|
cifar10.py
ADDED
@@ -0,0 +1,121 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# coding=utf-8
|
2 |
+
# Copyright 2020 The TensorFlow Datasets Authors and the HuggingFace Datasets Authors.
|
3 |
+
#
|
4 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
5 |
+
# you may not use this file except in compliance with the License.
|
6 |
+
# You may obtain a copy of the License at
|
7 |
+
#
|
8 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9 |
+
#
|
10 |
+
# Unless required by applicable law or agreed to in writing, software
|
11 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13 |
+
# See the License for the specific language governing permissions and
|
14 |
+
# limitations under the License.
|
15 |
+
|
16 |
+
# Lint as: python3
|
17 |
+
"""CIFAR-10 Data Set"""
|
18 |
+
|
19 |
+
|
20 |
+
import pickle
|
21 |
+
|
22 |
+
import numpy as np
|
23 |
+
|
24 |
+
import datasets
|
25 |
+
from datasets.tasks import ImageClassification
|
26 |
+
|
27 |
+
|
28 |
+
_CITATION = """\
|
29 |
+
@TECHREPORT{Krizhevsky09learningmultiple,
|
30 |
+
author = {Alex Krizhevsky},
|
31 |
+
title = {Learning multiple layers of features from tiny images},
|
32 |
+
institution = {},
|
33 |
+
year = {2009}
|
34 |
+
}
|
35 |
+
"""
|
36 |
+
|
37 |
+
_DESCRIPTION = """\
|
38 |
+
The CIFAR-10 dataset consists of 60000 32x32 colour images in 10 classes, with 6000 images
|
39 |
+
per class. There are 50000 training images and 10000 test images.
|
40 |
+
"""
|
41 |
+
|
42 |
+
# URL of the CIFAR-10 archive (a .tar.gz) that the builder downloads.
_DATA_URL = "https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz"

# Class names handed to ClassLabel(names=_NAMES) in Cifar10._info().
# NOTE(review): presumably position i is the name of integer label i stored in
# the pickled batches -- confirm before reordering.
_NAMES = [
    "airplane",
    "automobile",
    "bird",
    "cat",
    "deer",
    "dog",
    "frog",
    "horse",
    "ship",
    "truck",
]
|
56 |
+
|
57 |
+
|
58 |
+
class Cifar10(datasets.GeneratorBasedBuilder):
    """Dataset builder for the CIFAR-10 image-classification data set.

    Downloads the archive at ``_DATA_URL`` and yields one example per image,
    each with an ``img`` array and an integer ``label``.
    """

    BUILDER_CONFIGS = [
        datasets.BuilderConfig(
            name="plain_text",
            version=datasets.Version("1.0.0", ""),
            description="Plain text import of CIFAR-10 Data Set",
        )
    ]

    def _info(self):
        """Return the dataset metadata: features, supervised keys, homepage, citation."""
        return datasets.DatasetInfo(
            description=_DESCRIPTION,
            features=datasets.Features(
                {
                    "img": datasets.Image(),
                    "label": datasets.features.ClassLabel(names=_NAMES),
                }
            ),
            supervised_keys=("img", "label"),
            homepage="https://www.cs.toronto.edu/~kriz/cifar.html",
            citation=_CITATION,
            task_templates=ImageClassification(image_column="img", label_column="label"),
        )

    def _split_generators(self, dl_manager):
        """Download the archive and declare the train and test splits.

        The archive is downloaded once; each split receives its own
        ``iter_archive`` iterator over it plus the split name, both of which
        are forwarded to ``_generate_examples``.
        """
        archive = dl_manager.download(_DATA_URL)

        return [
            datasets.SplitGenerator(
                name=datasets.Split.TRAIN,
                gen_kwargs={"files": dl_manager.iter_archive(archive), "split": "train"},
            ),
            datasets.SplitGenerator(
                name=datasets.Split.TEST,
                gen_kwargs={"files": dl_manager.iter_archive(archive), "split": "test"},
            ),
        ]

    def _generate_examples(self, files, split):
        """Yield ``(key, example)`` pairs for one split.

        Args:
            files: Iterable of ``(path, file_object)`` pairs for the archive
                members.
            split: ``"train"`` or ``"test"``; selects which batch files of the
                archive are read.

        Yields:
            A key of the form ``"<archive path>_<index in batch>"`` and a dict
            with the image under ``"img"`` and its label under ``"label"``.

        Raises:
            ValueError: If ``split`` is neither ``"train"`` nor ``"test"``.
        """
        if split == "train":
            batch_names = ["data_batch_1", "data_batch_2", "data_batch_3", "data_batch_4", "data_batch_5"]
        elif split == "test":
            batch_names = ["test_batch"]
        else:
            raise ValueError(f"Unknown split {split!r}; expected 'train' or 'test'")
        wanted_paths = {f"cifar-10-batches-py/{filename}" for filename in batch_names}

        for path, fo in files:
            if path not in wanted_paths:
                continue

            # SECURITY NOTE: unpickling executes arbitrary code if the archive
            # is tampered with; this trusts the download from _DATA_URL.
            batch = pickle.load(fo, encoding="bytes")

            labels = batch[b"labels"]
            images = batch[b"data"]

            for idx, flat_image in enumerate(images):
                # Each row is reshaped to (3, 32, 32) and then reordered to
                # (32, 32, 3), i.e. the channel axis is moved last.
                img_reshaped = np.transpose(np.reshape(flat_image, (3, 32, 32)), (1, 2, 0))

                yield f"{path}_{idx}", {
                    "img": img_reshaped,
                    "label": labels[idx],
                }
|
model.py
ADDED
@@ -0,0 +1,206 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import datasets
|
3 |
+
import torch
|
4 |
+
import torch.nn as nn
|
5 |
+
import torch.nn.functional as F
|
6 |
+
import torch.optim as optim
|
7 |
+
import torchvision.transforms as transforms
|
8 |
+
from torch.utils.data import DataLoader, Dataset
|
9 |
+
from PIL import Image
|
10 |
+
|
11 |
+
from cifar10 import Cifar10
|
12 |
+
from CustomCIFAR10Dataset import CustomCIFAR10Dataset
|
13 |
+
|
14 |
+
|
15 |
+
# Create the dataset builder instance.
cifar10_builder = Cifar10()
# Download the archive and prepare the dataset.
cifar10_builder.download_and_prepare()

# Generate the dataset ('train' and 'test' portions).
train_data = cifar10_builder.as_dataset(split='train')
test_data = cifar10_builder.as_dataset(split='test')

# Read each column once and reuse the result below; reading a whole "img"
# column is the slow path (see the note on decoding further down).
train_images = train_data["img"]
train_labels = train_data["label"]

test_images = test_data["img"]
test_labels = test_data["label"]

# CIFAR-10 classes
classes = ("airplane", "automobile", "bird", "cat", "deer",
           "dog", "frog", "horse", "ship", "truck")

# # We can plot and access the images like this:
# from matplotlib import pyplot as plt
# from matplotlib import image as mpimg

# # Indexing first and then taking "img" is faster because only the chosen
# # image is decoded (index -> decode is faster than decode -> index).
# plt.imshow(train_data[0]["img"])
# plt.show()

# PARAMETERS
# batch size during training
batch_size = 128

# image size (side length in pixels)
img_size = 32

# number of channels in image (3, because RGB in this case)
nc = 3

# output size (10 classes)
output = len(classes)

# number of GPUs (pick 0 for CPU)
ngpu = 0

# number of DataLoader worker processes
nw = 0

# number of training epochs
num_epochs = 5

# learning rate
learning_rate = 0.0022

# Use the first CUDA device only when one is available AND ngpu > 0.
device = torch.device("cuda:0" if (torch.cuda.is_available()) and (ngpu > 0) else "cpu")

# Image transforms. Converting to a tensor is essential: the DataLoader cannot
# batch the images otherwise.
transform = transforms.Compose([
    transforms.Resize((img_size, img_size)),  # Resize the image to img_size x img_size
    transforms.ToTensor(),  # Convert PIL Image to a tensor
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),  # Normalize the image to [-1, 1]
])

# Wrap the columns in the custom dataset class so the torch DataLoader can use
# them; the columns read above are reused instead of being read a second time.
train_ds = CustomCIFAR10Dataset(train_images, train_labels, transform=transform)
test_ds = CustomCIFAR10Dataset(test_images, test_labels, transform=transform)

# LOADERS FOR DATASET
train_loader = DataLoader(train_ds, batch_size, shuffle=True, num_workers=nw)
# Evaluation only counts correct predictions, so the order is irrelevant and
# the test set does not need shuffling.
test_loader = DataLoader(test_ds, batch_size, shuffle=False, num_workers=nw)
|
84 |
+
|
85 |
+
|
86 |
+
# The neural network class
|
87 |
+
class Net(nn.Module):
    """Small convolutional classifier.

    Layout: three 3x3 convolutions (``in_channels`` -> 16 -> 64 -> 32, stride 1,
    padding 1), one max-pooling layer (kernel 2, stride 2), a flatten, and two
    fully connected layers ending in ``num_classes`` outputs.

    NOTE(review): there is no activation function between the layers, whereas
    the earlier hand-written forward pass applied ReLU after each convolution
    and the first fully connected layer. Confirm the omission is intentional.

    Args:
        in_channels: Number of input channels. ``None`` (default) uses the
            module-level ``nc``.
        num_classes: Number of output scores. ``None`` (default) uses the
            module-level ``output``.
        image_size: Side length of the square input images; sizes the first
            fully connected layer. Defaults to 32.
    """

    def __init__(self, in_channels=None, num_classes=None, image_size=32):
        super().__init__()

        if in_channels is None:
            in_channels = nc
        if num_classes is None:
            num_classes = output

        # The convolutions keep height/width (kernel 3, padding 1, stride 1);
        # the 2x2 stride-2 pooling halves them.
        pooled_size = image_size // 2

        # Author's note: placing all layers in nn.Sequential brought a +4%
        # accuracy improvement.
        self.network = nn.Sequential(
            # three convolutional layers
            nn.Conv2d(in_channels, 16, kernel_size=3, stride=1, padding=1),
            nn.Conv2d(16, 64, kernel_size=3, stride=1, padding=1),
            nn.Conv2d(64, 32, kernel_size=3, stride=1, padding=1),

            # max pooling with kernel size 2 and stride 2; reduces the spatial
            # dimensions of the feature maps
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Flatten(),
            # input size = channels of the last conv layer * pooled height * pooled width
            nn.Linear(32 * pooled_size * pooled_size, 16),
            nn.Linear(16, num_classes),
        )

    def forward(self, x):
        """Run ``x`` through the sequential stack and return its output."""
        return self.network(x)
|
118 |
+
|
119 |
+
# Create an instance of the model.
model = Net()

# Create the criterion (loss) and the optimizer.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9)
# Maybe use Adam

# Move the model to the chosen device (i.e. CPU/GPU).
model.to(device)

# How often (in mini-batches) the running loss is reported. The previous
# interval of 2000 was never reached: 50000 training images at batch size 128
# give only ~391 mini-batches per epoch.
log_interval = 100

print("started training")
for epoch in range(num_epochs):
    model.train()  # training mode (matters when using dropout or batch normalization)

    running_loss = 0.0
    for batch_idx, (images, labels) in enumerate(train_loader):
        inputs = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()  # reset gradients

        # forward pass
        predictions = model(inputs)

        # compute loss
        loss = criterion(predictions, labels)

        # backpropagation
        loss.backward()

        # update the model's parameters
        optimizer.step()

        # print statistics: mean loss over the last `log_interval` mini-batches
        running_loss += loss.item()
        if (batch_idx + 1) % log_interval == 0:
            print(f'[{epoch + 1}, {batch_idx + 1:5d}] loss: {running_loss / log_interval:.3f}')
            running_loss = 0.0

    # 1-based so the final epoch reads "N/N" rather than "N-1/N"
    print(f"epoch: {epoch + 1}/{num_epochs}")

print("finished training")


# After training, evaluate the model on the test dataset to get the final accuracy.
model.eval()  # evaluation mode (matters when using dropout or batch normalization)
correct = 0
total = 0

with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)

        # forward pass
        predictions = model(images)

        # predicted class for each image = index of the highest score
        predicted = predictions.argmax(dim=1)

        # count the labels seen so far
        total += labels.size(0)

        # count the correct predictions
        correct += (predicted == labels).sum().item()

# Calculate the accuracy; guard against an empty test loader.
accuracy = correct / total if total else 0.0
print(f"Accuracy on the test dataset: {accuracy:.2%}")
|
191 |
+
|
192 |
+
|
193 |
+
## IMPROVEMENTS/DEGRADATIONS ##
|
194 |
+
# BASELINE: ~51-54%
|
195 |
+
|
196 |
+
# After AutoAugment(CIFAR10): ~40%
|
197 |
+
|
198 |
+
# After Dropout: ~51-52%
|
199 |
+
|
200 |
+
# After adding another fully connected layer (64 in, 16 out): ~50-51%
|
201 |
+
|
202 |
+
# After adding weight decay to optimizer: (0.01): ~51+%
|
203 |
+
|
204 |
+
# ADDED: After adding all layers to nn.Sequential: ~55-57%
|
205 |
+
|
206 |
+
# After adding a 3rd Conv2d layer (64, 32)
|
requirements.txt
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
datasets
|
2 |
+
Pillow
|