"""Train a small convolutional network on CIFAR-10 and report its test accuracy."""
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Dataset
from cifar10 import Cifar10
from CustomCIFAR10Dataset import CustomCIFAR10Dataset
# Instantiate the dataset builder and make sure the data is available locally.
cifar10_builder = Cifar10()
cifar10_builder.download_and_prepare()

# Materialise both splits (train first, then test).
train_data, test_data = (
    cifar10_builder.as_dataset(split=split_name)
    for split_name in ("train", "test")
)

# Pull the image and label columns out of each split once.
train_images, train_labels = train_data["img"], train_data["label"]
test_images, test_labels = test_data["img"], test_data["label"]
# Human-readable CIFAR-10 class names; the position in the tuple is the label id.
classes = (
    "airplane",
    "automobile",
    "bird",
    "cat",
    "deer",
    "dog",
    "frog",
    "horse",
    "ship",
    "truck",
)
# # We can plot and access the images like this:
# from matplotlib import pyplot as plt
# from matplotlib import image as mpimg
# # Index the row first and then select "img": only the chosen image is decoded,
# # so (index -> decode) is faster than (decode the whole column -> index).
# plt.imshow(train_data[0]["img"])
# plt.show()
# ---------------------------------------------------------------------------
# Hyper-parameters and run configuration
# ---------------------------------------------------------------------------
batch_size = 128       # samples per mini-batch during training
img_size = 32          # side length of the (square) input images
nc = 3                 # image channels (RGB)
output = len(classes)  # number of network outputs, one per class (10)
ngpu = 0               # number of GPUs to use; 0 selects the CPU
nw = 0                 # DataLoader worker processes
num_epochs = 5         # full passes over the training set
learning_rate = 0.0022

# Run on the first CUDA device only when one is available AND GPUs were requested;
# otherwise stay on the CPU.
device = torch.device(
    "cpu" if not torch.cuda.is_available() or ngpu <= 0 else "cuda:0"
)
# Preprocessing applied to every image. Converting to a tensor is essential:
# without it the DataLoader won't accept the images.
transform = transforms.Compose([
    # Resize to the configured square input size (32x32 for CIFAR-10).
    transforms.Resize((img_size, img_size)),
    # Convert the PIL image to a tensor.
    transforms.ToTensor(),
    # Shift/scale each channel with mean 0.5 and std 0.5, i.e. [0, 1] -> [-1, 1].
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

# Wrap the raw columns in the custom dataset class so the torch DataLoader can
# consume them. The columns were already extracted right after the splits were
# loaded, so reuse them instead of pulling them out of the dataset a second time.
train_ds = CustomCIFAR10Dataset(train_images, train_labels, transform=transform)
test_ds = CustomCIFAR10Dataset(test_images, test_labels, transform=transform)

# LOADERS FOR DATASET
# Training batches are reshuffled every epoch; evaluation order does not affect
# accuracy, so the test loader keeps a fixed order for reproducible runs.
train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True, num_workers=nw)
test_loader = DataLoader(test_ds, batch_size=batch_size, shuffle=False, num_workers=nw)
# The neural-net class
class Net(nn.Module):
    """Small CNN image classifier.

    Architecture: two 3x3 convolutions (16 and 32 channels), one 2x2 max-pool,
    then three fully connected layers (64 -> 16 -> ``num_classes``). A ReLU
    follows every convolution and every hidden linear layer; without these
    non-linearities the stacked layers would collapse into a single linear map.

    NOTE: the ReLU modules live inside ``self.network``, so the positional
    indices of the later layers differ from an activation-free version of this
    model; checkpoints saved from such a version will not load directly.

    Args:
        in_channels: Number of channels of the input images. When ``None``,
            the module-level ``nc`` setting is used.
        num_classes: Number of output scores. When ``None``, the module-level
            ``output`` setting is used.
        image_size: Side length of the (square) input images. When ``None``,
            the module-level ``img_size`` setting is used.
    """

    def __init__(self, in_channels=None, num_classes=None, image_size=None):
        super().__init__()
        # Fall back to the script-level configuration for anything not given,
        # so a plain ``Net()`` builds the same layer sizes as before.
        in_channels = nc if in_channels is None else in_channels
        num_classes = output if num_classes is None else num_classes
        image_size = img_size if image_size is None else image_size

        # The padded 3x3 convolutions keep the spatial size; the 2x2 pool halves it.
        pooled_size = image_size // 2
        flat_features = 32 * pooled_size * pooled_size

        self.network = nn.Sequential(
            # Two convolutional layers: kernel 3, stride 1, padding 1.
            nn.Conv2d(in_channels, 16, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            # Max pooling with kernel size 2 and stride 2 halves the spatial
            # dimensions of the feature maps.
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Flatten(),
            # Fully connected classifier head.
            nn.Linear(flat_features, 64),
            nn.ReLU(),
            nn.Linear(64, 16),
            nn.ReLU(),
            # Final layer emits one raw score per class (no activation).
            nn.Linear(16, num_classes),
        )

    def forward(self, x):
        """Return the per-class scores produced by ``self.network`` for batch ``x``."""
        return self.network(x)
# Build the network and place it on the selected device (i.e. CPU/GPU).
model = Net().to(device)

# Loss function used for training.
criterion = nn.CrossEntropyLoss()

# Adam yields much better results than SGD for this model.
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)
print("started training")
for epoch in range(num_epochs):
    # Training mode (important when using dropout or batch normalization).
    model.train()
    running_loss = 0.0
    pending = 0  # batches accumulated since the last loss report
    for batch_idx, (images, labels) in enumerate(train_loader):
        inputs = images.to(device)
        labels = labels.to(device)

        # Reset gradients left over from the previous step.
        optimizer.zero_grad()
        # Forward pass and loss.
        predictions = model(inputs)
        loss = criterion(predictions, labels)
        # Backpropagation and parameter update.
        loss.backward()
        optimizer.step()

        # Statistics: report the mean loss every 2000 mini-batches.
        running_loss += loss.item()
        pending += 1
        if pending == 2000:
            print(f'[{epoch + 1}, {batch_idx + 1:5d}] loss: {running_loss / 2000:.3f}')
            running_loss = 0.0
            pending = 0
    # When an epoch has fewer than 2000 batches (or ends mid-window) the check
    # above never fires for the tail, so report whatever is left over here.
    if pending:
        print(f'[{epoch + 1}, {batch_idx + 1:5d}] loss: {running_loss / pending:.3f}')
    print(f"epoch: {epoch + 1}/{num_epochs}")
print("finished training")
# After training, evaluate the model on the test dataset to get the final accuracy.
# Evaluation mode (important when using dropout or batch normalization).
model.eval()
correct = 0
total = 0
# No gradients are needed for evaluation.
with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        # Forward pass
        predictions = model(images)
        # Predicted class = index of the highest score for each image.
        predicted = predictions.argmax(dim=1)
        # Count the labels seen so far and how many were predicted correctly.
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
# Guard against an empty test loader so the script does not divide by zero.
accuracy = correct / total if total else 0.0
print(f"Accuracy on the test dataset: {accuracy:.2%}")
## IMPROVEMENTS/DEGRADATIONS ##
# BASELINE: ~51-54%
# After AutoAugment(CIFAR10): ~40%
# After Dropout: ~51-52%
# After adding another fully connected layer (64 in, 16 out): ~50-51%
# After adding weight decay (0.01) to the optimizer: ~51+%
# ADDED: After adding all layers to nn.Sequential: ~55-57%
# ADDED: After using optim.Adam instead of optim.SGD: ~61-62%
# MAX observed accuracy: 63.20%