# Cifar10/expModel.py
# expModel.py with 75.58% accuracy at 33 epochs (commit b2caf23)
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Dataset
from cifar10 import Cifar10
from CustomCIFAR10Dataset import CustomCIFAR10Dataset
# create dataset builder instance
cifar10_builder = Cifar10()
# downloads the dataset
cifar10_builder.download_and_prepare()
# generate the dataset splits ('train' and 'test')
train_data = cifar10_builder.as_dataset(split='train')
test_data = cifar10_builder.as_dataset(split='test')
train_images = train_data["img"]
train_labels = train_data["label"]
test_images = test_data["img"]
test_labels = test_data["label"]
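# Quick sanity check (not part of the original script): CIFAR-10 ships 50,000 training and
# 10,000 test images, so the split sizes should match, assuming the label columns behave
# like Python sequences.
print(f"train examples: {len(train_labels)}, test examples: {len(test_labels)}")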
# Cifar10 classes
classes = ("airplane", "automobile", "bird", "cat", "deer",
"dog", "frog", "horse", "ship", "truck")
# PARAMETERS
# batch size during training
batch_size = 128
# image size
img_size = 32
# number of channels in image (3, because RGB in this case)
nc = 3
# output size (10 classes)
output = len(classes)
# Num of GPUs (pick 0 for CPU)
ngpu = 1
# number of workers
nw = 0
# number of training epochs
num_epochs = 33
# learning rate
learning_rate = 0.0022
# chooses which device to use
device = torch.device("cuda:0" if (torch.cuda.is_available()) and (ngpu > 0) else "cpu")
# transforms for the images. CONVERT TO TENSOR VERY IMPORTANT, OTHERWISE THE DATALOADER WON'T ACCEPT THE IMAGE
transform = transforms.Compose([
    transforms.Resize((32, 32)),                              # Resize the image to 32x32 (CIFAR-10 images are already this size, so this is a safeguard)
    transforms.RandomResizedCrop(32, scale=(0.8, 0.8)),       # Random crop covering ~80% of the image area, resized back to 32x32 (image augmentation)
    transforms.RandomHorizontalFlip(),                        # Random horizontal flip (image augmentation)
    transforms.ToTensor(),                                    # Convert PIL Image to a tensor
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),   # Normalize the image to [-1, 1]
])
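# Note: Normalize with mean 0.5 and std 0.5 maps each channel from [0, 1] to [-1, 1],
# since out = (in - 0.5) / 0.5.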
# We use our custom CIFAR-10 dataset class to convert the dataset to a format the torch DataLoader can use
train_ds = CustomCIFAR10Dataset(train_data["img"], train_data["label"], transform=transform)
test_ds = CustomCIFAR10Dataset(test_data["img"], test_data["label"], transform=transform)
# LOADERS FOR DATASET
train_loader = DataLoader(train_ds, batch_size, shuffle=True, num_workers=nw)
test_loader = DataLoader(test_ds, batch_size, shuffle=True, num_workers=nw)
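# Note: shuffle=True on the test loader is harmless but unnecessary; evaluation accuracy
# does not depend on batch order.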
# The neural net class
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.network = nn.Sequential(
            # first 2 convolutional layers
            nn.Conv2d(nc, 16, kernel_size=3, stride=1, padding=1),  # a convolutional layer with 3 input channels, 16 output channels,
                                                                     # a kernel size of 3, a stride of 1, and padding of 1
            nn.BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=1, stride=1),  # kernel size 1 with stride 1 leaves the spatial dims unchanged (effectively a no-op)
            nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True),
            nn.ReLU(),
            nn.MaxPool2d(1, 1),
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=1, stride=1),
            nn.Conv2d(64, 32, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=1, stride=1),
            nn.Conv2d(32, 16, kernel_size=3, stride=1, padding=1),
            # max pooling layer
            nn.MaxPool2d(kernel_size=2, stride=2),  # a max pooling layer with kernel size of 2 and stride of 2;
                                                    # helps reduce spatial dimensions of the feature maps (32x32 -> 16x16)
            nn.Flatten(),
            nn.Linear(16 * 16 * 16, 64),  # input size matches the flattened output of the last conv layer (16 channels x 16 x 16)
            nn.Linear(64, 128),
            nn.Dropout(0.2),
            nn.Linear(128, 16),
            nn.Linear(16, output),
        )

    def forward(self, x):
        return self.network(x)
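# Optional sanity check (not in the original script): a dummy forward pass confirms that the
# single MaxPool2d(2, 2) halves 32x32 to 16x16 over 16 channels, i.e. 16 * 16 * 16 = 4096
# features feeding the first Linear layer, and that the output has 10 class logits.
_dummy_out = Net()(torch.zeros(2, nc, img_size, img_size))
print("dummy output shape:", _dummy_out.shape)  # expected: torch.Size([2, 10])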
# creates an instance of the model
model = Net()
print("model parameters:", sum(p.numel() for p in model.parameters()))
# create the optimizer and criterion
criterion = nn.CrossEntropyLoss()
# Adam optimizer yields much better results than SGD
optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=0.001)
# moves the model to the device (i.e. CPU/GPU)
model.to(device)
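# Note: model.to(device) moves the parameters in place, so the optimizer created above still
# references them; the more common ordering is to move the model to the device before
# constructing the optimizer.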
# accuracy array (for plotting)
accuracyArr = []
print("started training")
for epoch in range(num_epochs):
    model.train()  # set model to training mode (important when using dropout or batch normalization)
    running_loss = 0.0
    for batch_idx, (images, labels) in enumerate(train_loader):
        inputs = images.to(device)
        labels = labels.to(device)
        # print("inputs shape: ", inputs.shape)

        optimizer.zero_grad()  # reset gradients
        # forward pass
        predictions = model(inputs)
        # compute loss
        loss = criterion(predictions, labels)
        # backpropagation
        loss.backward()
        # update the model's parameters
        optimizer.step()

        # accumulate statistics
        running_loss += loss.item()

    # print epoch statistics
    print(f"epoch: {epoch + 1}/{num_epochs} Loss: {running_loss}")
    # Evaluate the model on the test dataset after each epoch to track performance
    model.eval()  # Set the model to evaluation mode (important when using dropout or batch normalization)
    correct = 0
    total = 0
    with torch.no_grad():
        for batch_idx, (images, labels) in enumerate(test_loader):
            images = images.to(device)
            labels = labels.to(device)
            # Forward pass
            predictions = model(images)
            # Compute evaluation metrics (here: accuracy)
            # get the predicted class for each image
            _, predicted = torch.max(predictions.data, 1)
            # Count the total number of labels in the test dataset
            total += labels.size(0)
            # Count the number of correct predictions
            correct += (predicted == labels).sum().item()

    # calculate the accuracy
    accuracy = correct / total
    accuracyArr.append(accuracy)
    print(f"Accuracy on the test dataset: {accuracy:.2%}, epoch: {epoch + 1}")
# After training, evaluate the model on the test dataset to get final performance metrics
model.eval()  # Set the model to evaluation mode (important when using dropout or batch normalization)
correct = 0
total = 0
with torch.no_grad():
    for batch_idx, (images, labels) in enumerate(test_loader):
        images = images.to(device)
        labels = labels.to(device)
        # Forward pass
        predictions = model(images)
        # Compute evaluation metrics (e.g., accuracy, precision, recall, etc.)
        # get the predicted class for each image
        _, predicted = torch.max(predictions.data, 1)
        # Count the total number of labels in the test dataset
        total += labels.size(0)
        # Count the number of correct predictions
        correct += (predicted == labels).sum().item()

# calculate the accuracy
accuracy = correct / total
print(f"Accuracy on the test dataset: {accuracy:.2%}")
## IMPROVEMENTS/DEGRADATIONS ##
# BASELINE: ~51-54%
# After AutoAugment(CIFAR10): ~40%
# After Dropout: ~51-52%
# After adding another fully connected layer (64 in, 16 out): ~50-51%
# After adding weight decay to optimizer: (0.01): ~51+%
# ADDED: After adding all layers to nn.Sequential: ~55-57%
# ADDED: After using optim.Adam instead of optim.SGD: ~61-62%
# ADDED: After mimicking VGG16 architecture with: Conv2d -> ReLU -> MaxPool -> REPEAT
# MAX CURRENT ACCURACY: 72.71%
# import numpy as np
# import matplotlib.pyplot as plt
# y = np.array(accuracyArr)
# x = np.arange(1, num_epochs + 1)  # one accuracy value per epoch
# plt.plot(x, y)
# plt.show()