Upload 4 files

Browse files

Files changed (4) hide show

CustomCIFAR10Dataset.py +23 -0
cifar10.py +121 -0
model.py +206 -0
requirements.txt +2 -0

CustomCIFAR10Dataset.py ADDED Viewed

	@@ -0,0 +1,23 @@

+from PIL import Image
+from torch.utils.data import Dataset
+# We have to make a custom dataset class to load them with the torch DataLoader
+# Custom dataset class for CIFAR-10 images
+class CustomCIFAR10Dataset(Dataset):
+    """Dataset that pairs an indexable collection of images with their labels.
+
+    Args:
+        images: indexable collection of images; its length is the dataset length.
+        labels: indexable collection of labels, looked up with the same index as ``images``.
+        transform: optional callable applied to each image before it is returned.
+    """
+    def __init__(self, images, labels, transform=None):
+        self.images, self.labels = images, labels
+        self.transform = transform
+    def __len__(self):
+        """Return the number of images held by the dataset."""
+        return len(self.images)
+    def __getitem__(self, index):
+        """Return the ``(image, label)`` pair stored at ``index``."""
+        sample = self.images[index]
+        target = self.labels[index]
+        # Without a transform the stored image is handed back untouched
+        if self.transform is None:
+            return sample, target
+        return self.transform(sample), target

cifar10.py ADDED Viewed

	@@ -0,0 +1,121 @@

+# coding=utf-8
+# Copyright 2020 The TensorFlow Datasets Authors and the HuggingFace Datasets Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# Lint as: python3
+"""CIFAR-10 Data Set"""
+import pickle
+import numpy as np
+import datasets
+from datasets.tasks import ImageClassification
+_CITATION = """\
+@TECHREPORT{Krizhevsky09learningmultiple,
+    author = {Alex Krizhevsky},
+    title = {Learning multiple layers of features from tiny images},
+    institution = {},
+    year = {2009}
+}
+"""
+_DESCRIPTION = """\
+The CIFAR-10 dataset consists of 60000 32x32 colour images in 10 classes, with 6000 images
+per class. There are 50000 training images and 10000 test images.
+"""
+_DATA_URL = "https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz"
+_NAMES = [
+    "airplane",
+    "automobile",
+    "bird",
+    "cat",
+    "deer",
+    "dog",
+    "frog",
+    "horse",
+    "ship",
+    "truck",
+]
+class Cifar10(datasets.GeneratorBasedBuilder):
+    """Dataset builder for the python (pickled) version of the CIFAR-10 archive."""
+    BUILDER_CONFIGS = [
+        datasets.BuilderConfig(
+            name="plain_text",
+            version=datasets.Version("1.0.0", ""),
+            description="Plain text import of CIFAR-10 Data Set",
+        )
+    ]
+    def _info(self):
+        """Describe the dataset: an ``img`` image column and a ``label`` class column."""
+        return datasets.DatasetInfo(
+            description=_DESCRIPTION,
+            features=datasets.Features(
+                {
+                    "img": datasets.Image(),
+                    "label": datasets.features.ClassLabel(names=_NAMES),
+                }
+            ),
+            supervised_keys=("img", "label"),
+            homepage="https://www.cs.toronto.edu/~kriz/cifar.html",
+            citation=_CITATION,
+            task_templates=ImageClassification(image_column="img", label_column="label"),
+        )
+    def _split_generators(self, dl_manager):
+        """Download the archive and declare the train and test splits.
+
+        Each split receives its own ``iter_archive`` iterator over the same downloaded archive.
+        """
+        archive = dl_manager.download(_DATA_URL)
+        return [
+            datasets.SplitGenerator(
+                name=datasets.Split.TRAIN, gen_kwargs={"files": dl_manager.iter_archive(archive), "split": "train"}
+            ),
+            datasets.SplitGenerator(
+                name=datasets.Split.TEST, gen_kwargs={"files": dl_manager.iter_archive(archive), "split": "test"}
+            ),
+        ]
+    def _generate_examples(self, files, split):
+        """Yield ``(key, example)`` pairs for one split of the archive.
+
+        Args:
+            files: iterable of ``(path, file_object)`` pairs for the archive members.
+            split: ``"train"`` or ``"test"``.
+
+        Yields:
+            A key of the form ``"<archive path>_<index>"`` and a dict holding the image
+            array under ``"img"`` and its class index under ``"label"``.
+
+        Raises:
+            ValueError: if ``split`` is neither ``"train"`` nor ``"test"``.
+        """
+        if split == "train":
+            batch_names = ["data_batch_1", "data_batch_2", "data_batch_3", "data_batch_4", "data_batch_5"]
+        elif split == "test":
+            batch_names = ["test_batch"]
+        else:
+            # Fail with a clear message instead of an UnboundLocalError further down
+            raise ValueError(f"Unknown split {split!r}; expected 'train' or 'test'")
+        wanted = {f"cifar-10-batches-py/{name}" for name in batch_names}
+        for path, fo in files:
+            if path not in wanted:
+                continue
+            # NOTE(security): unpickling can execute arbitrary code; this trusts the archive behind _DATA_URL
+            batch = pickle.load(fo, encoding="bytes")
+            labels = batch[b"labels"]
+            images = batch[b"data"]
+            for idx, pixels in enumerate(images):
+                # Each row is reshaped to (3, 32, 32) and then reordered to (32, 32, 3)
+                img_reshaped = np.transpose(np.reshape(pixels, (3, 32, 32)), (1, 2, 0))
+                yield f"{path}_{idx}", {
+                    "img": img_reshaped,
+                    "label": labels[idx],
+                }

model.py ADDED Viewed

	@@ -0,0 +1,206 @@

+import os
+import datasets
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import torch.optim as optim
+import torchvision.transforms as transforms
+from torch.utils.data import DataLoader, Dataset
+from PIL import Image
+from cifar10 import Cifar10
+from CustomCIFAR10Dataset import CustomCIFAR10Dataset
+# create dataset builder instance
+cifar10_builder = Cifar10()
+# downloads the dataset
+cifar10_builder.download_and_prepare()
+# generate the dataset ('train', 'test' portion)
+train_data = cifar10_builder.as_dataset(split='train')
+test_data = cifar10_builder.as_dataset(split='test')
+# Read each column once; the same objects are reused below when the torch datasets are built
+train_images = train_data["img"]
+train_labels = train_data["label"]
+test_images = test_data["img"]
+test_labels = test_data["label"]
+# Cifar10 classes
+classes =  ("airplane", "automobile", "bird", "cat", "deer",
+            "dog", "frog", "horse", "ship", "truck")
+# # we can plot and access the images like this
+# from matplotlib import pyplot as plt
+# from matplotlib import image as mpimg
+# # doing index first and then "img" is faster because image is decoded immediately when chosen (index -> decoding is faster than decoding -> index)
+# plt.imshow(train_data[0]["img"])
+# plt.show()
+# PARAMETERS
+# batch size during training
+batch_size = 128
+# image size
+img_size = 32
+# number of channels in image (3, because RGB in this case)
+nc = 3
+# output size (10 classes)
+output = len(classes)
+# Num of GPUs (pick 0 for CPU)
+ngpu = 0
+# number of workers
+nw = 0
+# number of training epochs
+num_epochs = 5
+# learning rate
+learning_rate = 0.0022
+# chooses which device to use: the first GPU only when CUDA is available and ngpu > 0, otherwise the CPU
+device = torch.device("cuda:0" if (torch.cuda.is_available()) and (ngpu > 0) else "cpu")
+# Transforms for each image. Converting to a tensor is essential, otherwise the DataLoader won't accept the image
+transform = transforms.Compose([
+    transforms.Resize((img_size, img_size)),  # Resize the image to img_size x img_size (32x32 for CIFAR-10)
+    transforms.ToTensor(),               # Convert PIL Image to a tensor
+    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),  # Normalize the image to [-1, 1]
+])
+# We use our custom CIFAR-10 dataset class to convert the dataset to a format that the torch DataLoader can use.
+# The columns extracted above are passed in directly instead of being read from the dataset a second time.
+train_ds = CustomCIFAR10Dataset(train_images, train_labels, transform=transform)
+test_ds = CustomCIFAR10Dataset(test_images, test_labels, transform=transform)
+# LOADERS FOR DATASET
+train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True, num_workers=nw)
+# Accuracy does not depend on sample order, so the evaluation loader is not shuffled
+test_loader = DataLoader(test_ds, batch_size=batch_size, shuffle=False, num_workers=nw)
+# The neural net class
+class Net(nn.Module):
+    """Small CNN classifier: three 3x3 convolutions, one 2x2 max-pool and two linear layers.
+
+    NOTE(review): no activation function sits between the layers. An earlier, commented-out
+    version of ``forward`` applied ReLU after each convolution and after the first fully
+    connected layer - confirm whether dropping the activations was intended.
+    """
+    def __init__(self):
+        super().__init__()
+        # All three convolutions share a 3x3 kernel with stride 1 and padding 1,
+        # so they change the channel count but not the height or width
+        conv_args = {"kernel_size": 3, "stride": 1, "padding": 1}
+        layers = [
+            nn.Conv2d(nc, 16, **conv_args),             # nc input channels -> 16 feature maps
+            nn.Conv2d(16, 64, **conv_args),
+            nn.Conv2d(64, 32, **conv_args),
+            nn.MaxPool2d(kernel_size=2, stride=2),      # 2x2 window, stride 2: halves height and width
+            nn.Flatten(),
+            nn.Linear(32 * 16 * 16, 16),                # 32 channels of 16x16 after pooling a 32x32 input
+            nn.Linear(16, output),                      # one score per class
+        ]
+        # Author's note: placing all layers in nn.Sequential brought +4% accuracy improvement
+        self.network = nn.Sequential(*layers)
+    def forward(self, x):
+        """Run ``x`` through the sequential stack and return the class scores."""
+        return self.network(x)
+# creates instance of the model
+model = Net()
+# create the optimizer and criterion
+criterion = nn.CrossEntropyLoss()
+optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9)
+# Maybe use Adam
+# moves model to device (ie. cpu/gpu)
+model.to(device)
+# Report the running loss every `log_interval` mini-batches. With batch_size = 128 one pass over the
+# 50000 CIFAR-10 training images is only about 391 mini-batches, so the previous interval of 2000 never printed.
+log_interval = 100
+print("started training")
+for epoch in range(num_epochs):
+    model.train()           # set model to training mode (important when using dropout or batch normalization)
+    running_loss = 0.0
+    for batch_idx, (images, labels) in enumerate(train_loader):
+        inputs = images.to(device)
+        labels = labels.to(device)
+        optimizer.zero_grad()       # reset gradients
+        # forward pass
+        predictions = model(inputs)
+        # compute loss
+        loss = criterion(predictions, labels)
+        # backpropagation
+        loss.backward()
+        # update the model's parameters
+        optimizer.step()
+        # accumulate the loss and print its average over the last `log_interval` mini-batches
+        running_loss += loss.item()
+        if (batch_idx + 1) % log_interval == 0:
+            print(f'[{epoch + 1}, {batch_idx + 1:5d}] loss: {running_loss / log_interval:.3f}')
+            running_loss = 0.0
+    # `epoch` is zero-based; report epoch + 1 so the last line reads num_epochs/num_epochs
+    print(f"epoch: {epoch + 1}/{num_epochs}")
+print("finished training")
+# After training, evaluate the model on the test dataset to get final performance metrics
+model.eval()  # Set the model to evaluation mode (important when using dropout or batch normalization)
+correct = 0
+total = 0
+with torch.no_grad():
+    for images, labels in test_loader:
+        images = images.to(device)
+        labels = labels.to(device)
+        # Forward pass
+        predictions = model(images)
+        # predicted class for each image = index of the highest score along the class dimension
+        predicted = predictions.argmax(dim=1)
+        # Count the total number of labels in the test dataset
+        total += labels.size(0)
+        # Count the number of correct predictions
+        correct += (predicted == labels).sum().item()
+# calculate the accuracy
+accuracy = correct/total
+print(f"Accuracy on the test dataset: {accuracy:.2%}")
+## IMPROVEMENTS/DEGRADATIONS ##
+# BASELINE: ~51-54%
+# After AutoAugment(CIFAR10):  ~40%
+# After Dropout: ~51-52%
+# After adding another fully connected layer (64 in, 16 out): ~50-51%
+# After adding weight decay to optimizer: (0.01): ~51+%
+# ADDED: After adding all layers to nn.Sequential: ~55-57%
+# After adding a 3rd Conv2d layer (64, 32)

requirements.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ datasets
2	+ Pillow