File size: 7,922 Bytes

8f16f16

# Mount Google Drive inside the Colab runtime at /content/drive.
from google.colab import drive
drive.mount('/content/drive')
# Switch the working directory to the project folder (IPython magic: only valid inside a notebook).
# NOTE(review): this path spells the drive folder 'My Drive' while the dataset paths
# further down use 'MyDrive' - confirm both resolve in the target runtime.
%cd '/content/drive/My Drive/UpsideDownDetector'

#importing all the libraries
from __future__ import print_function, division
from builtins import range, input
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Input, Lambda, Dense, Flatten, AveragePooling2D, Dropout
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import numpy as np
import os
import cv2
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, roc_curve
import matplotlib.pyplot as plt
import cv2
from glob import glob

Defining Parameters

# Size to which images are meant to be resized.
# NOTE(review): the loading code visible in this notebook resizes to (150, 150)
# and never reads this constant - confirm which size is intended.
IMAGE_SIZE = [224, 224]

# Training configuration.
epochs, batch_size = 500, 32

# One folder per class on the mounted Google Drive.
normal_path = '/content/drive/MyDrive/UpsideDownDetector/Dataset/Normal'
upsideDown_path = '/content/drive/MyDrive/UpsideDownDetector/Dataset/UpsideDown'

# Collect every entry found directly inside each class folder.
normal_files, upsideDown_files = (
    glob(class_dir + '/*') for class_dir in (normal_path, upsideDown_path)
)

Fetching images along with their labels from the dataset.

The dataset only contains images of cats. The problem is treated as a binary classification problem. There are 2 classes. The first one has normal images and the second one has upside down images. **Each class contains 500 images**.

# Load the images of both classes together with their labels
import cv2


def _load_class_images(files, label, size=(150, 150)):
    """Read, colour-convert and resize every readable image in *files*.

    Args:
        files: iterable of image file paths.
        label: class label recorded once per successfully loaded image.
        size: target size handed to ``cv2.resize``.

    Returns:
        A ``(images, labels)`` pair of parallel lists; unreadable files are
        skipped so both lists always have the same length.
    """
    images, labels = [], []
    for path in files:
        img = cv2.imread(path)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising;
            # passing that to cvtColor would crash the whole loading loop.
            print('Skipping unreadable file:', path)
            continue
        # OpenCV loads images as BGR; convert to RGB for plotting and training.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        images.append(cv2.resize(img, size))
        labels.append(label)
    return images, labels


# Local names inside the helper keep the module-level name `image`
# (imported from tensorflow.keras.preprocessing) from being overwritten.
normal_images, normal_labels = _load_class_images(normal_files, 'Normal')
upsideDown_images, upsideDown_labels = _load_class_images(upsideDown_files, 'Upside Down')

# A look at the first images of a collection (the leading entries, not a random sample)
def plot_images(images, title):
    """Show up to 40 images in a 5x8 grid under a common title.

    Args:
        images: indexable collection of images accepted by ``imshow``; only
            the first ``nrows * ncols`` entries are drawn.
        title: text placed above the grid.

    Grid cells beyond ``len(images)`` are left blank instead of raising
    IndexError.
    """
    nrows, ncols = 5, 8
    figsize = [10, 6]

    fig, ax = plt.subplots(nrows=nrows, ncols=ncols, figsize=figsize, facecolor=(1, 1, 1))

    n_available = len(images)
    for i, axi in enumerate(ax.flat):
        # Hide the axes of every cell, including the ones that stay empty.
        axi.set_axis_off()
        if i < n_available:
            axi.imshow(images[i])

    plt.suptitle(title, fontsize=24)
    # Reserve the top 10% of the figure for the suptitle.
    plt.tight_layout(pad=0.2, rect=[0, 0, 1, 0.9])
    plt.show()
# Preview each class before any numeric preprocessing.
plot_images(normal_images, 'Normal')
plot_images(upsideDown_images, 'Upside Down')

# normalizing to interval of [0,1]
# Cast to float32 first: dividing the uint8 pixel arrays directly would produce
# float64 arrays and double the memory footprint for no benefit.
normal_images = np.array(normal_images, dtype=np.float32) / 255
upsideDown_images = np.array(upsideDown_images, dtype=np.float32) / 255

Train/Test Split

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer
from tensorflow.keras.utils import to_categorical

# split each class into training (80%) and testing (20%) subsets
normal_x_train, normal_x_test, normal_y_train, normal_y_test = train_test_split(
    normal_images, normal_labels, test_size=0.2)
upsideDown_x_train, upsideDown_x_test, upsideDown_y_train, upsideDown_y_test = train_test_split(
    upsideDown_images, upsideDown_labels, test_size=0.2)

# stack both classes: upside-down samples first, then normal samples
X_train = np.concatenate((upsideDown_x_train, normal_x_train), axis=0)
X_test = np.concatenate((upsideDown_x_test, normal_x_test), axis=0)
y_train = np.concatenate((upsideDown_y_train, normal_y_train), axis=0)
y_test = np.concatenate((upsideDown_y_test, normal_y_test), axis=0)

# make labels into categories - either 0 or 1, then one-hot encode them.
# A single encoder fitted on the training labels is reused for the test labels
# so both splits are guaranteed to share the same label -> index mapping, and
# num_classes pins the one-hot width to 2 regardless of which labels occur.
label_binarizer = LabelBinarizer()
y_train = to_categorical(label_binarizer.fit_transform(y_train), num_classes=2)
y_test = to_categorical(label_binarizer.transform(y_test), num_classes=2)

# Visual sanity check of the normal-class part of each split.
plot_images(normal_x_train, 'X_train')
plot_images(normal_x_test, 'X_test')

Defining the Model

# Small CNN: four Conv2D + MaxPool stages followed by a dense classifier head.
model = keras.Sequential()

# Convolutional layer and maxpool layer 1 (input: 150x150 images with 3 channels)
model.add(keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(150, 150, 3)))
model.add(keras.layers.MaxPool2D(2, 2))

# Convolutional layer and maxpool layer 2
model.add(keras.layers.Conv2D(64, (3, 3), activation='relu'))
model.add(keras.layers.MaxPool2D(2, 2))

# Convolutional layer and maxpool layer 3
model.add(keras.layers.Conv2D(128, (3, 3), activation='relu'))
model.add(keras.layers.MaxPool2D(2, 2))

# Convolutional layer and maxpool layer 4
model.add(keras.layers.Conv2D(128, (3, 3), activation='relu'))
model.add(keras.layers.MaxPool2D(2, 2))

# This layer flattens the resulting image array to 1D array
model.add(keras.layers.Flatten())

# Hidden layer with 512 neurons and Rectified Linear Unit activation function
model.add(keras.layers.Dense(512, activation='relu'))

# Output layer: one unit per class. Softmax makes the two outputs a probability
# distribution, which is what categorical_crossentropy on one-hot labels
# expects; independent sigmoid units do not sum to 1.
model.add(keras.layers.Dense(2, activation='softmax'))

# Compile once with the loss that matches the one-hot labels. An earlier
# binary_crossentropy compile was immediately overridden and has been dropped.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Training-time augmentation. Only the generator that was actually in effect is
# kept (shifts and horizontal flips); the overwritten variant with
# rotation_range=20 was dead code.
train_aug = ImageDataGenerator(
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True,
)

model.summary()

Training

# Train on augmented batches and evaluate on the untouched test arrays after
# every epoch.
# steps_per_epoch must be a whole number of batches, so round up to include the
# final partial batch. validation_steps is omitted: it was the non-integer
# 200 / 32 and only applies to dataset-style validation input, not arrays.
steps_per_epoch = int(np.ceil(len(X_train) / batch_size))
history = model.fit(train_aug.flow(X_train, y_train, batch_size=batch_size),
                    validation_data=(X_test, y_test),
                    steps_per_epoch=steps_per_epoch,
                    epochs=100)

Generating Predictions

# Run the model over the test images; y_pred[i] holds the model's output values for X_test[i].
y_pred = model.predict(X_test, batch_size=batch_size)

Here I show 10 predictions (test samples 100–109). You can see that most of them have been classified correctly, but there are incorrect predictions as well.

# Show ten test images (indices 100-109) titled with the predicted class.
# Output column order follows the label encoding, which sorts the label
# strings: index 0 = 'Normal', index 1 = 'Upside Down'.
class_names = ['Normal', 'Upside Down']
first_shown = 100
prediction = y_pred[first_shown:first_shown + 10]
# Enumerate from first_shown so each prediction is paired with the image it was
# computed for, rather than with X_test[0..9].
for index, probability in enumerate(prediction, start=first_shown):
    predicted_class = int(np.argmax(probability))
    plt.title('%.2f' % (probability[predicted_class] * 100) + '% ' + class_names[predicted_class])
    plt.imshow(X_test[index])
    plt.show()

# Generating ROC Curve
Remember: the dataset is very small (only 500 images per class), which is why these results cannot be fully trusted.

# Hard class decisions (index of the larger output) for predictions and ground
# truth; the confusion-matrix code reads these two arrays as well.
y_pred_bin = np.argmax(y_pred, axis=1)
y_test_bin = np.argmax(y_test, axis=1)

# roc_curve sweeps a threshold over continuous scores. Feeding it the 0/1
# argmax decisions collapses the curve to a single operating point, so use the
# model's score for class index 1 instead.
fpr, tpr, thresholds = roc_curve(y_test_bin, y_pred[:, 1])
plt.plot(fpr, tpr)
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.0])
plt.rcParams['font.size'] = 12
plt.title('ROC curve for our model')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.grid(True)
plt.show()

# Confusion Matrix

def plot_confusion_matrix(normalize):
    """Draw the confusion matrix of the test predictions as an annotated heatmap.

    Reads the module-level ``y_test_bin`` (true class indices) and
    ``y_pred_bin`` (predicted class indices).

    Args:
        normalize: forwarded to ``sklearn.metrics.confusion_matrix``; ``None``
            shows raw counts, ``'true'`` normalises each row.

    Drawn with matplotlib only: the previous implementation called
    ``sns.heatmap`` although seaborn is never imported in this notebook.
    """
    classes = ['Normal', 'Upside Down']
    tick_marks = [0, 1]
    # Fix the label set so the matrix is always 2x2 and lines up with `classes`,
    # even if one class is missing from the predictions.
    cn = confusion_matrix(y_test_bin, y_pred_bin, labels=tick_marks, normalize=normalize)

    plt.imshow(cn, cmap='plasma')
    plt.colorbar()

    # Annotate every cell: integers for raw counts, two decimals when normalised.
    cell_format = 'd' if normalize is None else '.2f'
    midpoint = cn.max() / 2
    for row in range(cn.shape[0]):
        for col in range(cn.shape[1]):
            value = cn[row, col]
            # 'plasma' runs from dark (low) to bright (high); pick a contrasting text colour.
            text_color = 'black' if value > midpoint else 'white'
            plt.text(col, row, format(value, cell_format),
                     ha='center', va='center', color=text_color)

    plt.xticks(tick_marks, classes)
    plt.yticks(tick_marks, classes)
    plt.title('Confusion Matrix')
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.show()

# Render the confusion matrix twice: raw counts first, then row-normalised values.
for heading, mode in (
    ('Confusion Matrix without Normalization', None),
    ('Confusion Matrix with Normalized Values', 'true'),
):
    print(heading)
    plot_confusion_matrix(normalize=mode)

**Write up:**

Link to the model on Hugging Face Hub: 
Include some examples of misclassified images. 

*misclassified images are shown in the previous cells.*

Please explain what you might do to improve your model's performance on these images in the future (you do not need to implement these suggestions)

*Firstly, I would increase the size of the dataset. Next, I would use an ensemble approach to improve the model's accuracy. I usually work with medical imaging data where the difference between images is very small, e.g., pneumonia X-rays vs. normal X-rays. In such cases, using pre-trained networks such as ResNet50, VGG16, etc., and then calculating a weighted average of the predictions of each network has always given me the best results. In this case, the difference between normal and upside-down images is also not very big, so I would use the weighted ensemble approach to solve this problem.*