from google.colab import drive drive.mount('/content/drive') %cd '/content/drive/My Drive/UpsideDownDetector' #importing all the libraries from __future__ import print_function, division from builtins import range, input import tensorflow as tf from tensorflow import keras from tensorflow.keras.layers import Input, Lambda, Dense, Flatten, AveragePooling2D, Dropout from tensorflow.keras.models import Model, load_model from tensorflow.keras.preprocessing import image from tensorflow.keras.preprocessing.image import ImageDataGenerator import numpy as np import os import cv2 import matplotlib.pyplot as plt from sklearn.metrics import confusion_matrix, roc_curve import matplotlib.pyplot as plt import cv2 from glob import glob Defining Parameters #define size to which images are to be resized IMAGE_SIZE = [224, 224] # training config: epochs = 500 batch_size = 32 #define paths normal_path = '/content/drive/MyDrive/UpsideDownDetector/Dataset/Normal' upsideDown_path = '/content/drive/MyDrive/UpsideDownDetector/Dataset/UpsideDown' # Using glob to grab images from path normal_files = glob(normal_path + '/*') upsideDown_files = glob(upsideDown_path + '/*') Fetching images along with their labels from the dataset. The dataset only contains images of cats. The problem is treated as a binary classification problem. There are 2 classes. The first one has normal images and the second one has upside down images. **Each class contains 500 images**. 
# Preparing images and labels
import cv2


def _load_rgb_images(paths, size=(150, 150)):
    """Return the readable images in *paths* as RGB arrays resized to *size*.

    Files that OpenCV cannot decode are skipped (with a message) instead of
    crashing the colour conversion.
    """
    loaded = []
    for path in paths:
        bgr = cv2.imread(path)
        if bgr is None:
            # cv2.imread returns None for unreadable or non-image files.
            print('Skipping unreadable file:', path)
            continue
        rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
        loaded.append(cv2.resize(rgb, size))
    return loaded


normal_images = _load_rgb_images(normal_files)
upsideDown_images = _load_rgb_images(upsideDown_files)

# One label per successfully loaded image, so images and labels stay aligned.
normal_labels = ['Normal'] * len(normal_images)
upsideDown_labels = ['Upside Down'] * len(upsideDown_images)


# A look at the first images of each class
def plot_images(images, title):
    """Show up to 40 of *images* on a 5x8 grid under the heading *title*.

    Grid cells beyond ``len(images)`` are left blank, so shorter sequences
    no longer raise IndexError.
    """
    nrows, ncols = 5, 8
    figsize = [10, 6]
    fig, ax = plt.subplots(nrows=nrows, ncols=ncols, figsize=figsize,
                           facecolor=(1, 1, 1))
    for i, axi in enumerate(ax.flat):
        axi.set_axis_off()
        if i < len(images):
            axi.imshow(images[i])
    plt.suptitle(title, fontsize=24)
    plt.tight_layout(pad=0.2, rect=[0, 0, 1, 0.9])
    plt.show()


plot_images(normal_images, 'Normal')
plot_images(upsideDown_images, 'Upside Down')

# Normalizing pixel values to the interval [0, 1]
normal_images = np.array(normal_images) / 255
upsideDown_images = np.array(upsideDown_images) / 255

# ---------------------------------------------------------------------------
# Train/Test Split
# ---------------------------------------------------------------------------
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer
from tensorflow.keras.utils import to_categorical

# Split each class separately (80/20) so both splits stay class-balanced.
# A fixed random_state makes the split, and therefore the reported results,
# reproducible between runs.
normal_x_train, normal_x_test, normal_y_train, normal_y_test = train_test_split(
    normal_images, normal_labels, test_size=0.2, random_state=42)
upsideDown_x_train, upsideDown_x_test, upsideDown_y_train, upsideDown_y_test = train_test_split(
    upsideDown_images, upsideDown_labels, test_size=0.2, random_state=42)

# Upside-down samples come first, normal samples second, in every array.
X_train = np.concatenate((upsideDown_x_train, normal_x_train), axis=0)
X_test = np.concatenate((upsideDown_x_test, normal_x_test), axis=0)
y_train = np.concatenate((upsideDown_y_train, normal_y_train), axis=0)
y_test = np.concatenate((upsideDown_y_test, normal_y_test), axis=0)

# Make labels into categories - either 0 or 1.
# LabelBinarizer orders classes by sorted label, so 'Normal' -> 0 and
# 'Upside Down' -> 1.
y_train = LabelBinarizer().fit_transform(y_train)
# One-hot encode the labels: column 0 = 'Normal', column 1 = 'Upside Down'
# (LabelBinarizer orders classes by sorted label).
y_train = to_categorical(y_train)
y_test = LabelBinarizer().fit_transform(y_test)
y_test = to_categorical(y_test)

plot_images(normal_x_train, 'X_train')
plot_images(normal_x_test, 'X_test')

# ---------------------------------------------------------------------------
# Defining the Model
# ---------------------------------------------------------------------------
model = keras.Sequential([
    # Convolutional layer and maxpool layer 1
    keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(150, 150, 3)),
    keras.layers.MaxPool2D(2, 2),
    # Convolutional layer and maxpool layer 2
    keras.layers.Conv2D(64, (3, 3), activation='relu'),
    keras.layers.MaxPool2D(2, 2),
    # Convolutional layer and maxpool layer 3
    keras.layers.Conv2D(128, (3, 3), activation='relu'),
    keras.layers.MaxPool2D(2, 2),
    # Convolutional layer and maxpool layer 4
    keras.layers.Conv2D(128, (3, 3), activation='relu'),
    keras.layers.MaxPool2D(2, 2),
    # This layer flattens the resulting image array to a 1D array
    keras.layers.Flatten(),
    # Hidden layer with 512 neurons and Rectified Linear Unit activation
    keras.layers.Dense(512, activation='relu'),
    # Two mutually exclusive classes with one-hot labels: softmax makes the
    # two outputs a probability distribution, which categorical cross-entropy
    # expects (the previous sigmoid outputs did not sum to 1).
    keras.layers.Dense(2, activation='softmax'),
])

# Only the configuration that actually took effect is kept. An earlier
# compile (binary_crossentropy) and an earlier generator (with
# rotation_range=20) were immediately overwritten by these definitions.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

train_aug = ImageDataGenerator(
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True,
)

model.summary()

# ---------------------------------------------------------------------------
# Training
# ---------------------------------------------------------------------------
# steps_per_epoch / validation_steps are left for Keras to infer: the
# generator knows its own length, and validation data is a plain array pair.
# The previous float values (len(...) / 32) were fragile.
# NOTE(review): the test split doubles as validation data here.
history = model.fit(
    train_aug.flow(X_train, y_train, batch_size=batch_size),
    validation_data=(X_test, y_test),
    epochs=100,
)

# ---------------------------------------------------------------------------
# Generating Predictions
# ---------------------------------------------------------------------------
y_pred = model.predict(X_test, batch_size=batch_size)

# Here I show 10 sample predictions. You can see that most of them have been
# classified correctly, but there are incorrect predictions as well.
# Column order of the one-hot labels / model outputs. LabelBinarizer orders
# classes by sorted label, so index 0 is 'Normal' and index 1 is 'Upside Down'.
CLASS_NAMES = ['Normal', 'Upside Down']

# Show test samples 100..109 together with their own predictions. Previously
# prediction 100+k was drawn over image k, and the two class names were
# swapped relative to the label encoding.
for index in range(100, min(110, len(y_pred))):
    probability = y_pred[index]
    predicted = int(np.argmax(probability))
    plt.title('%.2f' % (probability[predicted] * 100) + '% ' + CLASS_NAMES[predicted])
    plt.imshow(X_test[index])
    plt.show()

# ---------------------------------------------------------------------------
# Generating ROC Curve
# ---------------------------------------------------------------------------
# Remember! The dataset is too small (only 500 images per class); that's why
# we cannot trust these results.
y_pred_bin = np.argmax(y_pred, axis=1)
y_test_bin = np.argmax(y_test, axis=1)

# The ROC curve needs continuous scores for the positive class
# ('Upside Down', column 1). Hard argmax labels collapse it to a single
# operating point.
fpr, tpr, thresholds = roc_curve(y_test_bin, y_pred[:, 1])

plt.rcParams['font.size'] = 12  # set before drawing so it applies to this figure
plt.plot(fpr, tpr)
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.0])
plt.title('ROC curve for our model')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.grid(True)
plt.show()


# ---------------------------------------------------------------------------
# Confusion Matrix
# ---------------------------------------------------------------------------
def plot_confusion_matrix(normalize):
    """Plot the test-set confusion matrix as an annotated heatmap.

    Reads the module-level ``y_test_bin`` and ``y_pred_bin`` arrays.

    Args:
        normalize: Forwarded to ``sklearn.metrics.confusion_matrix``
            (``None`` for raw counts, ``'true'`` to normalise per true class).
    """
    # seaborn (`sns`) was never imported in this file, so the heatmap is
    # drawn with scikit-learn's own display helper instead.
    from sklearn.metrics import ConfusionMatrixDisplay

    classes = ['Normal', 'Upside Down']
    # labels=[0, 1] keeps the matrix 2x2 even if one class is never predicted.
    cn = confusion_matrix(y_test_bin, y_pred_bin, labels=[0, 1],
                          normalize=normalize)
    display = ConfusionMatrixDisplay(confusion_matrix=cn, display_labels=classes)
    display.plot(cmap='plasma')
    plt.title('Confusion Matrix')
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.show()


print('Confusion Matrix without Normalization')
plot_confusion_matrix(normalize=None)

print('Confusion Matrix with Normalized Values')
plot_confusion_matrix(normalize='true')

# **Write up:**
#
# Link to the model on Hugging Face Hub:
#
# Include some examples of misclassified images.
# *Misclassified images are shown in the previous cells.*
#
# Please explain what you might do to improve your model's performance on
# these images in the future (you do not need to implement these suggestions).
# *Firstly, I would increase the size of the dataset. Next, I would use an
# ensemble approach to improve the model's accuracy. I usually work with
# medical imaging data where the difference between images is very small,
# e.g. pneumonia X-ray vs. normal X-ray.
# In such instances, using pre-trained networks such as ResNet50, VGG16, etc., and then calculating a weighted average of the predictions of each network has always given me the best results. In this case, the difference between normal and upside-down images is not very big, so I would use the weighted ensemble approach to solve this problem.*