from google.colab import drive
drive.mount('/content/drive')
%cd '/content/drive/My Drive/UpsideDownDetector'
# Import all required libraries
import os
from glob import glob

import numpy as np
import cv2
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix, roc_curve

from tensorflow import keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator
Defining Parameters
# Define the size to which images are resized; this must match the model's input shape
IMAGE_SIZE = [150, 150]
# Training config
epochs = 100
batch_size = 32
#define paths
normal_path = '/content/drive/MyDrive/UpsideDownDetector/Dataset/Normal'
upsideDown_path = '/content/drive/MyDrive/UpsideDownDetector/Dataset/UpsideDown'
# Using glob to grab images from path
normal_files = glob(normal_path + '/*')
upsideDown_files = glob(upsideDown_path + '/*')
Fetching images along with their labels from the dataset.
The dataset contains only images of cats, and the task is treated as binary classification with two classes: the first holds normal (upright) images and the second holds upside-down images. **Each class contains 500 images**.
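As an aside, a dataset like this can be built from upright images alone. Below is a minimal sketch of that idea; it assumes (the notebook does not say) that the upside-down class was produced by rotating each normal image by 180 degrees:
# Hypothetical sketch: build the UpsideDown class by rotating each Normal image
# by 180 degrees; reuses the paths defined above.
os.makedirs(upsideDown_path, exist_ok=True)
for path in glob(normal_path + '/*'):
    img = cv2.imread(path)
    if img is None:  # skip files OpenCV cannot read
        continue
    rotated = cv2.rotate(img, cv2.ROTATE_180)  # 180-degree rotation = upside down
    cv2.imwrite(os.path.join(upsideDown_path, os.path.basename(path)), rotated)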
# Preparing labels and loading images
normal_labels = []
upsideDown_labels = []
normal_images = []
upsideDown_images = []

for path in normal_files:
    img = cv2.imread(path)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # OpenCV loads BGR; convert to RGB for plotting
    img = cv2.resize(img, tuple(IMAGE_SIZE))
    normal_images.append(img)
    normal_labels.append('Normal')

for path in upsideDown_files:
    img = cv2.imread(path)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, tuple(IMAGE_SIZE))
    upsideDown_images.append(img)
    upsideDown_labels.append('Upside Down')
# A look at some images from the dataset
def plot_images(images, title):
    nrows, ncols = 5, 8
    figsize = [10, 6]
    fig, ax = plt.subplots(nrows=nrows, ncols=ncols, figsize=figsize, facecolor=(1, 1, 1))
    for i, axi in enumerate(ax.flat):
        axi.imshow(images[i])
        axi.set_axis_off()
    plt.suptitle(title, fontsize=24)
    plt.tight_layout(pad=0.2, rect=[0, 0, 1, 0.9])
    plt.show()

plot_images(normal_images, 'Normal')
plot_images(upsideDown_images, 'Upside Down')
# normalizing to interval of [0,1]
normal_images = np.array(normal_images) / 255
upsideDown_images = np.array(upsideDown_images) / 255
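A quick sanity check on the resulting arrays can catch loading mistakes early; the expected values below assume each class really holds 500 images:
# Optional sanity check on shapes and value ranges
print(normal_images.shape, upsideDown_images.shape)  # expected: (500, 150, 150, 3) each
print(normal_images.min(), normal_images.max())      # expected: within [0.0, 1.0]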
Train/Test Split
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer
from tensorflow.keras.utils import to_categorical
# Split each class into training and testing sets
normal_x_train, normal_x_test, normal_y_train, normal_y_test = train_test_split(
    normal_images, normal_labels, test_size=0.2)
upsideDown_x_train, upsideDown_x_test, upsideDown_y_train, upsideDown_y_test = train_test_split(
    upsideDown_images, upsideDown_labels, test_size=0.2)

X_train = np.concatenate((upsideDown_x_train, normal_x_train), axis=0)
X_test = np.concatenate((upsideDown_x_test, normal_x_test), axis=0)
y_train = np.concatenate((upsideDown_y_train, normal_y_train), axis=0)
y_test = np.concatenate((upsideDown_y_test, normal_y_test), axis=0)

# Turn the string labels into one-hot vectors. Fit the binarizer once, on the
# training labels, so train and test share the same class-to-index mapping.
lb = LabelBinarizer().fit(y_train)
y_train = to_categorical(lb.transform(y_train))
y_test = to_categorical(lb.transform(y_test))
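It is worth pinning down which one-hot column means what: LabelBinarizer orders classes alphabetically, so column 0 corresponds to 'Normal' and column 1 to 'Upside Down'. A quick check:
# LabelBinarizer sorts classes alphabetically:
#   'Normal'      -> 0 -> one-hot [1, 0]
#   'Upside Down' -> 1 -> one-hot [0, 1]
print(lb.classes_)  # ['Normal' 'Upside Down']
print(to_categorical(lb.transform(['Normal', 'Upside Down'])))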
plot_images(normal_x_train, 'Normal - training split')
plot_images(normal_x_test, 'Normal - test split')
Defining the Model
model = keras.Sequential()
# Convolutional layer and maxpool layer 1
model.add(keras.layers.Conv2D(32,(3,3),activation='relu',input_shape=(150,150,3)))
model.add(keras.layers.MaxPool2D(2,2))
# Convolutional layer and maxpool layer 2
model.add(keras.layers.Conv2D(64,(3,3),activation='relu'))
model.add(keras.layers.MaxPool2D(2,2))
# Convolutional layer and maxpool layer 3
model.add(keras.layers.Conv2D(128,(3,3),activation='relu'))
model.add(keras.layers.MaxPool2D(2,2))
# Convolutional layer and maxpool layer 4
model.add(keras.layers.Conv2D(128,(3,3),activation='relu'))
model.add(keras.layers.MaxPool2D(2,2))
# This layer flattens the resulting image array to 1D array
model.add(keras.layers.Flatten())
# Fully connected hidden layer with 512 neurons and ReLU activation
model.add(keras.layers.Dense(512,activation='relu'))
# Output layer: softmax over the two classes, paired with the categorical cross-entropy loss below
model.add(keras.layers.Dense(2,activation='softmax'))
model.compile(
    loss='categorical_crossentropy',  # matches the one-hot labels and softmax output
    optimizer='adam',
    metrics=['accuracy']
)

# Data augmentation for training. Horizontal flips preserve the upright/upside-down
# label; vertical flips or large rotations would not, so they are left out.
train_aug = ImageDataGenerator(
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True
)
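As a sanity check on this choice of augmentations, the short sketch below (an illustration, not part of the original pipeline) shows why a horizontal flip is label-safe for this task while a vertical flip is not:
# Illustration: horizontal flips keep the label valid, vertical flips do not
sample = X_train[0]         # any training image, values in [0, 1]
h_flip = np.fliplr(sample)  # left-right mirror: still upright, label unchanged
v_flip = np.flipud(sample)  # top-bottom mirror: now upside down, label would be wrong

fig, axes = plt.subplots(1, 3, figsize=(9, 3))
for ax, img, name in zip(axes, [sample, h_flip, v_flip],
                         ['original', 'horizontal flip', 'vertical flip']):
    ax.imshow(img)
    ax.set_title(name)
    ax.set_axis_off()
plt.show()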
model.summary()
Training
history = model.fit(
    train_aug.flow(X_train, y_train, batch_size=batch_size),
    validation_data=(X_test, y_test),
    steps_per_epoch=len(X_train) // batch_size,  # integer number of batches per epoch
    epochs=epochs
)
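The History object returned by model.fit records the per-epoch metrics; a short sketch to plot them (the key names follow from the 'accuracy' metric configured above; older Keras versions used 'acc'/'val_acc'):
# Plot training curves from the History object
plt.figure(figsize=(10, 4))
plt.subplot(1, 2, 1)
plt.plot(history.history['accuracy'], label='train')
plt.plot(history.history['val_accuracy'], label='validation')
plt.title('Accuracy')
plt.xlabel('Epoch')
plt.legend()
plt.subplot(1, 2, 2)
plt.plot(history.history['loss'], label='train')
plt.plot(history.history['val_loss'], label='validation')
plt.title('Loss')
plt.xlabel('Epoch')
plt.legend()
plt.show()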
Generating Predictions
y_pred = model.predict(X_test, batch_size=batch_size)
Here I display the predictions for ten test images (indices 100-109). Most of them are classified correctly, but there are incorrect predictions as well.
# Show predictions for test images 100-109. Column 1 of each prediction is the
# probability of 'Upside Down' (see the label mapping above).
for index in range(100, 110):
    probability = y_pred[index]
    if probability[1] > 0.5:
        plt.title('%.2f' % (probability[1]*100) + '% Upside Down')
    else:
        plt.title('%.2f' % ((1-probability[1])*100) + '% Normal')
    plt.imshow(X_test[index])
    plt.show()
Generating ROC Curve
Remember: the dataset is very small (only 500 images per class), so these results should be interpreted with caution.
y_pred_bin = np.argmax(y_pred, axis=1)
y_test_bin = np.argmax(y_test, axis=1)
# Use the predicted probability of the positive class ('Upside Down') rather than the
# hard argmax labels, so the ROC curve has more than one operating point.
fpr, tpr, thresholds = roc_curve(y_test_bin, y_pred[:, 1])
plt.plot(fpr, tpr)
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.0])
plt.rcParams['font.size'] = 12
plt.title('ROC curve for our model')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.grid(True)
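The area under this curve summarizes the plot in a single number; scikit-learn's roc_auc_score computes it from the same probability scores:
from sklearn.metrics import roc_auc_score

# AUC from the same scores used for the ROC curve above
print('ROC AUC: %.4f' % roc_auc_score(y_test_bin, y_pred[:, 1]))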
Confusion Matrix
def plot_confusion_matrix(normalize):
    classes = ['Normal', 'Upside Down']
    tick_marks = [0.5, 1.5]
    cn = confusion_matrix(y_test_bin, y_pred_bin, normalize=normalize)
    sns.heatmap(cn, cmap='plasma', annot=True)
    plt.xticks(tick_marks, classes)
    plt.yticks(tick_marks, classes)
    plt.title('Confusion Matrix')
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.show()

print('Confusion Matrix without Normalization')
plot_confusion_matrix(normalize=None)
print('Confusion Matrix with Normalized Values')
plot_confusion_matrix(normalize='true')
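For a per-class summary of precision, recall, and F1, scikit-learn's classification_report works on the same binarized labels:
from sklearn.metrics import classification_report

# Per-class metrics on the test set; class 0 is 'Normal', class 1 is 'Upside Down'
print(classification_report(y_test_bin, y_pred_bin, target_names=['Normal', 'Upside Down']))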
**Write-up:**
Link to the model on Hugging Face Hub:
Include some examples of misclassified images.
*Misclassified images are shown in the previous cells.*
Please explain what you might do to improve your model's performance on these images in the future (you do not need to implement these suggestions).
*First, I would increase the size of the dataset. Next, I would use an ensemble approach to improve the model's accuracy. I usually work with medical imaging data, where the difference between images is very small, e.g. a pneumonia X-ray vs. a normal X-ray. In such cases, using pre-trained networks such as ResNet50, VGG16, etc., and then taking a weighted average of each network's predictions has always given me the best results. Here the difference between normal and upside-down images is likewise not very big, so I would use the weighted-ensemble approach for this problem as well; a sketch follows below.*
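A minimal sketch of that weighted-ensemble idea; the backbones, the classification head, and the weights are illustrative assumptions, and each member would still need to be fine-tuned on (X_train, y_train) before its predictions mean anything:
# Hypothetical sketch of a weighted ensemble of pre-trained backbones
from tensorflow.keras.applications import ResNet50, VGG16
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model

def build_classifier(backbone_fn):
    # ImageNet-pretrained backbone with a small softmax head for the two classes
    base = backbone_fn(weights='imagenet', include_top=False, input_shape=(150, 150, 3))
    x = GlobalAveragePooling2D()(base.output)
    out = Dense(2, activation='softmax')(x)
    return Model(base.input, out)

members = [build_classifier(ResNet50), build_classifier(VGG16)]
weights = [0.6, 0.4]  # illustrative weights, e.g. chosen from validation accuracy

# Weighted average of member predictions, then the usual argmax
preds = sum(w * m.predict(X_test) for w, m in zip(weights, members))
y_pred_ensemble = np.argmax(preds, axis=1)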