Vrk committed on
Commit
a2ba2c5
1 Parent(s): 5caaae1
Files changed (1) hide show
  1. helper_functions.py +288 -0
helper_functions.py ADDED
@@ -0,0 +1,288 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ### We create a bunch of helpful functions throughout the course.
2
+ ### Storing them here so they're easily accessible.
3
+
4
+ import tensorflow as tf
5
+
6
+ # Create a function to load an image and resize it so it can be used with our model
7
def load_and_prep_image(filename, img_shape=224, scale=True):
    """
    Reads in an image from filename, turns it into a 3-channel tensor and
    resizes it to (img_shape, img_shape, 3).

    Parameters
    ----------
    filename (str): string filename of target image
    img_shape (int): height and width to resize target image to, default 224
    scale (bool): whether to divide pixel values by 255 (range 0 to 1), default True

    Returns
    -------
    The resized image tensor; divided by 255 when scale is True.
    """
    # Read in the raw file contents
    img = tf.io.read_file(filename)
    # Decode it into a tensor, forcing 3 colour channels so that grayscale
    # files still produce the (height, width, 3) layout described above
    img = tf.image.decode_jpeg(img, channels=3)
    # Resize the image to a square of side img_shape
    img = tf.image.resize(img, [img_shape, img_shape])
    if scale:
        # Rescale the image (divide every value by 255)
        return img / 255.
    return img
29
+
30
+ # Note: The following confusion matrix code is a remix of Scikit-Learn's
31
+ # plot_confusion_matrix function - https://scikit-learn.org/stable/modules/generated/sklearn.metrics.plot_confusion_matrix.html
32
+ import itertools
33
+ import matplotlib.pyplot as plt
34
+ import numpy as np
35
+ from sklearn.metrics import confusion_matrix
36
+
37
+ # Our function needs a different name to sklearn's plot_confusion_matrix
38
def make_confusion_matrix(y_true, y_pred, classes=None, figsize=(10, 10), text_size=15, norm=False, savefig=False):
    """Makes a labelled confusion matrix comparing predictions and ground truth labels.

    If classes is passed (and is non-empty), the axes are labelled with it;
    if not, integer class values are used.

    Args:
      y_true: Array of truth labels (must be same shape as y_pred).
      y_pred: Array of predicted labels (must be same shape as y_true).
      classes: List or array of class labels (e.g. string form). If `None`
        or empty, integer labels are used.
      figsize: Size of output figure (default=(10, 10)).
      text_size: Size of output figure text (default=15).
      norm: also show each cell as a percentage of its row total (default=False).
      savefig: save the figure to "confusion_matrix.png" in the current
        working directory (default=False).

    Returns:
      None. The confusion matrix is drawn on a newly created matplotlib figure.

    Example usage:
      make_confusion_matrix(y_true=test_labels, # ground truth test labels
                            y_pred=y_preds, # predicted labels
                            classes=class_names, # array of class label names
                            figsize=(15, 15),
                            text_size=10)
    """
    # Create the confusion matrix
    cm = confusion_matrix(y_true, y_pred)
    n_classes = cm.shape[0]  # find the number of classes we're dealing with

    # Row-normalised values are only needed when percentages are requested.
    # Rows that sum to zero are left at 0 rather than dividing by zero.
    cm_norm = None
    if norm:
        row_totals = cm.sum(axis=1, keepdims=True)
        cm_norm = np.divide(cm, row_totals,
                            out=np.zeros(cm.shape, dtype=float),
                            where=row_totals != 0)

    # Plot the figure and make it pretty
    fig, ax = plt.subplots(figsize=figsize)
    cax = ax.matshow(cm, cmap=plt.cm.Blues)  # cell colour follows the cell count
    fig.colorbar(cax)

    # Use the supplied class names when there are any. `is not None` plus a
    # length check avoids the ambiguous truth value of a NumPy array.
    if classes is not None and len(classes) > 0:
        labels = classes
    else:
        labels = np.arange(n_classes)

    # Label the axes
    ax.set(title="Confusion Matrix",
           xlabel="Predicted label",
           ylabel="True label",
           xticks=np.arange(n_classes),  # create enough axis slots for each class
           yticks=np.arange(n_classes),
           xticklabels=labels,  # axes are labelled with class names (if given) or ints
           yticklabels=labels)

    # Make x-axis labels appear on bottom
    ax.xaxis.set_label_position("bottom")
    ax.xaxis.tick_bottom()

    # Cells above this midpoint get white text, the rest black
    threshold = (cm.max() + cm.min()) / 2.

    # Plot the text on each cell
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        if norm:
            cell_text = f"{cm[i, j]} ({cm_norm[i, j]*100:.1f}%)"
        else:
            cell_text = f"{cm[i, j]}"
        ax.text(j, i, cell_text,
                horizontalalignment="center",
                color="white" if cm[i, j] > threshold else "black",
                size=text_size)

    # Save the figure to the current working directory
    if savefig:
        fig.savefig("confusion_matrix.png")
111
+
112
+ # Make a function to predict on images and plot them (works with multi-class)
113
def pred_and_plot(model, filename, class_names):
    """
    Loads the image at filename, runs it through a trained model and shows
    the image with the predicted class name as the plot title.

    Args:
      model: object with a predict() method that accepts a batch of images
      filename: path of the image to predict on
      class_names: sequence of class names, indexed by predicted class
    """
    # Load and preprocess the target image
    image = load_and_prep_image(filename)

    # The model is given a batch, so add a leading batch dimension
    batch = tf.expand_dims(image, axis=0)
    probs = model.predict(batch)

    # More than one output value: take the position of the largest.
    # A single output value: round it to get the class index.
    if len(probs[0]) > 1:
        class_index = probs.argmax()
    else:
        class_index = int(tf.round(probs)[0][0])
    pred_class = class_names[class_index]

    # Show the image with its predicted class
    plt.imshow(image)
    plt.title(f"Prediction: {pred_class}")
    plt.axis(False)
134
+
135
+ import datetime
136
+
137
def create_tensorboard_callback(dir_name, experiment_name):
    """
    Creates a TensorBoard callback instance to store log files.

    Log files are stored under the filepath:
      "dir_name/experiment_name/current_datetime/"

    Args:
      dir_name: target directory to store TensorBoard log files
      experiment_name: name of experiment directory (e.g. efficientnet_model_1)

    Returns:
      The tf.keras.callbacks.TensorBoard callback pointing at that directory.
    """
    # Timestamp the run so each call gets its own directory name
    timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    log_dir = "/".join([dir_name, experiment_name, timestamp])
    callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir)
    print(f"Saving TensorBoard log files to: {log_dir}")
    return callback
154
+
155
+ # Plot the validation and training data separately
156
+ import matplotlib.pyplot as plt
157
+
158
def plot_loss_curves(history):
    """
    Plots training and validation curves, loss and accuracy in separate figures.

    Args:
      history: TensorFlow model History object (see: https://www.tensorflow.org/api_docs/python/tf/keras/callbacks/History)
        Its `history` dict must hold 'loss', 'val_loss', 'accuracy' and
        'val_accuracy'.
    """
    train_loss, valid_loss = history.history['loss'], history.history['val_loss']
    train_acc, valid_acc = history.history['accuracy'], history.history['val_accuracy']

    # One x position per recorded epoch
    epochs = range(len(history.history['loss']))

    # Loss curves on the current figure
    plt.plot(epochs, train_loss, label='training_loss')
    plt.plot(epochs, valid_loss, label='val_loss')
    plt.title('Loss')
    plt.xlabel('Epochs')
    plt.legend()

    # Accuracy curves on a new figure
    plt.figure()
    plt.plot(epochs, train_acc, label='training_accuracy')
    plt.plot(epochs, valid_acc, label='val_accuracy')
    plt.title('Accuracy')
    plt.xlabel('Epochs')
    plt.legend()
187
+
188
def compare_historys(original_history, new_history, initial_epochs=5):
    """
    Plots two TensorFlow model History objects as one continuous run.

    Args:
      original_history: History object from original model (before new_history)
      new_history: History object from continued model training (after original_history)
      initial_epochs: Number of epochs in original_history (a vertical marker
        is drawn at initial_epochs - 1 to show where new_history begins)
    """
    # Measurements from the first training run
    first_run = original_history.history
    acc, loss = first_run["accuracy"], first_run["loss"]
    val_acc, val_loss = first_run["val_accuracy"], first_run["val_loss"]

    # Append the measurements from the continued run
    second_run = new_history.history
    total_acc = acc + second_run["accuracy"]
    total_loss = loss + second_run["loss"]
    total_val_acc = val_acc + second_run["val_accuracy"]
    total_val_loss = val_loss + second_run["val_loss"]

    # x position of the marker separating the two runs
    marker_x = initial_epochs - 1

    # (subplot index, training curve, validation curve,
    #  training label, validation label, legend location, title)
    panels = (
        (1, total_acc, total_val_acc,
         'Training Accuracy', 'Validation Accuracy',
         'lower right', 'Training and Validation Accuracy'),
        (2, total_loss, total_val_loss,
         'Training Loss', 'Validation Loss',
         'upper right', 'Training and Validation Loss'),
    )

    plt.figure(figsize=(8, 8))
    for index, train_curve, val_curve, train_label, val_label, legend_loc, title in panels:
        plt.subplot(2, 1, index)
        plt.plot(train_curve, label=train_label)
        plt.plot(val_curve, label=val_label)
        # plt.ylim() is read after the curves are drawn so the marker spans
        # the y-range they produced
        plt.plot([marker_x, marker_x], plt.ylim(), label='Start Fine Tuning')
        plt.legend(loc=legend_loc)
        plt.title(title)
    plt.xlabel('epoch')
    plt.show()
231
+
232
+ # Create function to unzip a zipfile into current working directory
233
+ # (since we're going to be downloading and unzipping a few files)
234
+ import zipfile
235
+
236
def unzip_data(filename):
    """
    Unzips filename into the current working directory.

    Args:
      filename (str): a filepath to a target zip folder to be unzipped.
    """
    # The with-block closes the archive even if extraction raises
    with zipfile.ZipFile(filename, "r") as zip_ref:
        zip_ref.extractall()
246
+
247
+ # Walk through an image classification directory and find out how many files (images)
248
+ # are in each subdirectory.
249
+ import os
250
+
251
def walk_through_dir(dir_path):
    """
    Walks through dir_path and prints a summary of its contents.

    Args:
      dir_path (str): target directory

    Prints one line per directory visited by os.walk, giving:
      the number of subdirectories directly inside it
      the number of files directly inside it (reported as "images")
      the path of that directory
    """
    for current_dir, subdirs, files in os.walk(dir_path):
        dir_count, file_count = len(subdirs), len(files)
        print(f"There are {dir_count} directories and {file_count} images in '{current_dir}'.")
266
+
267
+ # Function to evaluate: accuracy, precision, recall, f1-score
268
+ from sklearn.metrics import accuracy_score, precision_recall_fscore_support
269
+
270
def calculate_results(y_true, y_pred):
    """
    Calculates accuracy, precision, recall and f1 score for a set of predictions.

    Precision, recall and f1 are computed with average="weighted".

    Args:
      y_true: true labels in the form of a 1D array
      y_pred: predicted labels in the form of a 1D array

    Returns a dictionary with keys "accuracy" (the accuracy score multiplied
    by 100), "precision", "recall" and "f1".
    """
    # Accuracy is reported as a percentage
    accuracy_pct = accuracy_score(y_true, y_pred) * 100
    # The fourth returned value is not used
    precision, recall, f1, _ = precision_recall_fscore_support(
        y_true, y_pred, average="weighted"
    )
    return {
        "accuracy": accuracy_pct,
        "precision": precision,
        "recall": recall,
        "f1": f1,
    }