### We create a bunch of helpful functions throughout the course.
### Storing them here so they're easily accessible.

import tensorflow as tf


# Create a function to import an image and resize it to be able to be used with our model
def load_and_prep_image(filename, img_shape=224, scale=True):
    """
    Reads in an image from filename, turns it into a 3-channel tensor and
    resizes it to (img_shape, img_shape, 3).

    Parameters
    ----------
    filename (str): string filename of target image
    img_shape (int): size to resize target image to, default 224
    scale (bool): whether to scale pixel values to range(0, 1), default True

    Returns
    -------
    The resized image tensor, divided by 255 when `scale` is True.
    """
    # Read in the image
    img = tf.io.read_file(filename)
    # Decode it into a tensor. `channels=3` guarantees the documented
    # (H, W, 3) layout even for grayscale or RGBA files, and
    # `expand_animations=False` keeps the result 3-D so it can be resized.
    img = tf.io.decode_image(img, channels=3, expand_animations=False)
    # Resize the image
    img = tf.image.resize(img, [img_shape, img_shape])
    if scale:
        # Rescale the image (get all values between 0 and 1)
        return img / 255.
    return img


# Note: The following confusion matrix code is a remix of Scikit-Learn's
# plot_confusion_matrix function - https://scikit-learn.org/stable/modules/generated/sklearn.metrics.plot_confusion_matrix.html
import itertools
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import confusion_matrix


# Our function needs a different name to sklearn's plot_confusion_matrix
def make_confusion_matrix(y_true, y_pred, classes=None, figsize=(10, 10), text_size=15, norm=False, savefig=False):
    """Makes a labelled confusion matrix comparing predictions and ground truth labels.

    If classes is passed, confusion matrix will be labelled, if not, integer class values
    will be used.

    Args:
      y_true: Array of truth labels (must be same shape as y_pred).
      y_pred: Array of predicted labels (must be same shape as y_true).
      classes: Array of class labels (e.g. string form). If `None` (or empty), integer labels are used.
      figsize: Size of output figure (default=(10, 10)).
      text_size: Size of output figure text (default=15).
      norm: normalize values or not (default=False). When True each cell also shows
        its share of the row (true-label) total as a percentage.
      savefig: save confusion matrix to file (default=False).

    Returns:
      None. The labelled confusion matrix is drawn on a new matplotlib figure and,
      if `savefig` is True, written to "confusion_matrix.png" in the current
      working directory.

    Example usage:
      make_confusion_matrix(y_true=test_labels, # ground truth test labels
                            y_pred=y_preds, # predicted labels
                            classes=class_names, # array of class label names
                            figsize=(15, 15),
                            text_size=10)
    """
    # Create the confusion matrix
    cm = confusion_matrix(y_true, y_pred)
    n_classes = cm.shape[0]  # find the number of classes we're dealing with

    if norm:
        # Normalize each row by its total. Rows that sum to zero (a class that
        # only ever appears in the predictions) stay at 0 instead of becoming NaN.
        row_totals = cm.sum(axis=1, keepdims=True)
        cm_norm = np.divide(
            cm.astype("float"),
            row_totals,
            out=np.zeros(cm.shape, dtype=float),
            where=row_totals != 0,
        )

    # Plot the figure and make it pretty
    fig, ax = plt.subplots(figsize=figsize)
    cax = ax.matshow(cm, cmap=plt.cm.Blues)  # colors will represent how 'correct' a class is, darker == better
    fig.colorbar(cax)

    # Is there a list of classes? Compare against None / length explicitly so
    # that NumPy arrays of class names work (their truth value is ambiguous).
    if classes is not None and len(classes) > 0:
        labels = classes
    else:
        labels = np.arange(n_classes)

    # Label the axes
    ax.set(title="Confusion Matrix",
           xlabel="Predicted label",
           ylabel="True label",
           xticks=np.arange(n_classes),  # create enough axis slots for each class
           yticks=np.arange(n_classes),
           xticklabels=labels,  # axes will be labelled with class names (if they exist) or ints
           yticklabels=labels)

    # Make x-axis labels appear on bottom
    ax.xaxis.set_label_position("bottom")
    ax.xaxis.tick_bottom()

    # Set the threshold for different colors
    threshold = (cm.max() + cm.min()) / 2.

    # Plot the text on each cell (white text on dark cells, black on light ones)
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        cell_text = f"{cm[i, j]}"
        if norm:
            cell_text += f" ({cm_norm[i, j]*100:.1f}%)"
        ax.text(j, i, cell_text,
                horizontalalignment="center",
                color="white" if cm[i, j] > threshold else "black",
                size=text_size)

    # Save the figure to the current working directory
    if savefig:
        fig.savefig("confusion_matrix.png")


# Make a function to predict on images and plot them (works with multi-class)
def pred_and_plot(model, filename, class_names):
    """
    Imports an image located at filename, makes a prediction on it with
    a trained model and plots the image with the predicted class as the title.

    Args:
      model: trained model exposing a `predict` method.
      filename (str): path of the image to predict on.
      class_names: sequence of class names indexed by predicted class.
    """
    # Import the target image and preprocess it
    img = load_and_prep_image(filename)

    # Make a prediction (the model expects a batch, so add a leading axis)
    pred = model.predict(tf.expand_dims(img, axis=0))

    # Get the predicted class
    if len(pred[0]) > 1:  # check for multi-class
        pred_class = class_names[pred.argmax()]  # if more than one output, take the max
    else:
        pred_class = class_names[int(tf.round(pred)[0][0])]  # if only one output, round

    # Plot the image and predicted class
    plt.imshow(img)
    plt.title(f"Prediction: {pred_class}")
    plt.axis(False)


import datetime


def create_tensorboard_callback(dir_name, experiment_name):
    """
    Creates a TensorBoard callback instance to store log files.

    Stores log files with the filepath:
      "dir_name/experiment_name/current_datetime/"

    Args:
      dir_name: target directory to store TensorBoard log files
      experiment_name: name of experiment directory (e.g. efficientnet_model_1)

    Returns:
      The configured `tf.keras.callbacks.TensorBoard` callback.
    """
    timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    log_dir = f"{dir_name}/{experiment_name}/{timestamp}"
    tensorboard_callback = tf.keras.callbacks.TensorBoard(
        log_dir=log_dir
    )
    print(f"Saving TensorBoard log files to: {log_dir}")
    return tensorboard_callback


# Plot the validation and training data separately
import matplotlib.pyplot as plt


def plot_loss_curves(history):
    """
    Plots separate loss and accuracy curves for training and validation metrics.

    Args:
      history: TensorFlow model History object (see: https://www.tensorflow.org/api_docs/python/tf/keras/callbacks/History)
        whose `history` dict contains 'loss', 'val_loss', 'accuracy' and 'val_accuracy'.
    """
    loss = history.history['loss']
    val_loss = history.history['val_loss']

    accuracy = history.history['accuracy']
    val_accuracy = history.history['val_accuracy']

    epochs = range(len(loss))

    # Plot loss on its own figure so repeated calls never draw on top of a
    # figure left over from an earlier plot
    plt.figure()
    plt.plot(epochs, loss, label='training_loss')
    plt.plot(epochs, val_loss, label='val_loss')
    plt.title('Loss')
    plt.xlabel('Epochs')
    plt.legend()

    # Plot accuracy
    plt.figure()
    plt.plot(epochs, accuracy, label='training_accuracy')
    plt.plot(epochs, val_accuracy, label='val_accuracy')
    plt.title('Accuracy')
    plt.xlabel('Epochs')
    plt.legend()


def compare_historys(original_history, new_history, initial_epochs=5):
    """
    Compares two TensorFlow model History objects.

    Args:
      original_history: History object from original model (before new_history)
      new_history: History object from continued model training (after original_history)
      initial_epochs: Number of epochs in original_history (new_history plot starts from here)
    """
    # Get original history measurements
    acc = original_history.history["accuracy"]
    loss = original_history.history["loss"]

    val_acc = original_history.history["val_accuracy"]
    val_loss = original_history.history["val_loss"]

    # Combine original history with new history
    total_acc = acc + new_history.history["accuracy"]
    total_loss = loss + new_history.history["loss"]

    total_val_acc = val_acc + new_history.history["val_accuracy"]
    total_val_loss = val_loss + new_history.history["val_loss"]

    # x position of the vertical marker (epochs are plotted 0-indexed)
    fine_tune_start = [initial_epochs - 1, initial_epochs - 1]

    # Make plots
    plt.figure(figsize=(8, 8))
    plt.subplot(2, 1, 1)
    plt.plot(total_acc, label='Training Accuracy')
    plt.plot(total_val_acc, label='Validation Accuracy')
    plt.plot(fine_tune_start, plt.ylim(), label='Start Fine Tuning')  # reshift plot around epochs
    plt.legend(loc='lower right')
    plt.title('Training and Validation Accuracy')

    plt.subplot(2, 1, 2)
    plt.plot(total_loss, label='Training Loss')
    plt.plot(total_val_loss, label='Validation Loss')
    plt.plot(fine_tune_start, plt.ylim(), label='Start Fine Tuning')  # reshift plot around epochs
    plt.legend(loc='upper right')
    plt.title('Training and Validation Loss')
    plt.xlabel('epoch')
    plt.show()


# Create function to unzip a zipfile into current working directory
# (since we're going to be downloading and unzipping a few files)
import zipfile


def unzip_data(filename):
    """
    Unzips filename into the current working directory.

    Args:
      filename (str): a filepath to a target zip folder to be unzipped.
    """
    # The context manager closes the archive even if extraction fails
    with zipfile.ZipFile(filename, "r") as zip_ref:
        zip_ref.extractall()


# Walk through an image classification directory and find out how many files (images)
# are in each subdirectory.
import os


def walk_through_dir(dir_path):
    """
    Walks through dir_path and prints a summary of its contents.

    Args:
      dir_path (str): target directory

    Returns:
      None. For dir_path and every directory below it, prints one line with:
        number of subdirectories in that directory
        number of files (reported as images) in that directory
        path of that directory
    """
    for dirpath, dirnames, filenames in os.walk(dir_path):
        summary = f"There are {len(dirnames)} directories and {len(filenames)} images in '{dirpath}'."
        print(summary)


# Function to evaluate: accuracy, precision, recall, f1-score
from sklearn.metrics import accuracy_score, precision_recall_fscore_support


def calculate_results(y_true, y_pred):
    """
    Calculates accuracy, precision, recall and f1 score of a classification model.

    Precision, recall and f1 are computed with scikit-learn's "weighted" average.

    Args:
      y_true: true labels in the form of a 1D array
      y_pred: predicted labels in the form of a 1D array

    Returns:
      A dictionary with the keys "accuracy", "precision", "recall" and "f1".
      The accuracy value is multiplied by 100; the other three are stored as
      returned by scikit-learn.
    """
    # Accuracy is reported as a percentage
    accuracy_pct = accuracy_score(y_true, y_pred) * 100
    # Weighted precision / recall / f1; the fourth returned element is not needed
    weighted_scores = precision_recall_fscore_support(y_true, y_pred, average="weighted")
    precision, recall, f1 = weighted_scores[:3]
    return dict(accuracy=accuracy_pct, precision=precision, recall=recall, f1=f1)