import cv2
import numpy as np
import torch
from skimage import filters
from sklearn.metrics.pairwise import euclidean_distances
import matplotlib.pyplot as plt
import seaborn as sns
from copy import deepcopy


# ------------------------------------------------------------------------------
# ----- Evaluation metrics for a pair of binary mask images (pred, target) -----
# ------------------------------------------------------------------------------


def get_accuracy(arr1, arr2):
    """Pixel accuracy: fraction of pixels where the two arrays agree

    Args:
        arr1 (np.array)
        arr2 (np.array)
    """
    return (arr1 == arr2).sum() / arr1.size


def trimap(pred_im, gt_im, thickness=8):
    """Compute accuracy in a region of thickness around the contours
        for binary images (0-1 values)

    Args:
        pred_im (Image): Prediction
        gt_im (Image): Target
        thickness (int, optional): Thickness of the contour band, in pixels.
            Defaults to 8.
    """
    W, H = gt_im.size
    contours, hierarchy = cv2.findContours(
        np.array(gt_im), mode=cv2.RETR_TREE, method=cv2.CHAIN_APPROX_SIMPLE
    )
    mask_contour = np.zeros((H, W), dtype=np.int32)
    cv2.drawContours(
        mask_contour, contours, -1, (1), thickness=thickness, hierarchy=hierarchy
    )
    gt_contour = np.array(gt_im)[np.where(mask_contour > 0)]
    pred_contour = np.array(pred_im)[np.where(mask_contour > 0)]
    return get_accuracy(pred_contour, gt_contour)


def iou(pred_im, gt_im):
    """IoU for binary masks (0-1 values)

    Args:
        pred_im (Image or np.array): Prediction
        gt_im (Image or np.array): Target
    """
    pred = np.array(pred_im)
    gt = np.array(gt_im)
    intersection = (pred * gt).sum()
    union = (pred + gt).sum() - intersection
    return intersection / union


def f1_score(pred_im, gt_im):
    """F1 (Dice) score for binary masks (0-1 values)"""
    pred = np.array(pred_im)
    gt = np.array(gt_im)
    intersection = (pred * gt).sum()
    return 2 * intersection / (pred + gt).sum()


def accuracy(pred_im, gt_im):
    """Pixel accuracy, supporting batched targets and per-class predictions"""
    pred = np.array(pred_im)
    gt = np.array(gt_im)
    # Squeeze a singleton channel dimension if the target is N x 1 x H x W
    if len(gt.shape) == 4:
        assert gt.shape[1] == 1
        gt = gt[:, 0, :, :]
    # If the prediction has an extra (class) dimension, take its argmax
    if len(pred.shape) > len(gt.shape):
        pred = np.argmax(pred, axis=1)
    return float((pred == gt).sum()) / gt.size
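# Minimal usage sketch for the binary-mask metrics above. This helper is a
# hypothetical illustration added for documentation purposes, not part of the
# original evaluation pipeline; the toy arrays stand in for real masks.
def _demo_binary_mask_metrics():
    pred = np.array([[1, 1], [0, 0]])  # toy binary prediction
    gt = np.array([[1, 0], [0, 0]])  # toy binary target
    print("accuracy:", get_accuracy(pred, gt))  # 3 of 4 pixels agree -> 0.75
    print("iou:", iou(pred, gt))  # intersection 1, union 2 -> 0.5
    print("f1:", f1_score(pred, gt))  # 2 * 1 / (2 + 1) -> 0.667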
def mIOU(pred, label, average="macro"):
    """
    Adapted from:
    https://stackoverflow.com/questions/62461379/multiclass-semantic-segmentation-model-evaluation

    Compute the mean IOU from pred and label tensors.
    pred is a tensor N x C x H x W with logits (the argmax is taken over the
    class dimension; softmax is unnecessary since it does not change the argmax)
    and label is a N x H x W tensor with int labels per pixel.

    This does the same as sklearn's jaccard_score function if you choose
    average="macro".

    Args:
        pred (torch.tensor): predicted logits
        label (torch.tensor): labels
        average: "macro" or "weighted"

    Returns:
        float: mIOU, can be nan
    """
    num_classes = pred.shape[-3]

    pred = torch.argmax(pred, dim=1)
    present_iou_list = list()

    pred = pred.view(-1)
    label = label.view(-1)
    # For more than 2 classes, consider all class indices 0..(num_classes - 1);
    # for binary problems, only consider the class present in the label.
    interesting_classes = (
        [*range(num_classes)] if num_classes > 2 else [int(label.max().item())]
    )
    weights = []

    for sem_class in interesting_classes:
        pred_inds = pred == sem_class
        target_inds = label == sem_class
        if (target_inds.long().sum().item() > 0) or (
            pred_inds.long().sum().item() > 0
        ):
            intersection_now = (pred_inds[target_inds]).long().sum().item()
            union_now = (
                pred_inds.long().sum().item()
                + target_inds.long().sum().item()
                - intersection_now
            )
            weights.append(pred_inds.long().sum().item())
            iou_now = float(intersection_now) / float(union_now)
            present_iou_list.append(iou_now)
    if not present_iou_list:
        return float("nan")
    elif average == "weighted":
        weighted_avg = np.sum(np.multiply(weights, present_iou_list) / np.sum(weights))
        return weighted_avg
    else:
        return np.mean(present_iou_list)
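# Usage sketch for mIOU (hypothetical `_demo_miou` helper, added for
# illustration only). Shapes follow the docstring above: logits are
# N x C x H x W, integer labels are N x H x W.
def _demo_miou():
    torch.manual_seed(0)
    pred_logits = torch.randn(1, 2, 4, 4)  # N x C x H x W logits
    labels = torch.randint(0, 2, (1, 4, 4))  # N x H x W int labels
    print("mIOU (macro):", mIOU(pred_logits, labels))
    print("mIOU (weighted):", mIOU(pred_logits, labels, average="weighted"))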
def masker_classification_metrics(
    pred, label, labels_dict={"cannot": 0, "must": 1, "may": 2}
):
    """
    Classification metrics for the masker, and the corresponding maps. If the
    predictions are soft, the errors are weighted accordingly.

    Metrics computed:
        tpr : float
            True positive rate
        tpt : float
            True positive total (divided by total population)
        tnr : float
            True negative rate
        tnt : float
            True negative total (divided by total population)
        fpr : float
            False positive rate: rate of predicted mask on cannot flood
        fpt : float
            False positive total (divided by total population)
        fnr : float
            False negative rate: rate of missed mask on must flood
        fnt : float
            False negative total (divided by total population)
        mnr : float
            "May" negative rate (labeled as "may", predicted as no-mask)
        mpr : float
            "May" positive rate (labeled as "may", predicted as mask)
        accuracy : float
            Accuracy
        error : float
            Error
        precision : float
            Precision, considering only cannot and must flood labels
        f05 : float
            F0.5 score, considering only cannot and must flood labels
        accuracy_must_may : float
            Accuracy considering only the must and may areas

    Parameters
    ----------
    pred : array-like
        Mask prediction
    label : array-like
        Mask ground truth labels
    labels_dict : dict
        A dictionary with the identifier of each class (cannot, must, may)

    Returns
    -------
    metrics_dict : dict
        A dictionary with metric name and value pairs
    maps_dict : dict
        A dictionary containing the metric maps
    """
    tp_map = pred * np.asarray(label == labels_dict["must"], dtype=int)
    tpr = np.sum(tp_map) / np.sum(label == labels_dict["must"])
    tpt = np.sum(tp_map) / np.prod(label.shape)
    tn_map = (1.0 - pred) * np.asarray(label == labels_dict["cannot"], dtype=int)
    tnr = np.sum(tn_map) / np.sum(label == labels_dict["cannot"])
    tnt = np.sum(tn_map) / np.prod(label.shape)
    fp_map = pred * np.asarray(label == labels_dict["cannot"], dtype=int)
    fpr = np.sum(fp_map) / np.sum(label == labels_dict["cannot"])
    fpt = np.sum(fp_map) / np.prod(label.shape)
    fn_map = (1.0 - pred) * np.asarray(label == labels_dict["must"], dtype=int)
    fnr = np.sum(fn_map) / np.sum(label == labels_dict["must"])
    fnt = np.sum(fn_map) / np.prod(label.shape)
    may_neg_map = (1.0 - pred) * np.asarray(label == labels_dict["may"], dtype=int)
    may_pos_map = pred * np.asarray(label == labels_dict["may"], dtype=int)
    mnr = np.sum(may_neg_map) / np.sum(label == labels_dict["may"])
    mpr = np.sum(may_pos_map) / np.sum(label == labels_dict["may"])
    accuracy = tpt + tnt
    error = fpt + fnt

    # Assertions
    assert np.isclose(tpr, 1.0 - fnr), "TPR: {:.4f}, FNR: {:.4f}".format(tpr, fnr)
    assert np.isclose(tnr, 1.0 - fpr), "TNR: {:.4f}, FPR: {:.4f}".format(tnr, fpr)
    assert np.isclose(mpr, 1.0 - mnr), "MPR: {:.4f}, MNR: {:.4f}".format(mpr, mnr)

    precision = np.sum(tp_map) / (np.sum(tp_map) + np.sum(fp_map) + 1e-9)
    beta = 0.5
    f05 = ((1 + beta ** 2) * precision * tpr) / (beta ** 2 * precision + tpr + 1e-9)
    accuracy_must_may = (np.sum(tp_map) + np.sum(may_neg_map)) / (
        np.sum(label == labels_dict["must"]) + np.sum(label == labels_dict["may"])
    )

    metrics_dict = {
        "tpr": tpr,
        "tpt": tpt,
        "tnr": tnr,
        "tnt": tnt,
        "fpr": fpr,
        "fpt": fpt,
        "fnr": fnr,
        "fnt": fnt,
        "mpr": mpr,
        "mnr": mnr,
        "accuracy": accuracy,
        "error": error,
        "precision": precision,
        "f05": f05,
        "accuracy_must_may": accuracy_must_may,
    }
    maps_dict = {
        "tp": tp_map,
        "tn": tn_map,
        "fp": fp_map,
        "fn": fn_map,
        "may_pos": may_pos_map,
        "may_neg": may_neg_map,
    }

    return metrics_dict, maps_dict


def pred_cannot(pred, label, label_cannot=0):
    """
    Metric for the masker: Computes false positive rate and its map. If the
    predictions are soft, the errors are weighted accordingly.

    Parameters
    ----------
    pred : array-like
        Mask prediction
    label : array-like
        Mask ground truth labels
    label_cannot : int
        The label index of "cannot flood"

    Returns
    -------
    fp_map : array-like
        The map of false positives: predicted mask on cannot flood
    fpr : float
        False positive rate: rate of predicted mask on cannot flood
    """
    fp_map = pred * np.asarray(label == label_cannot, dtype=int)
    fpr = np.sum(fp_map) / np.sum(label == label_cannot)
    return fp_map, fpr


def missed_must(pred, label, label_must=1):
    """
    Metric for the masker: Computes false negative rate and its map. If the
    predictions are soft, the errors are weighted accordingly.

    Parameters
    ----------
    pred : array-like
        Mask prediction
    label : array-like
        Mask ground truth labels
    label_must : int
        The label index of "must flood"

    Returns
    -------
    fn_map : array-like
        The map of false negatives: missed mask on must flood
    fnr : float
        False negative rate: rate of missed mask on must flood
    """
    fn_map = (1.0 - pred) * np.asarray(label == label_must, dtype=int)
    fnr = np.sum(fn_map) / np.sum(label == label_must)
    return fn_map, fnr
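# Illustrative sketch (hypothetical `_demo_masker_classification` helper, not
# in the original module): evaluating a soft mask prediction against a
# cannot/must/may label map. The 3x3 toy arrays are chosen so every class is
# present, which the rate denominators require.
def _demo_masker_classification():
    label = np.array([[0, 1, 2], [0, 1, 2], [0, 1, 2]])  # cannot/must/may
    pred = np.array([[0.1, 0.9, 0.5], [0.2, 0.8, 0.4], [0.0, 1.0, 0.6]])
    metrics, maps = masker_classification_metrics(pred, label)
    print({k: round(float(v), 3) for k, v in metrics.items()})
    print(sorted(maps.keys()))  # tp, tn, fp, fn, may_pos, may_neg maps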
def may_flood(pred, label, label_may=2):
    """
    Metric for the masker: Computes "may" negative and "may" positive rates
    and their maps. If the predictions are soft, the "errors" are weighted
    accordingly.

    Parameters
    ----------
    pred : array-like
        Mask prediction
    label : array-like
        Mask ground truth labels
    label_may : int
        The label index of "may flood"

    Returns
    -------
    may_neg_map : array-like
        The map of "may" negatives
    may_pos_map : array-like
        The map of "may" positives
    mnr : float
        "May" negative rate
    mpr : float
        "May" positive rate
    """
    may_neg_map = (1.0 - pred) * np.asarray(label == label_may, dtype=int)
    may_pos_map = pred * np.asarray(label == label_may, dtype=int)
    mnr = np.sum(may_neg_map) / np.sum(label == label_may)
    mpr = np.sum(may_pos_map) / np.sum(label == label_may)
    return may_neg_map, may_pos_map, mnr, mpr


def masker_metrics(pred, label, label_cannot=0, label_must=1):
    """
    Computes a set of metrics for the masker

    Parameters
    ----------
    pred : array-like
        Mask prediction
    label : array-like
        Mask ground truth labels
    label_must : int
        The label index of "must flood"
    label_cannot : int
        The label index of "cannot flood"

    Returns
    -------
    tpr : float
        True positive rate
    tnr : float
        True negative rate
    precision : float
        Precision, considering only cannot and must flood labels
    f1 : float
        F1 score, considering only cannot and must flood labels
    """
    tp_map = pred * np.asarray(label == label_must, dtype=int)
    tpr = np.sum(tp_map) / np.sum(label == label_must)
    tn_map = (1.0 - pred) * np.asarray(label == label_cannot, dtype=int)
    tnr = np.sum(tn_map) / np.sum(label == label_cannot)
    fp_map = pred * np.asarray(label == label_cannot, dtype=int)
    fn_map = (1.0 - pred) * np.asarray(label == label_must, dtype=int)  # noqa: F841
    precision = np.sum(tp_map) / (np.sum(tp_map) + np.sum(fp_map))
    f1 = 2 * (precision * tpr) / (precision + tpr)
    return tpr, tnr, precision, f1
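# Sketch of the per-error-type helpers and masker_metrics on the same toy
# pred/label pair as above (hypothetical `_demo_masker_metrics` helper, added
# for illustration only).
def _demo_masker_metrics():
    label = np.array([[0, 1, 2], [0, 1, 2], [0, 1, 2]])
    pred = np.array([[0.1, 0.9, 0.5], [0.2, 0.8, 0.4], [0.0, 1.0, 0.6]])
    fp_map, fpr = pred_cannot(pred, label)  # predicted mask on "cannot flood"
    fn_map, fnr = missed_must(pred, label)  # missed mask on "must flood"
    may_neg, may_pos, mnr, mpr = may_flood(pred, label)
    tpr, tnr, precision, f1 = masker_metrics(pred, label)
    print(f"fpr={fpr:.2f} fnr={fnr:.2f} mnr={mnr:.2f} mpr={mpr:.2f}")
    print(f"tpr={tpr:.2f} tnr={tnr:.2f} precision={precision:.2f} f1={f1:.2f}")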
def get_confusion_matrix(tpr, tnr, fpr, fnr, mpr, mnr):
    """
    Constructs the confusion matrix of a masker prediction over a set of samples

    Parameters
    ----------
    tpr : vector-like
        True positive rate
    tnr : vector-like
        True negative rate
    fpr : vector-like
        False positive rate
    fnr : vector-like
        False negative rate
    mpr : vector-like
        "May" positive rate
    mnr : vector-like
        "May" negative rate

    Returns
    -------
    confusion_matrix : 3x3 array
        Confusion matrix: [i, j] = [pred, true]
            | tnr fnr mnr |
            | fpr tpr mpr |
            | 0.  0.  0.  |
    confusion_matrix_std : 3x3 array
        Standard deviation of the confusion matrix
    """
    # Compute mean and standard deviations over all samples
    tpr_m = np.mean(tpr)
    tpr_s = np.std(tpr)
    tnr_m = np.mean(tnr)
    tnr_s = np.std(tnr)
    fpr_m = np.mean(fpr)
    fpr_s = np.std(fpr)
    fnr_m = np.mean(fnr)
    fnr_s = np.std(fnr)
    mpr_m = np.mean(mpr)
    mpr_s = np.std(mpr)
    mnr_m = np.mean(mnr)
    mnr_s = np.std(mnr)

    # Assertions
    assert np.isclose(tpr_m, 1.0 - fnr_m), "TPR: {:.4f}, FNR: {:.4f}".format(
        tpr_m, fnr_m
    )
    assert np.isclose(tnr_m, 1.0 - fpr_m), "TNR: {:.4f}, FPR: {:.4f}".format(
        tnr_m, fpr_m
    )
    assert np.isclose(mpr_m, 1.0 - mnr_m), "MPR: {:.4f}, MNR: {:.4f}".format(
        mpr_m, mnr_m
    )

    # Fill confusion matrix
    confusion_matrix = np.zeros((3, 3))
    confusion_matrix[0, 0] = tnr_m
    confusion_matrix[0, 1] = fnr_m
    confusion_matrix[0, 2] = mnr_m
    confusion_matrix[1, 0] = fpr_m
    confusion_matrix[1, 1] = tpr_m
    confusion_matrix[1, 2] = mpr_m
    confusion_matrix[2, 2] = 0.0

    # Standard deviation
    confusion_matrix_std = np.zeros((3, 3))
    confusion_matrix_std[0, 0] = tnr_s
    confusion_matrix_std[0, 1] = fnr_s
    confusion_matrix_std[0, 2] = mnr_s
    confusion_matrix_std[1, 0] = fpr_s
    confusion_matrix_std[1, 1] = tpr_s
    confusion_matrix_std[1, 2] = mpr_s
    confusion_matrix_std[2, 2] = 0.0
    return confusion_matrix, confusion_matrix_std


def edges_coherence_std_min(pred, label, label_must=1, bin_th=0.5):
    """
    The standard deviation of the minimum distance between the edge of the
    prediction and the edge of the "must flood" label.

    Parameters
    ----------
    pred : array-like
        Mask prediction
    label : array-like
        Mask ground truth labels
    label_must : int
        The label index of "must flood"
    bin_th : float
        The threshold for the binarization of the prediction

    Returns
    -------
    metric : float
        The value of the metric
    pred_edge : array-like
        The edges image of the prediction, for visualization
    label_edge : array-like
        The edges image of the "must flood" label, for visualization
    """
    # Keep must flood label only: binary map with 1 on "must flood", 0 elsewhere
    label = deepcopy(label)
    label[label != label_must] = -1
    label[label == label_must] = 1
    label[label != 1] = 0
    label = np.asarray(label, dtype=float)

    # Binarize prediction
    pred = np.asarray(pred > bin_th, dtype=float)

    # Compute edges
    pred = filters.sobel(pred)
    label = filters.sobel(label)

    # Location of edges
    pred_coord = np.argwhere(pred > 0)
    label_coord = np.argwhere(label > 0)

    # Handle blank predictions
    if pred_coord.shape[0] == 0:
        return 1.0, pred, label

    # Normalized pairwise distances between pred and label
    dist_mat = np.divide(euclidean_distances(pred_coord, label_coord), pred.shape[0])

    # Standard deviation of the minimum distance from pred to label
    edge_coherence = np.std(np.min(dist_mat, axis=1))

    return edge_coherence, pred, label
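# Aggregation sketch (hypothetical `_demo_confusion_matrix` helper, added for
# illustration): building the 3x3 confusion matrix from per-sample rates. The
# rates must be complementary (fnr = 1 - tpr, fpr = 1 - tnr, mnr = 1 - mpr) or
# the assertions inside get_confusion_matrix fail.
def _demo_confusion_matrix():
    tpr = np.array([0.9, 0.8])
    tnr = np.array([0.7, 0.95])
    mpr = np.array([0.6, 0.5])
    cm, cm_std = get_confusion_matrix(
        tpr, tnr, 1.0 - tnr, 1.0 - tpr, mpr, 1.0 - mpr
    )
    print(cm)  # row 0: tnr fnr mnr / row 1: fpr tpr mpr / row 2: zeros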
def boxplot_metric(
    output_filename,
    df,
    metric,
    dict_metrics,
    do_stripplot=False,
    dict_models=None,
    dpi=300,
    **snskwargs
):
    f = plt.figure(dpi=dpi)

    if do_stripplot:
        ax = sns.boxplot(x="model", y=metric, data=df, fliersize=0.0, **snskwargs)
        ax = sns.stripplot(
            x="model", y=metric, data=df, size=2.0, color="gray", **snskwargs
        )
    else:
        ax = sns.boxplot(x="model", y=metric, data=df, **snskwargs)

    # Set axes labels
    ax.set_xlabel("Models", rotation=0, fontsize="medium")
    ax.set_ylabel(dict_metrics[metric], rotation=90, fontsize="medium")

    # Spines
    sns.despine(left=True, bottom=True)

    # X-Tick labels
    if dict_models:
        xticklabels = [dict_models[t.get_text()] for t in ax.get_xticklabels()]
        ax.set_xticklabels(
            xticklabels,
            rotation=20,
            verticalalignment="top",
            horizontalalignment="right",
            fontsize="xx-small",
        )

    f.savefig(
        output_filename,
        dpi=f.dpi,
        bbox_inches="tight",
        facecolor="white",
        transparent=False,
    )
    f.clear()
    plt.close(f)


def clustermap_metric(
    output_filename,
    df,
    metric,
    dict_metrics,
    method="average",
    cluster_metric="euclidean",
    dict_models=None,
    dpi=300,
    **snskwargs
):
    ax_grid = sns.clustermap(data=df, method=method, metric=cluster_metric, **snskwargs)
    ax_heatmap = ax_grid.ax_heatmap
    ax_cbar = ax_grid.ax_cbar

    # Set axes labels
    ax_heatmap.set_xlabel("Models", rotation=0, fontsize="medium")
    ax_heatmap.set_ylabel("Images", rotation=90, fontsize="medium")

    # Set title
    ax_cbar.set_title(dict_metrics[metric], rotation=0, fontsize="x-large")

    # X-Tick labels
    if dict_models:
        xticklabels = [dict_models[t.get_text()] for t in ax_heatmap.get_xticklabels()]
        ax_heatmap.set_xticklabels(
            xticklabels,
            rotation=20,
            verticalalignment="top",
            horizontalalignment="right",
            fontsize="small",
        )

    ax_grid.fig.savefig(
        output_filename,
        dpi=dpi,
        bbox_inches="tight",
        facecolor="white",
        transparent=False,
    )
    ax_grid.fig.clear()
    plt.close(ax_grid.fig)
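# Plotting sketch (hypothetical `_demo_boxplot` helper and filename, added for
# illustration): boxplot_metric expects a long-form DataFrame with a "model"
# column and one column per metric; clustermap_metric expects a wide
# images-by-models frame. pandas is assumed available (seaborn depends on it).
def _demo_boxplot():
    import pandas as pd

    df = pd.DataFrame(
        {"model": ["A"] * 5 + ["B"] * 5, "fpr": np.random.rand(10)}
    )
    # Writes fpr_boxplot.png to the working directory
    boxplot_metric("fpr_boxplot.png", df, "fpr", {"fpr": "False positive rate"})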