# Evaluation metrics for segmentation masks, plus plotting helpers.
import cv2
import numpy as np
import torch
from skimage import filters
from sklearn.metrics.pairwise import euclidean_distances
import matplotlib.pyplot as plt
import seaborn as sns
from copy import deepcopy
# ------------------------------------------------------------------------------
# ----- Evaluation metrics for a pair of binary mask images (pred, target) -----
# ------------------------------------------------------------------------------
def get_accuracy(arr1, arr2):
    """Pixel accuracy: fraction of positions where the two arrays agree.

    Args:
        arr1 (np.array): first mask
        arr2 (np.array): second mask, same shape as arr1

    Returns:
        float: number of matching pixels divided by the total pixel count
    """
    n_matching = (arr1 == arr2).sum()
    return n_matching / arr1.size
def trimap(pred_im, gt_im, thickness=8):
    """Compute accuracy in a region of thickness around the contours
    for binary images (0-1 values)

    Args:
        pred_im (Image): Prediction (PIL Image; converted with np.array below)
        gt_im (Image): Target (PIL Image; .size yields (width, height))
        thickness (int, optional): width in pixels of the band drawn around
            each ground-truth contour. Defaults to 8.

    Returns:
        float: pixel accuracy restricted to the contour band
    """
    # PIL's .size is (width, height) while numpy arrays are (height, width).
    W, H = gt_im.size
    # NOTE(review): unpacking two return values assumes OpenCV >= 4
    # (OpenCV 3's findContours returned three) — confirm the pinned version.
    contours, hierarchy = cv2.findContours(
        np.array(gt_im), mode=cv2.RETR_TREE, method=cv2.CHAIN_APPROX_SIMPLE
    )
    # Rasterize a band of `thickness` pixels around every ground-truth contour
    # (contourIdx=-1 draws all contours, color 1 marks band pixels).
    mask_contour = np.zeros((H, W), dtype=np.int32)
    cv2.drawContours(
        mask_contour, contours, -1, (1), thickness=thickness, hierarchy=hierarchy
    )
    # Compare prediction and target only at pixels inside the contour band.
    gt_contour = np.array(gt_im)[np.where(mask_contour > 0)]
    pred_contour = np.array(pred_im)[np.where(mask_contour > 0)]
    return get_accuracy(pred_contour, gt_contour)
def iou(pred_im, gt_im):
    """
    IoU (Jaccard index) for binary masks (0-1 values).

    Args:
        pred_im (array-like): predicted binary mask
        gt_im (array-like): target binary mask, same shape

    Returns:
        float: intersection over union; 1.0 when both masks are empty
            (the unguarded expression would divide by zero in that case)
    """
    pred = np.array(pred_im)
    gt = np.array(gt_im)
    intersection = (pred * gt).sum()
    union = (pred + gt).sum() - intersection
    if union == 0:
        # Both masks empty: they agree perfectly, and 0/0 is undefined.
        return 1.0
    return intersection / union
def f1_score(pred_im, gt_im):
    """
    F1 score (Dice coefficient) for binary masks (0-1 values).

    Args:
        pred_im (array-like): predicted binary mask
        gt_im (array-like): target binary mask, same shape

    Returns:
        float: 2 * |intersection| / (|pred| + |gt|); 1.0 when both masks
            are empty (the unguarded expression would divide by zero)
    """
    pred = np.array(pred_im)
    gt = np.array(gt_im)
    intersection = (pred * gt).sum()
    denominator = (pred + gt).sum()
    if denominator == 0:
        # Both masks empty: perfect agreement; avoids 0/0.
        return 1.0
    return 2 * intersection / denominator
def accuracy(pred_im, gt_im):
    """
    Pixel accuracy between a prediction and a target label map.

    Args:
        pred_im (array-like): either a label map with the same shape as
            gt_im, or logits of shape N x C x H x W (argmax is applied)
        gt_im (array-like): target labels, N x H x W or N x 1 x H x W

    Returns:
        float: fraction of pixels where prediction equals target
    """
    pred = np.array(pred_im)
    gt = np.array(gt_im)
    # Squeeze a singleton channel dimension from N x 1 x H x W targets.
    # (The previous version squeezed `gt_im` but kept comparing against the
    # un-squeezed `gt`, so 4D targets were silently broadcast.)
    if gt.ndim == 4:
        assert gt.shape[1] == 1
        gt = gt[:, 0, :, :]
    # If pred carries an extra class dimension, reduce logits to hard labels.
    if pred.ndim > gt.ndim:
        pred = np.argmax(pred, axis=1)
    return float((pred == gt).sum()) / gt.size
def mIOU(pred, label, average="macro"):
    """
    Mean IoU over classes, from logits and integer labels.

    Adapted from:
    https://stackoverflow.com/questions/62461379/multiclass-semantic-segmentation-model-evaluation
    `pred` is an N x C x H x W tensor of logits (argmax is taken over the
    class dimension) and `label` an N x H x W tensor of integer labels.
    With average="macro" this matches sklearn's jaccard_score.

    Args:
        pred (torch.tensor): predicted logits
        label (torch.tensor): labels
        average: "macro" or "weighted"

    Returns:
        float: mIOU, can be nan
    """
    num_classes = pred.shape[-3]
    hard_pred = torch.argmax(pred, dim=1).squeeze(1).view(-1)
    flat_label = label.view(-1)
    # Binary case: only score the foreground class present in the labels.
    # Otherwise score every class index in [0, num_classes); the implicit
    # ignore_index == num_classes is therefore never evaluated.
    if num_classes > 2:
        class_ids = list(range(num_classes))
    else:
        class_ids = [int(flat_label.max().item())]
    ious = []
    weights = []
    for cls in class_ids:
        in_pred = hard_pred == cls
        in_label = flat_label == cls
        n_pred = in_pred.long().sum().item()
        n_label = in_label.long().sum().item()
        # Skip classes absent from both prediction and label.
        if n_pred == 0 and n_label == 0:
            continue
        inter = (in_pred[in_label]).long().sum().item()
        union = n_pred + n_label - inter
        weights.append(n_pred)
        ious.append(float(inter) / float(union))
    if not ious:
        return float("nan")
    if average == "weighted":
        return np.sum(np.multiply(weights, ious) / np.sum(weights))
    return np.mean(ious)
def masker_classification_metrics(
    pred, label, labels_dict=None
):
    """
    Classification metrics for the masker, and the corresponding maps. If the
    predictions are soft, the errors are weighted accordingly. Metrics computed:
        tpr : float
            True positive rate
        tpt : float
            True positive total (divided by total population)
        tnr : float
            True negative rate
        tnt : float
            True negative total (divided by total population)
        fpr : float
            False positive rate: rate of predicted mask on cannot flood
        fpt : float
            False positive total (divided by total population)
        fnr : float
            False negative rate: rate of missed mask on must flood
        fnt : float
            False negative total (divided by total population)
        mnr : float
            "May" negative rate (labeled as "may", predicted as no-mask)
        mpr : float
            "May" positive rate (labeled as "may", predicted as mask)
        accuracy : float
            Accuracy
        error : float
            Error
        precision : float
            Precision, considering only cannot and must flood labels
        f05 : float
            F0.5 score, considering only cannot and must flood labels
        accuracy_must_may : float
            Accuracy considering only the must and may areas

    Parameters
    ----------
    pred : array-like
        Mask prediction
    label : array-like
        Mask ground truth labels
    labels_dict : dict, optional
        A dictionary with the identifier of each class (cannot, must, may).
        Defaults to {"cannot": 0, "must": 1, "may": 2}.

    Returns
    -------
    metrics_dict : dict
        A dictionary with metric name and value pairs
    maps_dict : dict
        A dictionary containing the metric maps
    """
    # Avoid a mutable default argument; equivalent to the previous default.
    if labels_dict is None:
        labels_dict = {"cannot": 0, "must": 1, "may": 2}
    # Per-pixel error/success maps; soft predictions weight each pixel.
    tp_map = pred * np.asarray(label == labels_dict["must"], dtype=int)
    tpr = np.sum(tp_map) / np.sum(label == labels_dict["must"])
    tpt = np.sum(tp_map) / np.prod(label.shape)
    tn_map = (1.0 - pred) * np.asarray(label == labels_dict["cannot"], dtype=int)
    tnr = np.sum(tn_map) / np.sum(label == labels_dict["cannot"])
    tnt = np.sum(tn_map) / np.prod(label.shape)
    fp_map = pred * np.asarray(label == labels_dict["cannot"], dtype=int)
    fpr = np.sum(fp_map) / np.sum(label == labels_dict["cannot"])
    fpt = np.sum(fp_map) / np.prod(label.shape)
    fn_map = (1.0 - pred) * np.asarray(label == labels_dict["must"], dtype=int)
    fnr = np.sum(fn_map) / np.sum(label == labels_dict["must"])
    fnt = np.sum(fn_map) / np.prod(label.shape)
    may_neg_map = (1.0 - pred) * np.asarray(label == labels_dict["may"], dtype=int)
    may_pos_map = pred * np.asarray(label == labels_dict["may"], dtype=int)
    mnr = np.sum(may_neg_map) / np.sum(label == labels_dict["may"])
    mpr = np.sum(may_pos_map) / np.sum(label == labels_dict["may"])
    accuracy = tpt + tnt
    error = fpt + fnt
    # Sanity checks: complementary rates must sum to one.
    assert np.isclose(tpr, 1.0 - fnr), "TPR: {:.4f}, FNR: {:.4f}".format(tpr, fnr)
    assert np.isclose(tnr, 1.0 - fpr), "TNR: {:.4f}, FPR: {:.4f}".format(tnr, fpr)
    assert np.isclose(mpr, 1.0 - mnr), "MPR: {:.4f}, MNR: {:.4f}".format(mpr, mnr)
    # 1e-9 guards against 0/0 when there are no predicted positives.
    precision = np.sum(tp_map) / (np.sum(tp_map) + np.sum(fp_map) + 1e-9)
    beta = 0.5
    f05 = ((1 + beta ** 2) * precision * tpr) / (beta ** 2 * precision + tpr + 1e-9)
    accuracy_must_may = (np.sum(tp_map) + np.sum(may_neg_map)) / (
        np.sum(label == labels_dict["must"]) + np.sum(label == labels_dict["may"])
    )
    metrics_dict = {
        "tpr": tpr,
        "tpt": tpt,
        "tnr": tnr,
        "tnt": tnt,
        "fpr": fpr,
        "fpt": fpt,
        "fnr": fnr,
        "fnt": fnt,
        "mpr": mpr,
        "mnr": mnr,
        "accuracy": accuracy,
        "error": error,
        "precision": precision,
        "f05": f05,
        "accuracy_must_may": accuracy_must_may,
    }
    maps_dict = {
        "tp": tp_map,
        "tn": tn_map,
        "fp": fp_map,
        "fn": fn_map,
        "may_pos": may_pos_map,
        "may_neg": may_neg_map,
    }
    return metrics_dict, maps_dict
def pred_cannot(pred, label, label_cannot=0):
    """
    Masker metric: false positive rate and its per-pixel map. Soft
    predictions weight each pixel's contribution.

    Parameters
    ----------
    pred : array-like
        Mask prediction
    label : array-like
        Mask ground truth labels
    label_cannot : int
        The label index of "cannot flood"

    Returns
    -------
    fp_map : array-like
        The map of false positives: predicted mask on cannot flood
    fpr : float
        False positive rate: rate of predicted mask on cannot flood
    """
    cannot_mask = np.asarray(label == label_cannot, dtype=int)
    fp_map = pred * cannot_mask
    fpr = np.sum(fp_map) / np.sum(label == label_cannot)
    return fp_map, fpr
def missed_must(pred, label, label_must=1):
    """
    Masker metric: false negative rate and its per-pixel map. Soft
    predictions weight each pixel's contribution.

    Parameters
    ----------
    pred : array-like
        Mask prediction
    label : array-like
        Mask ground truth labels
    label_must : int
        The label index of "must flood"

    Returns
    -------
    fn_map : array-like
        The map of false negatives: missed mask on must flood
    fnr : float
        False negative rate: rate of missed mask on must flood
    """
    must_mask = np.asarray(label == label_must, dtype=int)
    fn_map = (1.0 - pred) * must_mask
    fnr = np.sum(fn_map) / np.sum(label == label_must)
    return fn_map, fnr
def may_flood(pred, label, label_may=2):
    """
    Masker metric: "may" negative and "may" positive rates and their maps.
    Soft predictions weight each pixel's contribution.

    Parameters
    ----------
    pred : array-like
        Mask prediction
    label : array-like
        Mask ground truth labels
    label_may : int
        The label index of "may flood"

    Returns
    -------
    may_neg_map : array-like
        The map of "may" negatives
    may_pos_map : array-like
        The map of "may" positives
    mnr : float
        "May" negative rate
    mpr : float
        "May" positive rate
    """
    may_mask = np.asarray(label == label_may, dtype=int)
    n_may = np.sum(label == label_may)
    may_neg_map = (1.0 - pred) * may_mask
    may_pos_map = pred * may_mask
    mnr = np.sum(may_neg_map) / n_may
    mpr = np.sum(may_pos_map) / n_may
    return may_neg_map, may_pos_map, mnr, mpr
def masker_metrics(pred, label, label_cannot=0, label_must=1):
    """
    Computes a set of metrics for the masker.

    Parameters
    ----------
    pred : array-like
        Mask prediction
    label : array-like
        Mask ground truth labels
    label_cannot : int
        The label index of "cannot flood"
    label_must : int
        The label index of "must flood"

    Returns
    -------
    tpr : float
        True positive rate
    tnr : float
        True negative rate
    precision : float
        Precision, considering only cannot and must flood labels
    f1 : float
        F1 score, considering only cannot and must flood labels
    """
    tp_map = pred * np.asarray(label == label_must, dtype=int)
    tpr = np.sum(tp_map) / np.sum(label == label_must)
    tn_map = (1.0 - pred) * np.asarray(label == label_cannot, dtype=int)
    tnr = np.sum(tn_map) / np.sum(label == label_cannot)
    fp_map = pred * np.asarray(label == label_cannot, dtype=int)
    fn_map = (1.0 - pred) * np.asarray(label == label_must, dtype=int)  # noqa: F841
    # 1e-9 guards against 0/0 when there are no predicted positives,
    # matching the epsilon used in masker_classification_metrics.
    precision = np.sum(tp_map) / (np.sum(tp_map) + np.sum(fp_map) + 1e-9)
    f1 = 2 * (precision * tpr) / (precision + tpr + 1e-9)
    return tpr, tnr, precision, f1
def get_confusion_matrix(tpr, tnr, fpr, fnr, mpr, mnr):
    """
    Constructs the confusion matrix of a masker prediction over a set of
    samples, averaging each rate over the samples.

    Parameters
    ----------
    tpr : vector-like
        True positive rate
    tnr : vector-like
        True negative rate
    fpr : vector-like
        False positive rate
    fnr : vector-like
        False negative rate
    mpr : vector-like
        "May" positive rate
    mnr : vector-like
        "May" negative rate

    Returns
    -------
    confusion_matrix : 3x3 array
        Confusion matrix: [i, j] = [pred, true]
            | tnr fnr mnr |
            | fpr tpr mpr |
            | 0.  0.  0.  |
    confusion_matrix_std : 3x3 array
        Standard deviation of the confusion matrix
    """
    # Mean and standard deviation of each rate over all samples,
    # ordered to match the matrix layout documented above.
    rates = [tnr, fnr, mnr, fpr, tpr, mpr]
    tnr_m, fnr_m, mnr_m, fpr_m, tpr_m, mpr_m = [np.mean(r) for r in rates]
    tnr_s, fnr_s, mnr_s, fpr_s, tpr_s, mpr_s = [np.std(r) for r in rates]
    # Sanity checks: complementary rates must average to one.
    assert np.isclose(tpr_m, 1.0 - fnr_m), "TPR: {:.4f}, FNR: {:.4f}".format(
        tpr_m, fnr_m
    )
    assert np.isclose(tnr_m, 1.0 - fpr_m), "TNR: {:.4f}, FPR: {:.4f}".format(
        tnr_m, fpr_m
    )
    assert np.isclose(mpr_m, 1.0 - mnr_m), "MPR: {:.4f}, MNR: {:.4f}".format(
        mpr_m, mnr_m
    )
    # The third row (predicted "may") is always zero by construction.
    confusion_matrix = np.array(
        [
            [tnr_m, fnr_m, mnr_m],
            [fpr_m, tpr_m, mpr_m],
            [0.0, 0.0, 0.0],
        ]
    )
    confusion_matrix_std = np.array(
        [
            [tnr_s, fnr_s, mnr_s],
            [fpr_s, tpr_s, mpr_s],
            [0.0, 0.0, 0.0],
        ]
    )
    return confusion_matrix, confusion_matrix_std
def edges_coherence_std_min(pred, label, label_must=1, bin_th=0.5):
    """
    The standard deviation of the minimum distance between the edge of the
    prediction and the edge of the "must flood" label.

    Parameters
    ----------
    pred : array-like
        Mask prediction
    label : array-like
        Mask ground truth labels
    label_must : int
        The label index of "must flood"
    bin_th : float
        The threshold for the binarization of the prediction

    Returns
    -------
    metric : float
        The value of the metric (1.0 when the prediction has no edges)
    pred_edge : array-like
        The edges images of the prediction, for visualization
    label_edge : array-like
        The edges images of the "must flood" label, for visualization
    """
    # Binary "must flood" mask. (The previous in-place -1 / 1 / 0 relabeling
    # only worked when label_must == 1: for any other index the freshly
    # written 1s were zeroed by the final relabeling step.)
    label = np.asarray(np.asarray(label) == label_must, dtype=float)
    # Binarize prediction
    pred = np.asarray(pred > bin_th, dtype=float)
    # Compute edges
    pred = filters.sobel(pred)
    label = filters.sobel(label)
    # Location of edges
    pred_coord = np.argwhere(pred > 0)
    label_coord = np.argwhere(label > 0)
    # Handle blank predictions
    if pred_coord.shape[0] == 0:
        return 1.0, pred, label
    # NOTE(review): a blank label (empty label_coord) would make the min over
    # axis 1 fail, as in the original — confirm callers guarantee a non-empty
    # "must flood" region.
    # Pairwise distances between pred and label edges, normalized by height.
    dist_mat = np.divide(euclidean_distances(pred_coord, label_coord), pred.shape[0])
    # Standard deviation of the minimum distance from pred to label
    edge_coherence = np.std(np.min(dist_mat, axis=1))
    return edge_coherence, pred, label
def boxplot_metric(
    output_filename,
    df,
    metric,
    dict_metrics,
    do_stripplot=False,
    dict_models=None,
    dpi=300,
    **snskwargs
):
    """
    Save a boxplot of `metric` grouped by the "model" column of `df`.

    Parameters
    ----------
    output_filename : str or path-like
        Destination passed to Figure.savefig
    df : pandas.DataFrame
        Long-format data with at least the columns "model" and `metric`
    metric : str
        Column of `df` plotted on the y axis
    dict_metrics : dict
        Maps metric name to its y-axis display label
    do_stripplot : bool, optional
        If True, overlay individual sample points on the boxplot
    dict_models : dict, optional
        Maps model identifier (x-tick text) to a display label
    dpi : int, optional
        Resolution of the figure
    **snskwargs
        Extra keyword arguments forwarded to the seaborn calls
    """
    f = plt.figure(dpi=dpi)
    if do_stripplot:
        # fliersize=0 hides boxplot outlier markers: the stripplot
        # already shows every individual point.
        ax = sns.boxplot(x="model", y=metric, data=df, fliersize=0.0, **snskwargs)
        ax = sns.stripplot(
            x="model", y=metric, data=df, size=2.0, color="gray", **snskwargs
        )
    else:
        ax = sns.boxplot(x="model", y=metric, data=df, **snskwargs)
    # Set axes labels
    ax.set_xlabel("Models", rotation=0, fontsize="medium")
    ax.set_ylabel(dict_metrics[metric], rotation=90, fontsize="medium")
    # Spines
    sns.despine(left=True, bottom=True)
    # X-Tick labels: replace model identifiers with display names if provided
    if dict_models:
        xticklabels = [dict_models[t.get_text()] for t in ax.get_xticklabels()]
        ax.set_xticklabels(
            xticklabels,
            rotation=20,
            verticalalignment="top",
            horizontalalignment="right",
            fontsize="xx-small",
        )
    f.savefig(
        output_filename,
        dpi=f.dpi,
        bbox_inches="tight",
        facecolor="white",
        transparent=False,
    )
    # Release the figure to avoid accumulating memory across many plots.
    f.clear()
    plt.close(f)
def clustermap_metric(
    output_filename,
    df,
    metric,
    dict_metrics,
    method="average",
    cluster_metric="euclidean",
    dict_models=None,
    dpi=300,
    **snskwargs
):
    """
    Save a seaborn clustermap of `df` (images x models) for a given metric.

    Parameters
    ----------
    output_filename : str or path-like
        Destination passed to Figure.savefig
    df : pandas.DataFrame
        Matrix of metric values, rows = images, columns = models
    metric : str
        Metric name, used to title the colorbar via `dict_metrics`
    dict_metrics : dict
        Maps metric name to its display label
    method : str, optional
        Linkage method forwarded to sns.clustermap
    cluster_metric : str, optional
        Distance metric forwarded to sns.clustermap
    dict_models : dict, optional
        Maps model identifier (x-tick text) to a display label
    dpi : int, optional
        Resolution of the saved figure
    **snskwargs
        Extra keyword arguments forwarded to sns.clustermap
    """
    ax_grid = sns.clustermap(data=df, method=method, metric=cluster_metric, **snskwargs)
    ax_heatmap = ax_grid.ax_heatmap
    ax_cbar = ax_grid.ax_cbar
    # Set axes labels
    ax_heatmap.set_xlabel("Models", rotation=0, fontsize="medium")
    ax_heatmap.set_ylabel("Images", rotation=90, fontsize="medium")
    # The metric name is shown as the colorbar title
    ax_cbar.set_title(dict_metrics[metric], rotation=0, fontsize="x-large")
    # X-Tick labels: replace model identifiers with display names if provided
    if dict_models:
        xticklabels = [dict_models[t.get_text()] for t in ax_heatmap.get_xticklabels()]
        ax_heatmap.set_xticklabels(
            xticklabels,
            rotation=20,
            verticalalignment="top",
            horizontalalignment="right",
            fontsize="small",
        )
    ax_grid.fig.savefig(
        output_filename,
        dpi=dpi,
        bbox_inches="tight",
        facecolor="white",
        transparent=False,
    )
    # Release the figure to avoid accumulating memory across many plots.
    ax_grid.fig.clear()
    plt.close(ax_grid.fig)