import numpy as np
from skimage import measure
from sklearn.metrics import auc, precision_recall_curve, roc_auc_score, roc_curve


def calculate_max_f1(gt, scores):
    """Return the maximum F1 score over all PR-curve thresholds and the threshold that attains it."""
    precision, recall, thresholds = precision_recall_curve(gt, scores)
    a = 2 * precision * recall
    b = precision + recall
    f1s = np.divide(a, b, out=np.zeros_like(a), where=b != 0)
    index = np.argmax(f1s)
    max_f1 = f1s[index]
    threshold = thresholds[index]
    return max_f1, threshold


def metric_cal(scores, gt_list, gt_mask_list, cal_pro=False):
    """Compute image-level and pixel-level ROC AUC / max-F1, and optionally the region-level max-F1."""
    # image-level ROC AUC: use the maximum anomaly score per image as the image score
    img_scores = scores.reshape(scores.shape[0], -1).max(axis=1)
    gt_list = np.asarray(gt_list, dtype=int)
    fpr, tpr, _ = roc_curve(gt_list, img_scores)
    img_roc_auc = roc_auc_score(gt_list, img_scores)
    # print('INFO: image ROCAUC: %.3f' % (img_roc_auc))
    img_f1, img_threshold = calculate_max_f1(gt_list, img_scores)

    gt_mask = np.asarray(gt_mask_list, dtype=int)
    pxl_f1, pxl_threshold = calculate_max_f1(gt_mask.flatten(), scores.flatten())

    # pixel-level ROC AUC
    fpr, tpr, _ = roc_curve(gt_mask.flatten(), scores.flatten())
    per_pixel_rocauc = roc_auc_score(gt_mask.flatten(), scores.flatten())

    # region-level max-F1 (optional, since it is the most expensive metric)
    if cal_pro:
        # pro_auc_score = cal_pro_metric(gt_mask_list, scores, fpr_thresh=0.3)
        max_f1_region = calculate_max_f1_region(gt_mask_list, scores)
    else:
        # pro_auc_score = 0
        max_f1_region = 0

    result_dict = {'i_roc': img_roc_auc * 100, 'p_roc': per_pixel_rocauc * 100,
                   'i_f1': img_f1 * 100, 'i_thresh': img_threshold,
                   'p_f1': pxl_f1 * 100, 'p_thresh': pxl_threshold,
                   'r_f1': max_f1_region * 100}
    return result_dict


def rescale(x):
    return (x - x.min()) / (x.max() - x.min())


def cal_pro_metric(labeled_imgs, score_imgs, fpr_thresh=0.3, max_steps=200):
    """Compute the PRO (per-region overlap) AUC up to the given false-positive-rate threshold."""
    labeled_imgs = np.array(labeled_imgs)
    labeled_imgs[labeled_imgs <= 0.45] = 0
    labeled_imgs[labeled_imgs > 0.45] = 1
    labeled_imgs = labeled_imgs.astype(bool)

    max_th = score_imgs.max()
    min_th = score_imgs.min()
    delta = (max_th - min_th) / max_steps

    ious_mean = []
    ious_std = []
    pros_mean = []
    pros_std = []
    threds = []
    fprs = []
    binary_score_maps = np.zeros_like(score_imgs, dtype=bool)

    for step in range(max_steps):
        thred = max_th - step * delta
        # segmentation
        binary_score_maps[score_imgs <= thred] = 0
        binary_score_maps[score_imgs > thred] = 1

        pro = []  # per-region overlap
        iou = []  # per-image IoU
        # pro: for each connected gt region, compute the overlap between the gt region and the predicted region
        # iou: for each image, compute intersection/union between the gt and predicted binary maps
        for i in range(len(binary_score_maps)):  # i-th image
            # pro (per-region level)
            label_map = measure.label(labeled_imgs[i], connectivity=2)
            props = measure.regionprops(label_map)
            for prop in props:
                x_min, y_min, x_max, y_max = prop.bbox
                cropped_pred_label = binary_score_maps[i][x_min:x_max, y_min:y_max]
                # cropped_mask = masks[i][x_min:x_max, y_min:y_max]
                cropped_mask = prop.filled_image  # corrected!
                intersection = np.logical_and(cropped_pred_label, cropped_mask).astype(np.float32).sum()
                pro.append(intersection / prop.area)
            # iou (per-image level)
            intersection = np.logical_and(binary_score_maps[i], labeled_imgs[i]).astype(np.float32).sum()
            union = np.logical_or(binary_score_maps[i], labeled_imgs[i]).astype(np.float32).sum()
            if labeled_imgs[i].any():  # skip images whose ground truth has no anomalous pixels
                iou.append(intersection / union)

        # average the metrics over the test set at this threshold
        ious_mean.append(np.array(iou).mean())
        # print("per image mean iou:", np.array(iou).mean())
        ious_std.append(np.array(iou).std())
        pros_mean.append(np.array(pro).mean())
        pros_std.append(np.array(pro).std())

        # fpr for pro-auc
        masks_neg = ~labeled_imgs
        fpr = np.logical_and(masks_neg, binary_score_maps).sum() / masks_neg.sum()
        fprs.append(fpr)
        threds.append(thred)

    # as arrays
    threds = np.array(threds)
    pros_mean = np.array(pros_mean)
    pros_std = np.array(pros_std)
    fprs = np.array(fprs)

    # keep only the part of the curve below the FPR threshold (default 30%), then integrate
    idx = fprs <= fpr_thresh  # indices where fpr is below fpr_thresh
    fprs_selected = fprs[idx]
    fprs_selected = rescale(fprs_selected)  # rescale fpr [0, 0.3] -> [0, 1]
    pros_mean_selected = pros_mean[idx]
    pro_auc_score = auc(fprs_selected, pros_mean_selected)
    # print("pro auc ({}% FPR):".format(int(fpr_thresh * 100)), pro_auc_score)
    return pro_auc_score


def calculate_max_f1_region(labeled_imgs, score_imgs, pro_thresh=0.6, max_steps=200):
    """Sweep thresholds and return the maximum region-level F1.

    A predicted region counts as a true positive if its best IoU with any ground-truth region
    reaches pro_thresh; precision divides the hits by the number of predicted regions and recall
    divides them by the number of ground-truth regions.
    """
    labeled_imgs = np.array(labeled_imgs)
    # labeled_imgs[labeled_imgs <= 0.1] = 0
    # labeled_imgs[labeled_imgs > 0.1] = 1
    labeled_imgs = labeled_imgs.astype(bool)

    max_th = score_imgs.max()
    min_th = score_imgs.min()
    delta = (max_th - min_th) / max_steps

    f1_list = []
    recall_list = []
    precision_list = []
    binary_score_maps = np.zeros_like(score_imgs, dtype=bool)

    for step in range(max_steps):
        thred = max_th - step * delta
        # segmentation
        binary_score_maps[score_imgs <= thred] = 0
        binary_score_maps[score_imgs > thred] = 1

        pro = []  # best IoU per predicted region
        predict_region_number = 0
        gt_region_number = 0
        # for each connected predicted region, find its best IoU against all connected gt regions
        for i in range(len(binary_score_maps)):  # i-th image
            label_map = measure.label(labeled_imgs[i], connectivity=2)
            props = measure.regionprops(label_map)
            score_map = measure.label(binary_score_maps[i], connectivity=2)
            score_props = measure.regionprops(score_map)
            predict_region_number += len(score_props)
            gt_region_number += len(props)
            # if len(score_props) == 0 or len(props) == 0:
            #     pro.append(0)
            #     continue
            for score_prop in score_props:
                x_min_0, y_min_0, x_max_0, y_max_0 = score_prop.bbox
                cur_pros = [0]
                for prop in props:
                    x_min_1, y_min_1, x_max_1, y_max_1 = prop.bbox
                    # compute IoU inside the union of the two bounding boxes
                    x_min = min(x_min_0, x_min_1)
                    y_min = min(y_min_0, y_min_1)
                    x_max = max(x_max_0, x_max_1)
                    y_max = max(y_max_0, y_max_1)
                    cropped_pred_label = binary_score_maps[i][x_min:x_max, y_min:y_max]
                    cropped_gt_label = labeled_imgs[i][x_min:x_max, y_min:y_max]
                    # cropped_mask = masks[i][x_min:x_max, y_min:y_max]
                    # cropped_mask = prop.filled_image  # corrected!
                    intersection = np.logical_and(cropped_pred_label, cropped_gt_label).astype(np.float32).sum()
                    union = np.logical_or(cropped_pred_label, cropped_gt_label).astype(np.float32).sum()
                    cur_pros.append(intersection / union)
                pro.append(max(cur_pros))

        pro = np.array(pro)
        if gt_region_number == 0 or predict_region_number == 0:
            print(f'gt_number: {gt_region_number}, pred_number: {predict_region_number}')
            recall = 0
            precision = 0
            f1 = 0
        else:
            recall = (pro >= pro_thresh).astype(np.float32).sum() / gt_region_number
            precision = (pro >= pro_thresh).astype(np.float32).sum() / predict_region_number
            if recall == 0 or precision == 0:
                f1 = 0
            else:
                f1 = 2 * recall * precision / (recall + precision)
        f1_list.append(f1)
        recall_list.append(recall)
        precision_list.append(precision)

    # as array
    f1_list = np.array(f1_list)
    max_f1 = f1_list.max()
    cor_recall = recall_list[f1_list.argmax()]
    cor_precision = precision_list[f1_list.argmax()]
    print(f'cor recall: {cor_recall}, cor precision: {cor_precision}')
    return max_f1
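

# A minimal usage sketch (not part of the original module): it builds a small synthetic batch of
# anomaly-score maps and labels to show how metric_cal is typically called. The shapes, labels,
# and defect locations below are illustrative assumptions, not data from the original project.
if __name__ == '__main__':
    rng = np.random.default_rng(0)
    # 4 test images of size 32x32 with anomaly scores in [0, 1]
    demo_scores = rng.random((4, 32, 32)).astype(np.float32)
    # image-level labels: first two normal, last two anomalous
    demo_gt_list = [0, 0, 1, 1]
    # pixel-level masks: each anomalous image gets one square defect region
    demo_gt_masks = np.zeros((4, 32, 32), dtype=int)
    demo_gt_masks[2, 8:16, 8:16] = 1
    demo_gt_masks[3, 20:28, 4:12] = 1
    # boost the scores inside the defects so the metrics are non-trivial
    demo_scores[2, 8:16, 8:16] += 1.0
    demo_scores[3, 20:28, 4:12] += 1.0
    results = metric_cal(demo_scores, demo_gt_list, demo_gt_masks, cal_pro=True)
    print(results)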