import os
import random
from collections import Counter
from pprint import pprint

import matplotlib.patches as patches
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import yaml
from torch.utils.data import DataLoader
from torchsummary import summary
from torchvision import datasets, transforms
from tqdm import tqdm

import config
|
|
|
|
|
def iou_width_height(boxes1, boxes2): |
|
""" |
|
Parameters: |
|
boxes1 (tensor): width and height of the first bounding boxes |
|
boxes2 (tensor): width and height of the second bounding boxes |
|
Returns: |
|
tensor: Intersection over union of the corresponding boxes |
|
""" |
|
intersection = torch.min(boxes1[..., 0], boxes2[..., 0]) * torch.min( |
|
boxes1[..., 1], boxes2[..., 1] |
|
) |
|
union = ( |
|
boxes1[..., 0] * boxes1[..., 1] + boxes2[..., 0] * boxes2[..., 1] - intersection |
|
) |
|
return intersection / union |
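
# Illustrative sketch (not part of the original script): iou_width_height is
# typically used to match a ground-truth box against anchor priors by shape
# alone, as if all boxes shared the same center. The values below are made up.
def _demo_iou_width_height():
    anchors = torch.tensor([[0.28, 0.22], [0.38, 0.48], [0.90, 0.78]])
    gt_wh = torch.tensor([0.30, 0.25])
    # one IoU per anchor; the best-matching anchor is the argmax
    ious = iou_width_height(gt_wh, anchors)
    print(ious, ious.argmax())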
|
|
|
|
|
def intersection_over_union(boxes_preds, boxes_labels, box_format="midpoint"): |
|
""" |
|
Video explanation of this function: |
|
https://youtu.be/XXYG5ZWtjj0 |
|
|
|
This function calculates intersection over union (iou) given pred boxes |
|
and target boxes. |
|
|
|
Parameters: |
|
boxes_preds (tensor): Predictions of Bounding Boxes (BATCH_SIZE, 4) |
|
boxes_labels (tensor): Correct labels of Bounding Boxes (BATCH_SIZE, 4) |
|
        box_format (str): "midpoint" for (x, y, w, h) boxes or "corners" for (x1, y1, x2, y2) boxes
|
|
|
Returns: |
|
tensor: Intersection over union for all examples |
|
""" |
|
|
|
if box_format == "midpoint": |
|
box1_x1 = boxes_preds[..., 0:1] - boxes_preds[..., 2:3] / 2 |
|
box1_y1 = boxes_preds[..., 1:2] - boxes_preds[..., 3:4] / 2 |
|
box1_x2 = boxes_preds[..., 0:1] + boxes_preds[..., 2:3] / 2 |
|
box1_y2 = boxes_preds[..., 1:2] + boxes_preds[..., 3:4] / 2 |
|
box2_x1 = boxes_labels[..., 0:1] - boxes_labels[..., 2:3] / 2 |
|
box2_y1 = boxes_labels[..., 1:2] - boxes_labels[..., 3:4] / 2 |
|
box2_x2 = boxes_labels[..., 0:1] + boxes_labels[..., 2:3] / 2 |
|
box2_y2 = boxes_labels[..., 1:2] + boxes_labels[..., 3:4] / 2 |
|
|
|
    elif box_format == "corners":
|
box1_x1 = boxes_preds[..., 0:1] |
|
box1_y1 = boxes_preds[..., 1:2] |
|
box1_x2 = boxes_preds[..., 2:3] |
|
box1_y2 = boxes_preds[..., 3:4] |
|
box2_x1 = boxes_labels[..., 0:1] |
|
box2_y1 = boxes_labels[..., 1:2] |
|
box2_x2 = boxes_labels[..., 2:3] |
|
box2_y2 = boxes_labels[..., 3:4] |
|
|
|
x1 = torch.max(box1_x1, box2_x1) |
|
y1 = torch.max(box1_y1, box2_y1) |
|
x2 = torch.min(box1_x2, box2_x2) |
|
y2 = torch.min(box1_y2, box2_y2) |
|
|
|
intersection = (x2 - x1).clamp(0) * (y2 - y1).clamp(0) |
|
box1_area = abs((box1_x2 - box1_x1) * (box1_y2 - box1_y1)) |
|
box2_area = abs((box2_x2 - box2_x1) * (box2_y2 - box2_y1)) |
|
|
|
return intersection / (box1_area + box2_area - intersection + 1e-6) |
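
# Illustrative sketch (not part of the original script): a quick sanity check
# of intersection_over_union with hand-picked corner-format boxes.
def _demo_intersection_over_union():
    box_a = torch.tensor([0.0, 0.0, 2.0, 2.0])  # area 4
    box_b = torch.tensor([1.0, 1.0, 3.0, 3.0])  # area 4, overlap area 1
    # expected IoU = 1 / (4 + 4 - 1) ~= 0.1429
    print(intersection_over_union(box_a, box_b, box_format="corners"))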
|
|
|
|
|
def non_max_suppression(bboxes, iou_threshold, threshold, box_format="corners"): |
|
""" |
|
Video explanation of this function: |
|
https://youtu.be/YDkjWEN8jNA |
|
|
|
Does Non Max Suppression given bboxes |
|
|
|
Parameters: |
|
        bboxes (list): list of lists containing all bboxes with each bbox
        specified as [class_pred, prob_score, x1, y1, x2, y2]
        iou_threshold (float): IoU above which a lower-scoring box of the
        same class is suppressed as a duplicate
        threshold (float): minimum prob_score for a box to be kept at all
        (independent of IoU)
        box_format (str): "midpoint" or "corners" used to specify bboxes
|
|
|
Returns: |
|
list: bboxes after performing NMS given a specific IoU threshold |
|
""" |
|
|
|
assert type(bboxes) == list |
|
|
|
bboxes = [box for box in bboxes if box[1] > threshold] |
|
bboxes = sorted(bboxes, key=lambda x: x[1], reverse=True) |
|
bboxes_after_nms = [] |
|
|
|
while bboxes: |
|
chosen_box = bboxes.pop(0) |
|
|
|
bboxes = [ |
|
box |
|
for box in bboxes |
|
if box[0] != chosen_box[0] |
|
or intersection_over_union( |
|
torch.tensor(chosen_box[2:]), |
|
torch.tensor(box[2:]), |
|
box_format=box_format, |
|
) |
|
< iou_threshold |
|
] |
|
|
|
bboxes_after_nms.append(chosen_box) |
|
|
|
return bboxes_after_nms |
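
# Illustrative sketch (not part of the original script): two heavily
# overlapping boxes of the same class collapse to the higher-scoring one,
# while the low-confidence box is dropped by `threshold` before NMS runs.
def _demo_non_max_suppression():
    boxes = [
        [0, 0.9, 0.0, 0.0, 2.0, 2.0],
        [0, 0.8, 0.1, 0.1, 2.0, 2.0],  # suppressed: same class, IoU > 0.5
        [1, 0.2, 5.0, 5.0, 6.0, 6.0],  # removed: score below threshold
    ]
    print(non_max_suppression(boxes, iou_threshold=0.5, threshold=0.4,
                              box_format="corners"))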
|
|
|
|
|
def mean_average_precision( |
|
pred_boxes, true_boxes, iou_threshold=0.5, box_format="midpoint", num_classes=20 |
|
): |
|
""" |
|
Video explanation of this function: |
|
https://youtu.be/FppOzcDvaDI |
|
|
|
This function calculates mean average precision (mAP) |
|
|
|
Parameters: |
|
pred_boxes (list): list of lists containing all bboxes with each bboxes |
|
specified as [train_idx, class_prediction, prob_score, x1, y1, x2, y2] |
|
        true_boxes (list): Similar to pred_boxes except all the correct ones
        iou_threshold (float): IoU required for a predicted box to count
        as a true positive
|
box_format (str): "midpoint" or "corners" used to specify bboxes |
|
num_classes (int): number of classes |
|
|
|
Returns: |
|
float: mAP value across all classes given a specific IoU threshold |
|
""" |
|
|
|
|
|
average_precisions = [] |
|
|
|
|
|
epsilon = 1e-6 |
|
|
|
for c in range(num_classes): |
|
detections = [] |
|
ground_truths = [] |
|
|
|
|
|
|
|
|
|
        # gather all detections and ground truths that belong to class c
        for detection in pred_boxes:
            if detection[1] == c:
                detections.append(detection)

        for true_box in true_boxes:
            if true_box[1] == c:
                ground_truths.append(true_box)

        # amount_bboxes maps each image index to its number of ground-truth
        # boxes, e.g. Counter({0: 3, 1: 5})
        amount_bboxes = Counter([gt[0] for gt in ground_truths])

        # replace each count with a zero tensor so we can mark which ground
        # truths have already been matched to a detection,
        # e.g. 3 -> tensor([0., 0., 0.])
        for key, val in amount_bboxes.items():
            amount_bboxes[key] = torch.zeros(val)
|
|
|
|
|
detections.sort(key=lambda x: x[2], reverse=True) |
|
TP = torch.zeros((len(detections))) |
|
FP = torch.zeros((len(detections))) |
|
total_true_bboxes = len(ground_truths) |
|
|
|
|
|
if total_true_bboxes == 0: |
|
continue |
|
|
|
        for detection_idx, detection in enumerate(detections):
            # only compare against ground truths from the same image
            ground_truth_img = [
                bbox for bbox in ground_truths if bbox[0] == detection[0]
            ]
|
|
|
num_gts = len(ground_truth_img) |
|
best_iou = 0 |
|
|
|
for idx, gt in enumerate(ground_truth_img): |
|
iou = intersection_over_union( |
|
torch.tensor(detection[3:]), |
|
torch.tensor(gt[3:]), |
|
box_format=box_format, |
|
) |
|
|
|
if iou > best_iou: |
|
best_iou = iou |
|
best_gt_idx = idx |
|
|
|
            if best_iou > iou_threshold:
                # each ground truth may be matched only once; a second
                # detection of the same object is a false positive
                if amount_bboxes[detection[0]][best_gt_idx] == 0:
                    TP[detection_idx] = 1
                    amount_bboxes[detection[0]][best_gt_idx] = 1
                else:
                    FP[detection_idx] = 1

            # IoU below the threshold: the detection matched no ground truth
            else:
                FP[detection_idx] = 1
|
|
|
TP_cumsum = torch.cumsum(TP, dim=0) |
|
FP_cumsum = torch.cumsum(FP, dim=0) |
|
recalls = TP_cumsum / (total_true_bboxes + epsilon) |
|
precisions = TP_cumsum / (TP_cumsum + FP_cumsum + epsilon) |
|
precisions = torch.cat((torch.tensor([1]), precisions)) |
|
recalls = torch.cat((torch.tensor([0]), recalls)) |
|
|
|
average_precisions.append(torch.trapz(precisions, recalls)) |
|
|
|
return sum(average_precisions) / len(average_precisions) |
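
# Illustrative sketch (not part of the original script): toy mAP check with a
# single class and a single image (train_idx 0). One perfect detection against
# one ground truth should give an AP of ~1.0.
def _demo_mean_average_precision():
    preds = [[0, 0, 0.9, 0.5, 0.5, 0.2, 0.2]]
    truths = [[0, 0, 1.0, 0.5, 0.5, 0.2, 0.2]]
    print(mean_average_precision(preds, truths, iou_threshold=0.5,
                                 box_format="midpoint", num_classes=1))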
|
|
|
|
|
def plot_image(image, boxes): |
|
"""Plots predicted bounding boxes on the image""" |
|
cmap = plt.get_cmap("tab20b") |
|
class_labels = config.COCO_LABELS if config.DATASET=='COCO' else config.PASCAL_CLASSES |
|
colors = [cmap(i) for i in np.linspace(0, 1, len(class_labels))] |
|
im = np.array(image) |
|
height, width, _ = im.shape |
|
|
|
|
|
fig, ax = plt.subplots(1) |
|
|
|
ax.imshow(im) |
|
|
|
|
|
|
|
|
|
|
|
for box in boxes: |
|
assert len(box) == 6, "box should contain class pred, confidence, x, y, width, height" |
|
class_pred = box[0] |
|
box = box[2:] |
|
upper_left_x = box[0] - box[2] / 2 |
|
upper_left_y = box[1] - box[3] / 2 |
|
rect = patches.Rectangle( |
|
(upper_left_x * width, upper_left_y * height), |
|
box[2] * width, |
|
box[3] * height, |
|
linewidth=2, |
|
edgecolor=colors[int(class_pred)], |
|
facecolor="none", |
|
) |
|
|
|
ax.add_patch(rect) |
|
plt.text( |
|
upper_left_x * width, |
|
upper_left_y * height, |
|
s=class_labels[int(class_pred)], |
|
color="white", |
|
verticalalignment="top", |
|
bbox={"color": colors[int(class_pred)], "pad": 0}, |
|
) |
|
|
|
plt.show() |
|
|
|
|
|
def get_evaluation_bboxes( |
|
loader, |
|
model, |
|
iou_threshold, |
|
anchors, |
|
threshold, |
|
box_format="midpoint", |
|
device="cuda", |
|
): |
|
|
|
    # run the model over the loader and return NMS-filtered predictions and
    # ground truths, each box prefixed with a running per-image index
    model.eval()
|
train_idx = 0 |
|
all_pred_boxes = [] |
|
all_true_boxes = [] |
|
for batch_idx, (x, labels) in enumerate(tqdm(loader)): |
|
x = x.to(device) |
|
|
|
with torch.no_grad(): |
|
predictions = model(x) |
|
|
|
batch_size = x.shape[0] |
|
bboxes = [[] for _ in range(batch_size)] |
|
for i in range(3): |
|
S = predictions[i].shape[2] |
|
anchor = torch.tensor([*anchors[i]]).to(device) * S |
|
boxes_scale_i = cells_to_bboxes( |
|
predictions[i], anchor, S=S, is_preds=True |
|
) |
|
for idx, (box) in enumerate(boxes_scale_i): |
|
bboxes[idx] += box |
|
|
|
|
|
        # we only need one set of ground-truth boxes per image, not one per
        # scale, so the finest scale (labels[2]) from the last loop iteration
        # is converted here
        true_bboxes = cells_to_bboxes(
            labels[2], anchor, S=S, is_preds=False
        )
|
|
|
for idx in range(batch_size): |
|
nms_boxes = non_max_suppression( |
|
bboxes[idx], |
|
iou_threshold=iou_threshold, |
|
threshold=threshold, |
|
box_format=box_format, |
|
) |
|
|
|
for nms_box in nms_boxes: |
|
all_pred_boxes.append([train_idx] + nms_box) |
|
|
|
for box in true_bboxes[idx]: |
|
if box[1] > threshold: |
|
all_true_boxes.append([train_idx] + box) |
|
|
|
train_idx += 1 |
|
|
|
model.train() |
|
return all_pred_boxes, all_true_boxes |
|
|
|
|
|
def cells_to_bboxes(predictions, anchors, S, is_preds=True): |
|
""" |
|
Scales the predictions coming from the model to |
|
be relative to the entire image such that they for example later |
|
can be plotted or. |
|
INPUT: |
|
predictions: tensor of size (N, 3, S, S, num_classes+5) |
|
anchors: the anchors used for the predictions |
|
S: the number of cells the image is divided in on the width (and height) |
|
is_preds: whether the input is predictions or the true bounding boxes |
|
OUTPUT: |
|
converted_bboxes: the converted boxes of sizes (N, num_anchors, S, S, 1+5) with class index, |
|
object score, bounding box coordinates |
|
""" |
|
BATCH_SIZE = predictions.shape[0] |
|
num_anchors = len(anchors) |
|
box_predictions = predictions[..., 1:5] |
|
if is_preds: |
|
anchors = anchors.reshape(1, len(anchors), 1, 1, 2) |
|
box_predictions[..., 0:2] = torch.sigmoid(box_predictions[..., 0:2]) |
|
box_predictions[..., 2:] = torch.exp(box_predictions[..., 2:]) * anchors |
|
scores = torch.sigmoid(predictions[..., 0:1]) |
|
best_class = torch.argmax(predictions[..., 5:], dim=-1).unsqueeze(-1) |
|
else: |
|
scores = predictions[..., 0:1] |
|
best_class = predictions[..., 5:6] |
|
|
|
cell_indices = ( |
|
torch.arange(S) |
|
.repeat(predictions.shape[0], 3, S, 1) |
|
.unsqueeze(-1) |
|
.to(predictions.device) |
|
) |
|
x = 1 / S * (box_predictions[..., 0:1] + cell_indices) |
|
y = 1 / S * (box_predictions[..., 1:2] + cell_indices.permute(0, 1, 3, 2, 4)) |
|
w_h = 1 / S * box_predictions[..., 2:4] |
|
converted_bboxes = torch.cat((best_class, scores, x, y, w_h), dim=-1).reshape(BATCH_SIZE, num_anchors * S * S, 6) |
|
return converted_bboxes.tolist() |
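
# Illustrative sketch (not part of the original script): feeding a random
# "prediction" tensor through cells_to_bboxes just to show the shapes; a
# 13x13 grid with 3 anchors yields 3 * 13 * 13 = 507 boxes per image.
def _demo_cells_to_bboxes():
    S, num_classes = 13, 20
    preds = torch.randn(2, 3, S, S, num_classes + 5)
    anchors = torch.tensor([[0.28, 0.22], [0.38, 0.48], [0.90, 0.78]]) * S
    boxes = cells_to_bboxes(preds, anchors, S=S, is_preds=True)
    print(len(boxes), len(boxes[0]))  # 2 images, 507 boxes each
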
|
def check_class_accuracy_batch(model, output, threshold):
    # NOTE: unimplemented placeholder; see check_class_accuracy below for the
    # full per-loader version of this computation.
    model.eval()
    tot_class_preds, correct_class = 0, 0
    tot_noobj, correct_noobj = 0, 0
    tot_obj, correct_obj = 0, 0
    pass
|
|
|
def check_class_accuracy(model, loader,threshold,phase): |
|
model.eval() |
|
tot_class_preds, correct_class = 0, 0 |
|
tot_noobj, correct_noobj = 0, 0 |
|
tot_obj, correct_obj = 0, 0 |
|
|
|
for idx, (x, y) in enumerate(tqdm(loader)): |
|
x = x.to(config.DEVICE) |
|
with torch.no_grad(): |
|
out = model(x) |
|
|
|
for i in range(3): |
|
y[i] = y[i].to(config.DEVICE) |
|
obj = y[i][..., 0] == 1 |
|
noobj = y[i][..., 0] == 0 |
|
|
|
correct_class += torch.sum( |
|
torch.argmax(out[i][..., 5:][obj], dim=-1) == y[i][..., 5][obj] |
|
) |
|
tot_class_preds += torch.sum(obj) |
|
|
|
obj_preds = torch.sigmoid(out[i][..., 0]) > threshold |
|
correct_obj += torch.sum(obj_preds[obj] == y[i][..., 0][obj]) |
|
tot_obj += torch.sum(obj) |
|
correct_noobj += torch.sum(obj_preds[noobj] == y[i][..., 0][noobj]) |
|
tot_noobj += torch.sum(noobj) |
|
|
|
    print(phase + " : " + f"Class accuracy is: {(correct_class/(tot_class_preds+1e-16))*100:.2f}%")
    print(phase + " : " + f"No obj accuracy is: {(correct_noobj/(tot_noobj+1e-16))*100:.2f}%")
    print(phase + " : " + f"Obj accuracy is: {(correct_obj/(tot_obj+1e-16))*100:.2f}%")
|
class_accuracy = (correct_class/(tot_class_preds+1e-16))*100 |
|
no_obj_accuracy = (correct_noobj/(tot_noobj+1e-16))*100 |
|
obj_accuracy = (correct_obj/(tot_obj+1e-16))*100 |
|
model.train() |
|
return class_accuracy,no_obj_accuracy,obj_accuracy |
|
|
|
|
|
def get_mean_std(loader):
    """Compute per-channel mean and std over a loader via std = sqrt(E[X^2] - E[X]^2)."""
    channels_sum, channels_sqrd_sum, num_batches = 0, 0, 0
|
|
|
for data, _ in tqdm(loader): |
|
channels_sum += torch.mean(data, dim=[0, 2, 3]) |
|
channels_sqrd_sum += torch.mean(data ** 2, dim=[0, 2, 3]) |
|
num_batches += 1 |
|
|
|
mean = channels_sum / num_batches |
|
std = (channels_sqrd_sum / num_batches - mean ** 2) ** 0.5 |
|
|
|
return mean, std |
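
# Illustrative sketch (not part of the original script): get_mean_std on a
# tiny synthetic loader. Note it averages per-batch statistics, which is
# exact only when all batches have the same size.
def _demo_get_mean_std():
    from torch.utils.data import TensorDataset
    data = torch.rand(64, 3, 8, 8)
    targets = torch.zeros(64)
    loader = DataLoader(TensorDataset(data, targets), batch_size=16)
    print(get_mean_std(loader))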
|
|
|
|
|
def save_checkpoint(model, optimizer, filename="my_checkpoint.pth.tar"): |
|
print("=> Saving checkpoint") |
|
checkpoint = { |
|
"state_dict": model.state_dict(), |
|
"optimizer": optimizer.state_dict(), |
|
} |
|
torch.save(checkpoint, filename) |
|
|
|
|
|
def load_checkpoint(checkpoint_file, model, optimizer, lr): |
|
print("=> Loading checkpoint") |
|
checkpoint = torch.load(checkpoint_file, map_location=config.DEVICE) |
|
model.load_state_dict(checkpoint["state_dict"]) |
|
optimizer.load_state_dict(checkpoint["optimizer"]) |
|
|
|
|
|
|
|
for param_group in optimizer.param_groups: |
|
param_group["lr"] = lr |
|
|
|
def get_loaders_new(test_csv_path): |
|
from dataset import YOLODatasetOK |
|
|
|
IMAGE_SIZE = config.IMAGE_SIZE |
|
|
|
test_dataset = YOLODatasetOK( |
|
test_csv_path, |
|
transform=config.test_transforms, |
|
S=[IMAGE_SIZE // 32, IMAGE_SIZE // 16, IMAGE_SIZE // 8], |
|
img_dir=config.IMG_DIR, |
|
label_dir=config.LABEL_DIR, |
|
anchors=config.ANCHORS, |
|
) |
|
|
|
|
|
test_loader = DataLoader( |
|
dataset=test_dataset, |
|
batch_size=config.BATCH_SIZE, |
|
num_workers=config.NUM_WORKERS, |
|
pin_memory=config.PIN_MEMORY, |
|
shuffle=False, |
|
drop_last=False, |
|
) |
|
|
|
|
|
|
|
return test_loader |
|
def get_loaders(train_csv_path, test_csv_path,valid_csv_path): |
|
from dataset import YOLODataset |
|
|
|
IMAGE_SIZE = config.IMAGE_SIZE |
|
train_dataset = YOLODataset( |
|
train_csv_path, |
|
transform=config.train_transforms, |
|
S=[IMAGE_SIZE // 32, IMAGE_SIZE // 16, IMAGE_SIZE // 8], |
|
img_dir=config.IMG_DIR, |
|
label_dir=config.LABEL_DIR, |
|
anchors=config.ANCHORS, |
|
) |
|
test_dataset = YOLODataset( |
|
test_csv_path, |
|
transform=config.test_transforms, |
|
S=[IMAGE_SIZE // 32, IMAGE_SIZE // 16, IMAGE_SIZE // 8], |
|
img_dir=config.IMG_DIR, |
|
label_dir=config.LABEL_DIR, |
|
anchors=config.ANCHORS, |
|
) |
|
train_loader = DataLoader( |
|
dataset=train_dataset, |
|
batch_size=config.BATCH_SIZE, |
|
num_workers=config.NUM_WORKERS, |
|
pin_memory=config.PIN_MEMORY, |
|
shuffle=True, |
|
drop_last=False, |
|
) |
|
test_loader = DataLoader( |
|
dataset=test_dataset, |
|
batch_size=config.BATCH_SIZE, |
|
num_workers=config.NUM_WORKERS, |
|
pin_memory=config.PIN_MEMORY, |
|
shuffle=False, |
|
drop_last=False, |
|
) |
|
|
|
train_eval_dataset = YOLODataset( |
|
valid_csv_path, |
|
transform=config.test_transforms, |
|
S=[IMAGE_SIZE // 32, IMAGE_SIZE // 16, IMAGE_SIZE // 8], |
|
img_dir=config.IMG_DIR, |
|
label_dir=config.LABEL_DIR, |
|
anchors=config.ANCHORS, |
|
) |
|
train_eval_loader = DataLoader( |
|
dataset=train_eval_dataset, |
|
batch_size=config.BATCH_SIZE, |
|
num_workers=config.NUM_WORKERS, |
|
pin_memory=config.PIN_MEMORY, |
|
shuffle=False, |
|
drop_last=False, |
|
) |
|
|
|
return train_loader, test_loader, train_eval_loader |
|
|
|
def plot_couple_examples(model, loader, thresh, iou_thresh, anchors): |
|
model.eval() |
|
x, y = next(iter(loader)) |
|
    x = x.to(config.DEVICE)
|
with torch.no_grad(): |
|
out = model(x) |
|
bboxes = [[] for _ in range(x.shape[0])] |
|
for i in range(3): |
|
batch_size, A, S, _, _ = out[i].shape |
|
anchor = anchors[i] |
|
boxes_scale_i = cells_to_bboxes( |
|
out[i], anchor, S=S, is_preds=True |
|
) |
|
for idx, (box) in enumerate(boxes_scale_i): |
|
bboxes[idx] += box |
|
|
|
model.train() |
|
|
|
for i in range(batch_size//4): |
|
nms_boxes = non_max_suppression( |
|
bboxes[i], iou_threshold=iou_thresh, threshold=thresh, box_format="midpoint", |
|
) |
|
plot_image(x[i].permute(1,2,0).detach().cpu(), nms_boxes) |
|
|
|
|
|
|
|
def seed_everything(seed=42): |
|
os.environ['PYTHONHASHSEED'] = str(seed) |
|
random.seed(seed) |
|
np.random.seed(seed) |
|
torch.manual_seed(seed) |
|
torch.cuda.manual_seed(seed) |
|
torch.cuda.manual_seed_all(seed) |
|
torch.backends.cudnn.deterministic = True |
|
torch.backends.cudnn.benchmark = False |
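
# Usage note (not part of the original script): call seed_everything once at
# program start, before building datasets, loaders, or models. Setting
# cudnn.deterministic/benchmark trades some speed for reproducibility, and
# loaders with num_workers > 0 may still need a worker_init_fn to be fully
# deterministic.
# Example: seed_everything(42)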
|
|
|
|
|
def clip_coords(boxes, img_shape):
    """Clip xyxy boxes in-place to the image boundaries (img_shape = (h, w))."""
    boxes[:, 0].clamp_(0, img_shape[1])  # x1
    boxes[:, 1].clamp_(0, img_shape[0])  # y1
    boxes[:, 2].clamp_(0, img_shape[1])  # x2
    boxes[:, 3].clamp_(0, img_shape[0])  # y2
|
|
|
def xywhn2xyxy(x, w=640, h=640, padw=0, padh=0):
    """Convert normalized (x, y, w, h) boxes to pixel (x1, y1, x2, y2)."""
    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
|
y[..., 0] = w * (x[..., 0] - x[..., 2] / 2) + padw |
|
y[..., 1] = h * (x[..., 1] - x[..., 3] / 2) + padh |
|
y[..., 2] = w * (x[..., 0] + x[..., 2] / 2) + padw |
|
y[..., 3] = h * (x[..., 1] + x[..., 3] / 2) + padh |
|
return y |
|
|
|
|
|
def xyn2xy(x, w=640, h=640, padw=0, padh=0):
    """Convert normalized (x, y) points to pixel coordinates."""
    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
|
y[..., 0] = w * x[..., 0] + padw |
|
y[..., 1] = h * x[..., 1] + padh |
|
return y |
|
|
|
def xyxy2xywhn(x, w=640, h=640, clip=False, eps=0.0):
    """Convert pixel (x1, y1, x2, y2) boxes to normalized (x, y, w, h)."""
    if clip:
        clip_boxes(x, (h - eps, w - eps))
    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
|
y[..., 0] = ((x[..., 0] + x[..., 2]) / 2) / w |
|
y[..., 1] = ((x[..., 1] + x[..., 3]) / 2) / h |
|
y[..., 2] = (x[..., 2] - x[..., 0]) / w |
|
y[..., 3] = (x[..., 3] - x[..., 1]) / h |
|
return y |
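
# Illustrative sketch (not part of the original script): xywhn2xyxy and
# xyxy2xywhn are inverses of each other (up to float error), so a box should
# survive a round trip unchanged.
def _demo_box_roundtrip():
    boxes = np.array([[0.5, 0.5, 0.2, 0.4]])  # normalized (x, y, w, h)
    xyxy = xywhn2xyxy(boxes, w=640, h=640)
    back = xyxy2xywhn(xyxy, w=640, h=640)
    assert np.allclose(boxes, back)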
|
|
|
def clip_boxes(boxes, shape):
    """Clip xyxy boxes to the image bounds (shape = (h, w)); tensor or ndarray."""
    if isinstance(boxes, torch.Tensor):
|
boxes[..., 0].clamp_(0, shape[1]) |
|
boxes[..., 1].clamp_(0, shape[0]) |
|
boxes[..., 2].clamp_(0, shape[1]) |
|
boxes[..., 3].clamp_(0, shape[0]) |
|
else: |
|
boxes[..., [0, 2]] = boxes[..., [0, 2]].clip(0, shape[1]) |
|
boxes[..., [1, 3]] = boxes[..., [1, 3]].clip(0, shape[0]) |
|
|
|
|
|
|
|
""" |
|
Utility Script containing functions to be used for training |
|
Author: Shilpaj Bhalerao |
|
""" |
|
|
|
import math

from pytorch_grad_cam import GradCAM
from pytorch_grad_cam.utils.image import show_cam_on_image
|
|
|
|
|
def get_summary(model: 'object of model architecture', input_size: tuple) -> None:
|
""" |
|
Function to get the summary of the model architecture |
|
:param model: Object of model architecture class |
|
:param input_size: Input data shape (Channels, Height, Width) |
|
""" |
|
use_cuda = torch.cuda.is_available() |
|
device = torch.device("cuda" if use_cuda else "cpu") |
|
network = model.to(device) |
|
summary(network, input_size=input_size) |
|
|
|
|
|
def get_misclassified_data(model, device, test_loader): |
|
""" |
|
Function to run the model on test set and return misclassified images |
|
:param model: Network Architecture |
|
:param device: CPU/GPU |
|
:param test_loader: DataLoader for test set |
|
""" |
|
|
|
model.eval() |
|
|
|
|
|
misclassified_data = [] |
|
|
|
|
|
with torch.no_grad(): |
|
|
|
for data, target in test_loader: |
|
|
|
|
|
data, target = data.to(device), target.to(device) |
|
|
|
|
|
for image, label in zip(data, target): |
|
|
|
|
|
image = image.unsqueeze(0) |
|
|
|
|
|
output = model.prediction_step(image) |
|
|
|
|
|
pred = output.argmax(dim=1, keepdim=True) |
|
|
|
|
|
if pred != label: |
|
misclassified_data.append((image, label, pred)) |
|
return misclassified_data |
|
|
|
|
|
|
|
def get_mnist_statistics(data_set, data_set_type='Train'): |
|
""" |
|
Function to return the statistics of the training data |
|
:param data_set: Training dataset |
|
:param data_set_type: Type of dataset [Train/Test/Val] |
|
""" |
|
|
|
train_data = data_set.train_data |
|
train_data = data_set.transform(train_data.numpy()) |
|
|
|
print(f'[{data_set_type}]') |
|
print(' - Numpy Shape:', data_set.train_data.cpu().numpy().shape) |
|
print(' - Tensor Shape:', data_set.train_data.size()) |
|
print(' - min:', torch.min(train_data)) |
|
print(' - max:', torch.max(train_data)) |
|
print(' - mean:', torch.mean(train_data)) |
|
print(' - std:', torch.std(train_data)) |
|
print(' - var:', torch.var(train_data)) |
|
|
|
dataiter = next(iter(data_set)) |
|
images, labels = dataiter[0], dataiter[1] |
|
|
|
print(images.shape) |
|
print(labels) |
|
|
|
|
|
plt.imshow(images[0].numpy().squeeze(), cmap='gray') |
|
|
|
|
|
def get_cifar_property(images, operation):
    """
    Get the property on each channel of the CIFAR
    :param images: Get the property value on the images
    :param operation: Mean, std, Variance, etc
    """
    # use getattr instead of eval to call e.g. images[:, 0, :, :].mean()
    param_r = getattr(images[:, 0, :, :], operation)()
    param_g = getattr(images[:, 1, :, :], operation)()
    param_b = getattr(images[:, 2, :, :], operation)()
    return param_r, param_g, param_b
|
|
|
|
|
def get_cifar_statistics(data_set, data_set_type='Train'): |
|
""" |
|
Function to get the statistical information of the CIFAR dataset |
|
:param data_set: Training set of CIFAR |
|
:param data_set_type: Training or Test data |
|
""" |
|
|
|
images = [item[0] for item in data_set] |
|
images = torch.stack(images, dim=0).numpy() |
|
|
|
|
|
mean_r, mean_g, mean_b = get_cifar_property(images, 'mean') |
|
|
|
|
|
std_r, std_g, std_b = get_cifar_property(images, 'std') |
|
|
|
|
|
min_r, min_g, min_b = get_cifar_property(images, 'min') |
|
|
|
|
|
max_r, max_g, max_b = get_cifar_property(images, 'max') |
|
|
|
|
|
var_r, var_g, var_b = get_cifar_property(images, 'var') |
|
|
|
print(f'[{data_set_type}]') |
|
print(f' - Total {data_set_type} Images: {len(data_set)}') |
|
print(f' - Tensor Shape: {images[0].shape}') |
|
print(f' - min: {min_r, min_g, min_b}') |
|
print(f' - max: {max_r, max_g, max_b}') |
|
print(f' - mean: {mean_r, mean_g, mean_b}') |
|
print(f' - std: {std_r, std_g, std_b}') |
|
print(f' - var: {var_r, var_g, var_b}') |
|
|
|
|
|
plt.imshow(np.transpose(images[1].squeeze(), (1, 2, 0))) |
|
|
|
|
|
|
|
def display_gradcam_output(data: list, |
|
classes: list[str], |
|
inv_normalize: transforms.Normalize, |
|
model: 'DL Model', |
|
target_layers: list['model_layer'], |
|
targets=None, |
|
number_of_samples: int = 10, |
|
transparency: float = 0.60): |
|
""" |
|
Function to visualize GradCam output on the data |
|
:param data: List[Tuple(image, label)] |
|
:param classes: Name of classes in the dataset |
|
:param inv_normalize: Mean and Standard deviation values of the dataset |
|
:param model: Model architecture |
|
:param target_layers: Layers on which GradCam should be executed |
|
:param targets: Classes to be focused on for GradCam |
|
:param number_of_samples: Number of images to print |
|
:param transparency: Weight of Normal image when mixed with activations |
|
""" |
|
|
|
fig = plt.figure(figsize=(10, 10)) |
|
x_count = 5 |
|
y_count = 1 if number_of_samples <= 5 else math.floor(number_of_samples / x_count) |
|
|
|
|
|
cam = GradCAM(model=model, target_layers=target_layers, use_cuda=True) |
|
|
|
|
|
for i in range(number_of_samples): |
|
plt.subplot(y_count, x_count, i + 1) |
|
input_tensor = data[i][0] |
|
|
|
|
|
grayscale_cam = cam(input_tensor=input_tensor, targets=targets) |
|
grayscale_cam = grayscale_cam[0, :] |
|
|
|
|
|
img = input_tensor.squeeze(0).to('cpu') |
|
img = inv_normalize(img) |
|
rgb_img = np.transpose(img, (1, 2, 0)) |
|
rgb_img = rgb_img.numpy() |
|
|
|
|
|
visualization = show_cam_on_image(rgb_img, grayscale_cam, use_rgb=True, image_weight=transparency) |
|
|
|
|
|
plt.imshow(visualization) |
|
plt.title(r"Correct: " + classes[data[i][1].item()] + '\n' + 'Output: ' + classes[data[i][2].item()]) |
|
plt.xticks([]) |
|
plt.yticks([]) |
|
|
|
|
|
|
|
def display_images(images,labels,num,classes): |
|
fig = plt.figure(figsize=(12, 12)) |
|
|
|
|
|
for idx in np.arange(num): |
|
ax = fig.add_subplot(2, 10, idx+1, xticks=[], yticks=[]) |
|
plt.imshow(im_convert(images[idx])) |
|
ax.set_title(classes[labels[idx].item()]) |
|
|
|
|
|
def GetCorrectPredCount(pPrediction, pLabels): |
|
return pPrediction.argmax(dim=1).eq(pLabels).sum().item() |
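
# Illustrative sketch (not part of the original script): GetCorrectPredCount
# on a fabricated batch of logits; predictions 0 and 1 are correct, 2 is not.
def _demo_get_correct_pred_count():
    logits = torch.tensor([[2.0, 0.1], [0.3, 1.5], [0.9, 0.2]])
    labels = torch.tensor([0, 1, 1])
    print(GetCorrectPredCount(logits, labels))  # 2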
|
|
|
def display_model_summary(model,input_structure=(1,28,28)): |
|
summary(model, input_size=input_structure) |
|
|
|
def imshow(img):
    img = img / 2 + 0.5  # un-normalize from [-1, 1] back to [0, 1]
    npimg = img.numpy()
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
|
|
|
def calculate_mean_std(dataset): |
|
if dataset == 'CIFAR10': |
|
train_transform = transforms.Compose([transforms.ToTensor()]) |
|
train_set = datasets.CIFAR10(root='./data', train=True, download=True, transform=train_transform) |
|
mean = train_set.data.mean(axis=(0,1,2))/255 |
|
std = train_set.data.std(axis=(0,1,2))/255 |
|
        return mean, std
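
# Usage note (not part of the original script): calculate_mean_std downloads
# CIFAR-10 on first use. The canonical CIFAR-10 statistics are roughly
# mean (0.491, 0.482, 0.447) and std (0.247, 0.243, 0.262).
# Example: mean, std = calculate_mean_std('CIFAR10')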
|
|
|
def im_convert(tensor):
    """Un-normalize a (0.5, 0.5, 0.5)-normalized tensor and convert it to a numpy image."""
    image = tensor.cpu().clone().detach().numpy()
    image = image.transpose(1, 2, 0)
    # fixed: the std vector previously read (0.5, 0., 0.5), zeroing the green channel
    image = image * np.array((0.5, 0.5, 0.5)) + np.array((0.5, 0.5, 0.5))
    image = image.clip(0, 1)
    return image
|
|
|
def im_convert_numpy(image): |
|
|
|
image = image.transpose(1, 2, 0) |
|
image = image * np.array((0.5, 0.5, 0.5)) + np.array((0.5, 0.5, 0.5)) |
|
image = image.clip(0, 1) |
|
return image |
|
|
|
def find_misclassified_images(num_of_images, test_loader, device, model):
    count = 0

    misclassified_images = []
    misclassified_labels = []
    true_labels = []

    # iterate over the loader once, collecting mismatches until we have enough
    # (fixed: the iterator was previously re-created every pass, so only the
    # first batch was ever inspected, and the batch size was hard-coded to 4)
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        output = model(images)
        _, preds = torch.max(output, 1)
        for idx in range(images.shape[0]):
            if preds[idx] != labels[idx] and count < num_of_images:
                count += 1
                misclassified_images.append(images[idx].cpu().detach().numpy())
                misclassified_labels.append(preds[idx].cpu().detach().numpy())
                true_labels.append(labels[idx].cpu().detach().numpy())
        if count >= num_of_images:
            break
    return misclassified_images, misclassified_labels, true_labels
|
|
|
def display_missclassfied_images(missclassified_images, missclassified_labels, true_labels, classes):
    fig = plt.figure(figsize=(10, 4))
    for idx in range(len(missclassified_images)):
        ax = fig.add_subplot(3, 5, idx + 1, xticks=[], yticks=[])
        plt.imshow(im_convert_numpy(missclassified_images[idx]))
        # title shows "predicted (true)"; fixed: class names are looked up by
        # label, not by the image array, and labels are now passed in
        ax.set_title("{} ({})".format(str(classes[int(missclassified_labels[idx])]), str(classes[int(true_labels[idx])])), color="red")