added retinanet repo
- retinanet/anchors.py +130 -0
- retinanet/coco_eval.py +84 -0
- retinanet/csv_eval.py +259 -0
- retinanet/dataloader.py +458 -0
- retinanet/losses.py +177 -0
- retinanet/model.py +353 -0
- retinanet/oid_dataset.py +260 -0
- retinanet/utils.py +144 -0
retinanet/anchors.py
ADDED
@@ -0,0 +1,130 @@
import numpy as np
import torch
import torch.nn as nn


class Anchors(nn.Module):
    def __init__(self, pyramid_levels=None, strides=None, sizes=None, ratios=None, scales=None):
        super(Anchors, self).__init__()

        if pyramid_levels is None:
            self.pyramid_levels = [3, 4, 5, 6, 7]
        if strides is None:
            self.strides = [2 ** x for x in self.pyramid_levels]
        if sizes is None:
            self.sizes = [2 ** (x + 2) for x in self.pyramid_levels]
        if ratios is None:
            self.ratios = np.array([0.5, 1, 2])
        if scales is None:
            self.scales = np.array([2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)])

    def forward(self, image):

        image_shape = image.shape[2:]
        image_shape = np.array(image_shape)
        image_shapes = [(image_shape + 2 ** x - 1) // (2 ** x) for x in self.pyramid_levels]

        # compute anchors over all pyramid levels
        all_anchors = np.zeros((0, 4)).astype(np.float32)

        for idx, p in enumerate(self.pyramid_levels):
            anchors = generate_anchors(base_size=self.sizes[idx], ratios=self.ratios, scales=self.scales)
            shifted_anchors = shift(image_shapes[idx], self.strides[idx], anchors)
            all_anchors = np.append(all_anchors, shifted_anchors, axis=0)

        all_anchors = np.expand_dims(all_anchors, axis=0)

        if torch.cuda.is_available():
            return torch.from_numpy(all_anchors.astype(np.float32)).cuda()
        else:
            return torch.from_numpy(all_anchors.astype(np.float32))


def generate_anchors(base_size=16, ratios=None, scales=None):
    """
    Generate anchor (reference) windows by enumerating aspect ratios X
    scales w.r.t. a reference window.
    """

    if ratios is None:
        ratios = np.array([0.5, 1, 2])

    if scales is None:
        scales = np.array([2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)])

    num_anchors = len(ratios) * len(scales)

    # initialize output anchors
    anchors = np.zeros((num_anchors, 4))

    # scale base_size
    anchors[:, 2:] = base_size * np.tile(scales, (2, len(ratios))).T

    # compute areas of anchors
    areas = anchors[:, 2] * anchors[:, 3]

    # correct for ratios
    anchors[:, 2] = np.sqrt(areas / np.repeat(ratios, len(scales)))
    anchors[:, 3] = anchors[:, 2] * np.repeat(ratios, len(scales))

    # transform from (x_ctr, y_ctr, w, h) -> (x1, y1, x2, y2)
    anchors[:, 0::2] -= np.tile(anchors[:, 2] * 0.5, (2, 1)).T
    anchors[:, 1::2] -= np.tile(anchors[:, 3] * 0.5, (2, 1)).T

    return anchors


def compute_shape(image_shape, pyramid_levels):
    """Compute shapes based on pyramid levels.

    :param image_shape:
    :param pyramid_levels:
    :return:
    """
    image_shape = np.array(image_shape[:2])
    image_shapes = [(image_shape + 2 ** x - 1) // (2 ** x) for x in pyramid_levels]
    return image_shapes


def anchors_for_shape(
    image_shape,
    pyramid_levels=None,
    ratios=None,
    scales=None,
    strides=None,
    sizes=None,
    shapes_callback=None,
):

    image_shapes = compute_shape(image_shape, pyramid_levels)

    # compute anchors over all pyramid levels
    all_anchors = np.zeros((0, 4))
    for idx, p in enumerate(pyramid_levels):
        anchors = generate_anchors(base_size=sizes[idx], ratios=ratios, scales=scales)
        shifted_anchors = shift(image_shapes[idx], strides[idx], anchors)
        all_anchors = np.append(all_anchors, shifted_anchors, axis=0)

    return all_anchors


def shift(shape, stride, anchors):
    shift_x = (np.arange(0, shape[1]) + 0.5) * stride
    shift_y = (np.arange(0, shape[0]) + 0.5) * stride

    shift_x, shift_y = np.meshgrid(shift_x, shift_y)

    shifts = np.vstack((
        shift_x.ravel(), shift_y.ravel(),
        shift_x.ravel(), shift_y.ravel()
    )).transpose()

    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = anchors.shape[0]
    K = shifts.shape[0]
    all_anchors = (anchors.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
    all_anchors = all_anchors.reshape((K * A, 4))

    return all_anchors
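For a quick sanity check of the anchor generation above, the following minimal sketch (not part of the commit; it assumes the repo is importable as the `retinanet` package) counts the anchors produced for one pyramid level:

# Sketch: 3 ratios x 3 scales give 9 reference anchors per feature-map cell,
# which shift() then tiles over every cell of the level's feature map.
import numpy as np
from retinanet.anchors import generate_anchors, shift

base = generate_anchors(base_size=32)   # level P3 uses size 2**(3+2) = 32, stride 2**3 = 8
print(base.shape)                       # (9, 4), boxes as (x1, y1, x2, y2)

tiled = shift((80, 100), 8, base)       # tile over an 80x100 feature map at stride 8
print(tiled.shape)                      # (80 * 100 * 9, 4) = (72000, 4)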
retinanet/coco_eval.py
ADDED
@@ -0,0 +1,84 @@
from pycocotools.cocoeval import COCOeval
import json
import torch


def evaluate_coco(dataset, model, threshold=0.05):

    model.eval()

    with torch.no_grad():

        # start collecting results
        results = []
        image_ids = []

        for index in range(len(dataset)):
            data = dataset[index]
            scale = data['scale']

            # run network
            if torch.cuda.is_available():
                scores, labels, boxes = model(data['img'].permute(2, 0, 1).cuda().float().unsqueeze(dim=0))
            else:
                scores, labels, boxes = model(data['img'].permute(2, 0, 1).float().unsqueeze(dim=0))
            scores = scores.cpu()
            labels = labels.cpu()
            boxes = boxes.cpu()

            # correct boxes for image scale
            boxes /= scale

            if boxes.shape[0] > 0:
                # change to (x, y, w, h) (MS COCO standard)
                boxes[:, 2] -= boxes[:, 0]
                boxes[:, 3] -= boxes[:, 1]

                # compute predicted labels and scores
                # for box, score, label in zip(boxes[0], scores[0], labels[0]):
                for box_id in range(boxes.shape[0]):
                    score = float(scores[box_id])
                    label = int(labels[box_id])
                    box = boxes[box_id, :]

                    # scores are sorted, so we can break
                    if score < threshold:
                        break

                    # append detection for each positively labeled class
                    image_result = {
                        'image_id': dataset.image_ids[index],
                        'category_id': dataset.label_to_coco_label(label),
                        'score': float(score),
                        'bbox': box.tolist(),
                    }

                    # append detection to results
                    results.append(image_result)

            # append image to list of processed images
            image_ids.append(dataset.image_ids[index])

            # print progress
            print('{}/{}'.format(index, len(dataset)), end='\r')

        if not len(results):
            return

        # write output
        json.dump(results, open('{}_bbox_results.json'.format(dataset.set_name), 'w'), indent=4)

        # load results in COCO evaluation tool
        coco_true = dataset.coco
        coco_pred = coco_true.loadRes('{}_bbox_results.json'.format(dataset.set_name))

        # run COCO evaluation
        coco_eval = COCOeval(coco_true, coco_pred, 'bbox')
        coco_eval.params.imgIds = image_ids
        coco_eval.evaluate()
        coco_eval.accumulate()
        coco_eval.summarize()

        model.train()

        return
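evaluate_coco expects a dataset exposing image_ids, set_name, label_to_coco_label and a .coco handle, which matches the CocoDataset added in retinanet/dataloader.py later in this commit. A rough usage sketch, not part of the commit; the data directory and weights file are placeholder names:

# Sketch only: COCO-style evaluation wired from the pieces in this commit.
import torch
from torchvision import transforms

from retinanet.dataloader import CocoDataset, Normalizer, Resizer
from retinanet.coco_eval import evaluate_coco

dataset_val = CocoDataset('/data/coco', set_name='val2017',
                          transform=transforms.Compose([Normalizer(), Resizer()]))

model = torch.load('retinanet_weights.pt')   # assumed: a trained retinanet.model.ResNet saved whole
if torch.cuda.is_available():
    model = model.cuda()

evaluate_coco(dataset_val, model, threshold=0.05)   # prints the standard COCOeval summary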
retinanet/csv_eval.py
ADDED
@@ -0,0 +1,259 @@
from __future__ import print_function

import numpy as np
import json
import os
import matplotlib.pyplot as plt
import torch


def compute_overlap(a, b):
    """
    Parameters
    ----------
    a: (N, 4) ndarray of float
    b: (K, 4) ndarray of float
    Returns
    -------
    overlaps: (N, K) ndarray of overlap between boxes and query_boxes
    """
    area = (b[:, 2] - b[:, 0]) * (b[:, 3] - b[:, 1])

    iw = np.minimum(np.expand_dims(a[:, 2], axis=1), b[:, 2]) - np.maximum(np.expand_dims(a[:, 0], 1), b[:, 0])
    ih = np.minimum(np.expand_dims(a[:, 3], axis=1), b[:, 3]) - np.maximum(np.expand_dims(a[:, 1], 1), b[:, 1])

    iw = np.maximum(iw, 0)
    ih = np.maximum(ih, 0)

    ua = np.expand_dims((a[:, 2] - a[:, 0]) * (a[:, 3] - a[:, 1]), axis=1) + area - iw * ih

    ua = np.maximum(ua, np.finfo(float).eps)

    intersection = iw * ih

    return intersection / ua


def _compute_ap(recall, precision):
    """ Compute the average precision, given the recall and precision curves.
    Code originally from https://github.com/rbgirshick/py-faster-rcnn.
    # Arguments
        recall:    The recall curve (list).
        precision: The precision curve (list).
    # Returns
        The average precision as computed in py-faster-rcnn.
    """
    # correct AP calculation
    # first append sentinel values at the end
    mrec = np.concatenate(([0.], recall, [1.]))
    mpre = np.concatenate(([0.], precision, [0.]))

    # compute the precision envelope
    for i in range(mpre.size - 1, 0, -1):
        mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])

    # to calculate area under PR curve, look for points
    # where X axis (recall) changes value
    i = np.where(mrec[1:] != mrec[:-1])[0]

    # and sum (\Delta recall) * prec
    ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
    return ap


def _get_detections(dataset, retinanet, score_threshold=0.05, max_detections=100, save_path=None):
    """ Get the detections from the retinanet using the generator.
    The result is a list of lists such that the size is:
        all_detections[num_images][num_classes] = detections[num_detections, 4 + num_classes]
    # Arguments
        dataset         : The generator used to run images through the retinanet.
        retinanet       : The retinanet to run on the images.
        score_threshold : The score confidence threshold to use.
        max_detections  : The maximum number of detections to use per image.
        save_path       : The path to save the images with visualized detections to.
    # Returns
        A list of lists containing the detections for each image in the generator.
    """
    all_detections = [[None for i in range(dataset.num_classes())] for j in range(len(dataset))]

    retinanet.eval()

    with torch.no_grad():

        for index in range(len(dataset)):
            data = dataset[index]
            scale = data['scale']

            # run network
            if torch.cuda.is_available():
                scores, labels, boxes = retinanet(data['img'].permute(2, 0, 1).cuda().float().unsqueeze(dim=0))
            else:
                scores, labels, boxes = retinanet(data['img'].permute(2, 0, 1).float().unsqueeze(dim=0))
            scores = scores.cpu().numpy()
            labels = labels.cpu().numpy()
            boxes = boxes.cpu().numpy()

            # correct boxes for image scale
            boxes /= scale

            # select indices which have a score above the threshold
            indices = np.where(scores > score_threshold)[0]
            if indices.shape[0] > 0:
                # select those scores
                scores = scores[indices]

                # find the order with which to sort the scores
                scores_sort = np.argsort(-scores)[:max_detections]

                # select detections
                image_boxes = boxes[indices[scores_sort], :]
                image_scores = scores[scores_sort]
                image_labels = labels[indices[scores_sort]]
                image_detections = np.concatenate([image_boxes, np.expand_dims(image_scores, axis=1), np.expand_dims(image_labels, axis=1)], axis=1)

                # copy detections to all_detections
                for label in range(dataset.num_classes()):
                    all_detections[index][label] = image_detections[image_detections[:, -1] == label, :-1]
            else:
                # copy detections to all_detections
                for label in range(dataset.num_classes()):
                    all_detections[index][label] = np.zeros((0, 5))

            print('{}/{}'.format(index + 1, len(dataset)), end='\r')

    return all_detections


def _get_annotations(generator):
    """ Get the ground truth annotations from the generator.
    The result is a list of lists such that the size is:
        all_detections[num_images][num_classes] = annotations[num_detections, 5]
    # Arguments
        generator : The generator used to retrieve ground truth annotations.
    # Returns
        A list of lists containing the annotations for each image in the generator.
    """
    all_annotations = [[None for i in range(generator.num_classes())] for j in range(len(generator))]

    for i in range(len(generator)):
        # load the annotations
        annotations = generator.load_annotations(i)

        # copy detections to all_annotations
        for label in range(generator.num_classes()):
            all_annotations[i][label] = annotations[annotations[:, 4] == label, :4].copy()

        print('{}/{}'.format(i + 1, len(generator)), end='\r')

    return all_annotations


def evaluate(
    generator,
    retinanet,
    iou_threshold=0.5,
    score_threshold=0.05,
    max_detections=100,
    save_path=None
):
    """ Evaluate a given dataset using a given retinanet.
    # Arguments
        generator       : The generator that represents the dataset to evaluate.
        retinanet       : The retinanet to evaluate.
        iou_threshold   : The threshold used to consider when a detection is positive or negative.
        score_threshold : The score confidence threshold to use for detections.
        max_detections  : The maximum number of detections to use per image.
        save_path       : The path to save precision recall curve of each label.
    # Returns
        A dict mapping class names to mAP scores.
    """

    # gather all detections and annotations

    all_detections = _get_detections(generator, retinanet, score_threshold=score_threshold, max_detections=max_detections, save_path=save_path)
    all_annotations = _get_annotations(generator)

    average_precisions = {}

    for label in range(generator.num_classes()):
        false_positives = np.zeros((0,))
        true_positives = np.zeros((0,))
        scores = np.zeros((0,))
        num_annotations = 0.0

        for i in range(len(generator)):
            detections = all_detections[i][label]
            annotations = all_annotations[i][label]
            num_annotations += annotations.shape[0]
            detected_annotations = []

            for d in detections:
                scores = np.append(scores, d[4])

                if annotations.shape[0] == 0:
                    false_positives = np.append(false_positives, 1)
                    true_positives = np.append(true_positives, 0)
                    continue

                overlaps = compute_overlap(np.expand_dims(d, axis=0), annotations)
                assigned_annotation = np.argmax(overlaps, axis=1)
                max_overlap = overlaps[0, assigned_annotation]

                if max_overlap >= iou_threshold and assigned_annotation not in detected_annotations:
                    false_positives = np.append(false_positives, 0)
                    true_positives = np.append(true_positives, 1)
                    detected_annotations.append(assigned_annotation)
                else:
                    false_positives = np.append(false_positives, 1)
                    true_positives = np.append(true_positives, 0)

        # no annotations -> AP for this class is 0 (is this correct?)
        if num_annotations == 0:
            average_precisions[label] = 0, 0
            continue

        # sort by score
        indices = np.argsort(-scores)
        false_positives = false_positives[indices]
        true_positives = true_positives[indices]

        # compute false positives and true positives
        false_positives = np.cumsum(false_positives)
        true_positives = np.cumsum(true_positives)

        # compute recall and precision
        recall = true_positives / num_annotations
        precision = true_positives / np.maximum(true_positives + false_positives, np.finfo(np.float64).eps)

        # compute average precision
        average_precision = _compute_ap(recall, precision)
        average_precisions[label] = average_precision, num_annotations

    print('\nmAP:')
    for label in range(generator.num_classes()):
        label_name = generator.label_to_name(label)
        print('{}: {}'.format(label_name, average_precisions[label][0]))
        print("Precision: ", precision[-1])
        print("Recall: ", recall[-1])

        if save_path != None:
            plt.plot(recall, precision)
            # naming the x axis
            plt.xlabel('Recall')
            # naming the y axis
            plt.ylabel('Precision')

            # giving a title to my graph
            plt.title('Precision Recall curve')

            # function to show the plot
            plt.savefig(save_path + '/' + label_name + '_precision_recall.jpg')

    return average_precisions
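The AP computation in _compute_ap is self-contained, so it can be exercised directly on a toy precision/recall curve. A minimal sketch, not part of the commit:

# Sketch: _compute_ap takes the monotone precision envelope and then sums
# precision * delta-recall (the standard py-faster-rcnn "correct AP").
import numpy as np
from retinanet.csv_eval import _compute_ap

recall = np.array([0.0, 0.25, 0.5, 0.75, 1.0])
precision = np.array([1.0, 1.0, 0.8, 0.6, 0.5])

print(_compute_ap(recall, precision))   # 0.725 for this toy curve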
retinanet/dataloader.py
ADDED
@@ -0,0 +1,458 @@
from __future__ import print_function, division
import sys
import os
import torch
import numpy as np
import random
import csv

from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
from torch.utils.data.sampler import Sampler

from pycocotools.coco import COCO

import skimage.io
import skimage.transform
import skimage.color
import skimage

from PIL import Image


class CocoDataset(Dataset):
    """Coco dataset."""

    def __init__(self, root_dir, set_name='train2017', transform=None):
        """
        Args:
            root_dir (string): COCO directory.
            transform (callable, optional): Optional transform to be applied
                on a sample.
        """
        self.root_dir = root_dir
        self.set_name = set_name
        self.transform = transform

        self.coco = COCO(os.path.join(self.root_dir, 'annotations', 'instances_' + self.set_name + '.json'))
        self.image_ids = self.coco.getImgIds()

        self.load_classes()

    def load_classes(self):
        # load class names (name -> label)
        categories = self.coco.loadCats(self.coco.getCatIds())
        categories.sort(key=lambda x: x['id'])

        self.classes = {}
        self.coco_labels = {}
        self.coco_labels_inverse = {}
        for c in categories:
            self.coco_labels[len(self.classes)] = c['id']
            self.coco_labels_inverse[c['id']] = len(self.classes)
            self.classes[c['name']] = len(self.classes)

        # also load the reverse (label -> name)
        self.labels = {}
        for key, value in self.classes.items():
            self.labels[value] = key

    def __len__(self):
        return len(self.image_ids)

    def __getitem__(self, idx):

        img = self.load_image(idx)
        annot = self.load_annotations(idx)
        sample = {'img': img, 'annot': annot}
        if self.transform:
            sample = self.transform(sample)

        return sample

    def load_image(self, image_index):
        image_info = self.coco.loadImgs(self.image_ids[image_index])[0]
        path = os.path.join(self.root_dir, 'images', self.set_name, image_info['file_name'])
        img = skimage.io.imread(path)

        if len(img.shape) == 2:
            img = skimage.color.gray2rgb(img)

        return img.astype(np.float32) / 255.0

    def load_annotations(self, image_index):
        # get ground truth annotations
        annotations_ids = self.coco.getAnnIds(imgIds=self.image_ids[image_index], iscrowd=False)
        annotations = np.zeros((0, 5))

        # some images appear to miss annotations (like image with id 257034)
        if len(annotations_ids) == 0:
            return annotations

        # parse annotations
        coco_annotations = self.coco.loadAnns(annotations_ids)
        for idx, a in enumerate(coco_annotations):

            # some annotations have basically no width / height, skip them
            if a['bbox'][2] < 1 or a['bbox'][3] < 1:
                continue

            annotation = np.zeros((1, 5))
            annotation[0, :4] = a['bbox']
            annotation[0, 4] = self.coco_label_to_label(a['category_id'])
            annotations = np.append(annotations, annotation, axis=0)

        # transform from [x, y, w, h] to [x1, y1, x2, y2]
        annotations[:, 2] = annotations[:, 0] + annotations[:, 2]
        annotations[:, 3] = annotations[:, 1] + annotations[:, 3]

        return annotations

    def coco_label_to_label(self, coco_label):
        return self.coco_labels_inverse[coco_label]

    def label_to_coco_label(self, label):
        return self.coco_labels[label]

    def image_aspect_ratio(self, image_index):
        image = self.coco.loadImgs(self.image_ids[image_index])[0]
        return float(image['width']) / float(image['height'])

    def num_classes(self):
        return 80


class CSVDataset(Dataset):
    """CSV dataset."""

    def __init__(self, train_file, class_list, transform=None):
        """
        Args:
            train_file (string): CSV file with training annotations
            annotations (string): CSV file with class list
            test_file (string, optional): CSV file with testing annotations
        """
        self.train_file = train_file
        self.class_list = class_list
        self.transform = transform

        # parse the provided class file
        try:
            with self._open_for_csv(self.class_list) as file:
                self.classes = self.load_classes(csv.reader(file, delimiter=','))
        except ValueError as e:
            raise(ValueError('invalid CSV class file: {}: {}'.format(self.class_list, e)))

        self.labels = {}
        for key, value in self.classes.items():
            self.labels[value] = key

        # csv with img_path, x1, y1, x2, y2, class_name
        try:
            with self._open_for_csv(self.train_file) as file:
                self.image_data = self._read_annotations(csv.reader(file, delimiter=','), self.classes)
        except ValueError as e:
            raise(ValueError('invalid CSV annotations file: {}: {}'.format(self.train_file, e)))
        self.image_names = list(self.image_data.keys())

    def _parse(self, value, function, fmt):
        """
        Parse a string into a value, and format a nice ValueError if it fails.
        Returns `function(value)`.
        Any `ValueError` raised is catched and a new `ValueError` is raised
        with message `fmt.format(e)`, where `e` is the caught `ValueError`.
        """
        try:
            return function(value)
        except ValueError as e:
            raise_from(ValueError(fmt.format(e)), None)

    def _open_for_csv(self, path):
        """
        Open a file with flags suitable for csv.reader.
        This is different for python2 it means with mode 'rb',
        for python3 this means 'r' with "universal newlines".
        """
        if sys.version_info[0] < 3:
            return open(path, 'rb')
        else:
            return open(path, 'r', newline='')

    def load_classes(self, csv_reader):
        result = {}

        for line, row in enumerate(csv_reader):
            line += 1

            try:
                class_name, class_id = row
            except ValueError:
                raise(ValueError('line {}: format should be \'class_name,class_id\''.format(line)))
            class_id = self._parse(class_id, int, 'line {}: malformed class ID: {{}}'.format(line))

            if class_name in result:
                raise ValueError('line {}: duplicate class name: \'{}\''.format(line, class_name))
            result[class_name] = class_id
        return result

    def __len__(self):
        return len(self.image_names)

    def __getitem__(self, idx):

        img = self.load_image(idx)
        annot = self.load_annotations(idx)
        sample = {'img': img, 'annot': annot}
        if self.transform:
            sample = self.transform(sample)

        return sample

    def load_image(self, image_index):
        img = skimage.io.imread(self.image_names[image_index])

        if len(img.shape) == 2:
            img = skimage.color.gray2rgb(img)

        return img.astype(np.float32) / 255.0

    def load_annotations(self, image_index):
        # get ground truth annotations
        annotation_list = self.image_data[self.image_names[image_index]]
        annotations = np.zeros((0, 5))

        # some images appear to miss annotations (like image with id 257034)
        if len(annotation_list) == 0:
            return annotations

        # parse annotations
        for idx, a in enumerate(annotation_list):
            # some annotations have basically no width / height, skip them
            x1 = a['x1']
            x2 = a['x2']
            y1 = a['y1']
            y2 = a['y2']

            if (x2 - x1) < 1 or (y2 - y1) < 1:
                continue

            annotation = np.zeros((1, 5))

            annotation[0, 0] = x1
            annotation[0, 1] = y1
            annotation[0, 2] = x2
            annotation[0, 3] = y2

            annotation[0, 4] = self.name_to_label(a['class'])
            annotations = np.append(annotations, annotation, axis=0)

        return annotations

    def _read_annotations(self, csv_reader, classes):
        result = {}
        for line, row in enumerate(csv_reader):
            line += 1

            try:
                img_file, x1, y1, x2, y2, class_name = row[:6]
            except ValueError:
                raise_from(ValueError('line {}: format should be \'img_file,x1,y1,x2,y2,class_name\' or \'img_file,,,,,\''.format(line)), None)

            if img_file not in result:
                result[img_file] = []

            # If a row contains only an image path, it's an image without annotations.
            if (x1, y1, x2, y2, class_name) == ('', '', '', '', ''):
                continue

            x1 = self._parse(x1, int, 'line {}: malformed x1: {{}}'.format(line))
            y1 = self._parse(y1, int, 'line {}: malformed y1: {{}}'.format(line))
            x2 = self._parse(x2, int, 'line {}: malformed x2: {{}}'.format(line))
            y2 = self._parse(y2, int, 'line {}: malformed y2: {{}}'.format(line))

            # Check that the bounding box is valid.
            if x2 <= x1:
                raise ValueError('line {}: x2 ({}) must be higher than x1 ({})'.format(line, x2, x1))
            if y2 <= y1:
                raise ValueError('line {}: y2 ({}) must be higher than y1 ({})'.format(line, y2, y1))

            # check if the current class name is correctly present
            if class_name not in classes:
                raise ValueError('line {}: unknown class name: \'{}\' (classes: {})'.format(line, class_name, classes))

            result[img_file].append({'x1': x1, 'x2': x2, 'y1': y1, 'y2': y2, 'class': class_name})
        return result

    def name_to_label(self, name):
        return self.classes[name]

    def label_to_name(self, label):
        return self.labels[label]

    def num_classes(self):
        return max(self.classes.values()) + 1

    def image_aspect_ratio(self, image_index):
        image = Image.open(self.image_names[image_index])
        return float(image.width) / float(image.height)


def collater(data):

    imgs = [s['img'] for s in data]
    annots = [s['annot'] for s in data]
    scales = [s['scale'] for s in data]

    widths = [int(s.shape[0]) for s in imgs]
    heights = [int(s.shape[1]) for s in imgs]
    batch_size = len(imgs)

    max_width = np.array(widths).max()
    max_height = np.array(heights).max()

    padded_imgs = torch.zeros(batch_size, max_width, max_height, 3)

    for i in range(batch_size):
        img = imgs[i]
        padded_imgs[i, :int(img.shape[0]), :int(img.shape[1]), :] = img

    max_num_annots = max(annot.shape[0] for annot in annots)

    if max_num_annots > 0:

        annot_padded = torch.ones((len(annots), max_num_annots, 5)) * -1

        if max_num_annots > 0:
            for idx, annot in enumerate(annots):
                # print(annot.shape)
                if annot.shape[0] > 0:
                    annot_padded[idx, :annot.shape[0], :] = annot
    else:
        annot_padded = torch.ones((len(annots), 1, 5)) * -1

    padded_imgs = padded_imgs.permute(0, 3, 1, 2)

    return {'img': padded_imgs, 'annot': annot_padded, 'scale': scales}


class Resizer(object):
    """Convert ndarrays in sample to Tensors."""

    def __call__(self, sample, min_side=608, max_side=1024):
        image, annots = sample['img'], sample['annot']

        rows, cols, cns = image.shape

        smallest_side = min(rows, cols)

        # rescale the image so the smallest side is min_side
        scale = min_side / smallest_side

        # check if the largest side is now greater than max_side, which can happen
        # when images have a large aspect ratio
        largest_side = max(rows, cols)

        if largest_side * scale > max_side:
            scale = max_side / largest_side

        # resize the image with the computed scale
        image = skimage.transform.resize(image, (int(round(rows * scale)), int(round((cols * scale)))))
        rows, cols, cns = image.shape

        pad_w = 32 - rows % 32
        pad_h = 32 - cols % 32

        new_image = np.zeros((rows + pad_w, cols + pad_h, cns)).astype(np.float32)
        new_image[:rows, :cols, :] = image.astype(np.float32)

        annots[:, :4] *= scale

        return {'img': torch.from_numpy(new_image), 'annot': torch.from_numpy(annots), 'scale': scale}


class Augmenter(object):
    """Convert ndarrays in sample to Tensors."""

    def __call__(self, sample, flip_x=0.5):

        if np.random.rand() < flip_x:
            image, annots = sample['img'], sample['annot']
            image = image[:, ::-1, :]

            rows, cols, channels = image.shape

            x1 = annots[:, 0].copy()
            x2 = annots[:, 2].copy()

            x_tmp = x1.copy()

            annots[:, 0] = cols - x2
            annots[:, 2] = cols - x_tmp

            sample = {'img': image, 'annot': annots}

        return sample


class Normalizer(object):

    def __init__(self):
        self.mean = np.array([[[0.485, 0.456, 0.406]]])
        self.std = np.array([[[0.229, 0.224, 0.225]]])

    def __call__(self, sample):

        image, annots = sample['img'], sample['annot']

        return {'img': ((image.astype(np.float32) - self.mean) / self.std), 'annot': annots}


class UnNormalizer(object):
    def __init__(self, mean=None, std=None):
        if mean == None:
            self.mean = [0.485, 0.456, 0.406]
        else:
            self.mean = mean
        if std == None:
            self.std = [0.229, 0.224, 0.225]
        else:
            self.std = std

    def __call__(self, tensor):
        """
        Args:
            tensor (Tensor): Tensor image of size (C, H, W) to be normalized.
        Returns:
            Tensor: Normalized image.
        """
        for t, m, s in zip(tensor, self.mean, self.std):
            t.mul_(s).add_(m)
        return tensor


class AspectRatioBasedSampler(Sampler):

    def __init__(self, data_source, batch_size, drop_last):
        self.data_source = data_source
        self.batch_size = batch_size
        self.drop_last = drop_last
        self.groups = self.group_images()

    def __iter__(self):
        random.shuffle(self.groups)
        for group in self.groups:
            yield group

    def __len__(self):
        if self.drop_last:
            return len(self.data_source) // self.batch_size
        else:
            return (len(self.data_source) + self.batch_size - 1) // self.batch_size

    def group_images(self):
        # determine the order of the images
        order = list(range(len(self.data_source)))
        order.sort(key=lambda x: self.data_source.image_aspect_ratio(x))

        # divide into groups, one group = one batch
        return [[order[x % len(order)] for x in range(i, i + self.batch_size)] for i in range(0, len(order), self.batch_size)]
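The pieces above are meant to be combined into a DataLoader: the transforms produce per-sample dicts, AspectRatioBasedSampler groups images of similar aspect ratio into batches, and collater pads each batch to a common size. A usage sketch, not part of the commit; the CSV paths are placeholders:

# Sketch: wiring CSVDataset, transforms, sampler and collater together.
from torch.utils.data import DataLoader
from torchvision import transforms

from retinanet.dataloader import (CSVDataset, AspectRatioBasedSampler,
                                  Augmenter, Normalizer, Resizer, collater)

dataset = CSVDataset(train_file='annotations.csv', class_list='classes.csv',
                     transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]))

sampler = AspectRatioBasedSampler(dataset, batch_size=2, drop_last=False)
loader = DataLoader(dataset, num_workers=2, collate_fn=collater, batch_sampler=sampler)

for batch in loader:
    print(batch['img'].shape, batch['annot'].shape)   # padded images, annotations padded with -1 rows
    break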
retinanet/losses.py
ADDED
@@ -0,0 +1,177 @@
import numpy as np
import torch
import torch.nn as nn

def calc_iou(a, b):
    area = (b[:, 2] - b[:, 0]) * (b[:, 3] - b[:, 1])

    iw = torch.min(torch.unsqueeze(a[:, 2], dim=1), b[:, 2]) - torch.max(torch.unsqueeze(a[:, 0], 1), b[:, 0])
    ih = torch.min(torch.unsqueeze(a[:, 3], dim=1), b[:, 3]) - torch.max(torch.unsqueeze(a[:, 1], 1), b[:, 1])

    iw = torch.clamp(iw, min=0)
    ih = torch.clamp(ih, min=0)

    ua = torch.unsqueeze((a[:, 2] - a[:, 0]) * (a[:, 3] - a[:, 1]), dim=1) + area - iw * ih

    ua = torch.clamp(ua, min=1e-8)

    intersection = iw * ih

    IoU = intersection / ua

    return IoU

class FocalLoss(nn.Module):
    # def __init__(self):

    def forward(self, classifications, regressions, anchors, annotations):
        alpha = 0.25
        gamma = 2.0
        batch_size = classifications.shape[0]
        classification_losses = []
        regression_losses = []

        anchor = anchors[0, :, :]

        anchor_widths = anchor[:, 2] - anchor[:, 0]
        anchor_heights = anchor[:, 3] - anchor[:, 1]
        anchor_ctr_x = anchor[:, 0] + 0.5 * anchor_widths
        anchor_ctr_y = anchor[:, 1] + 0.5 * anchor_heights

        for j in range(batch_size):

            classification = classifications[j, :, :]
            regression = regressions[j, :, :]

            bbox_annotation = annotations[j, :, :]
            bbox_annotation = bbox_annotation[bbox_annotation[:, 4] != -1]

            classification = torch.clamp(classification, 1e-4, 1.0 - 1e-4)

            if bbox_annotation.shape[0] == 0:
                if torch.cuda.is_available():
                    alpha_factor = torch.ones(classification.shape).cuda() * alpha

                    alpha_factor = 1. - alpha_factor
                    focal_weight = classification
                    focal_weight = alpha_factor * torch.pow(focal_weight, gamma)

                    bce = -(torch.log(1.0 - classification))

                    # cls_loss = focal_weight * torch.pow(bce, gamma)
                    cls_loss = focal_weight * bce
                    classification_losses.append(cls_loss.sum())
                    regression_losses.append(torch.tensor(0).float().cuda())

                else:
                    alpha_factor = torch.ones(classification.shape) * alpha

                    alpha_factor = 1. - alpha_factor
                    focal_weight = classification
                    focal_weight = alpha_factor * torch.pow(focal_weight, gamma)

                    bce = -(torch.log(1.0 - classification))

                    # cls_loss = focal_weight * torch.pow(bce, gamma)
                    cls_loss = focal_weight * bce
                    classification_losses.append(cls_loss.sum())
                    regression_losses.append(torch.tensor(0).float())

                continue

            IoU = calc_iou(anchors[0, :, :], bbox_annotation[:, :4])  # num_anchors x num_annotations

            IoU_max, IoU_argmax = torch.max(IoU, dim=1)  # num_anchors x 1

            # import pdb
            # pdb.set_trace()

            # compute the loss for classification
            targets = torch.ones(classification.shape) * -1

            if torch.cuda.is_available():
                targets = targets.cuda()

            targets[torch.lt(IoU_max, 0.4), :] = 0

            positive_indices = torch.ge(IoU_max, 0.5)

            num_positive_anchors = positive_indices.sum()

            assigned_annotations = bbox_annotation[IoU_argmax, :]

            targets[positive_indices, :] = 0
            targets[positive_indices, assigned_annotations[positive_indices, 4].long()] = 1

            if torch.cuda.is_available():
                alpha_factor = torch.ones(targets.shape).cuda() * alpha
            else:
                alpha_factor = torch.ones(targets.shape) * alpha

            alpha_factor = torch.where(torch.eq(targets, 1.), alpha_factor, 1. - alpha_factor)
            focal_weight = torch.where(torch.eq(targets, 1.), 1. - classification, classification)
            focal_weight = alpha_factor * torch.pow(focal_weight, gamma)

            bce = -(targets * torch.log(classification) + (1.0 - targets) * torch.log(1.0 - classification))

            # cls_loss = focal_weight * torch.pow(bce, gamma)
            cls_loss = focal_weight * bce

            if torch.cuda.is_available():
                cls_loss = torch.where(torch.ne(targets, -1.0), cls_loss, torch.zeros(cls_loss.shape).cuda())
            else:
                cls_loss = torch.where(torch.ne(targets, -1.0), cls_loss, torch.zeros(cls_loss.shape))

            classification_losses.append(cls_loss.sum() / torch.clamp(num_positive_anchors.float(), min=1.0))

            # compute the loss for regression

            if positive_indices.sum() > 0:
                assigned_annotations = assigned_annotations[positive_indices, :]

                anchor_widths_pi = anchor_widths[positive_indices]
                anchor_heights_pi = anchor_heights[positive_indices]
                anchor_ctr_x_pi = anchor_ctr_x[positive_indices]
                anchor_ctr_y_pi = anchor_ctr_y[positive_indices]

                gt_widths = assigned_annotations[:, 2] - assigned_annotations[:, 0]
                gt_heights = assigned_annotations[:, 3] - assigned_annotations[:, 1]
                gt_ctr_x = assigned_annotations[:, 0] + 0.5 * gt_widths
                gt_ctr_y = assigned_annotations[:, 1] + 0.5 * gt_heights

                # clip widths to 1
                gt_widths = torch.clamp(gt_widths, min=1)
                gt_heights = torch.clamp(gt_heights, min=1)

                targets_dx = (gt_ctr_x - anchor_ctr_x_pi) / anchor_widths_pi
                targets_dy = (gt_ctr_y - anchor_ctr_y_pi) / anchor_heights_pi
                targets_dw = torch.log(gt_widths / anchor_widths_pi)
                targets_dh = torch.log(gt_heights / anchor_heights_pi)

                targets = torch.stack((targets_dx, targets_dy, targets_dw, targets_dh))
                targets = targets.t()

                if torch.cuda.is_available():
                    targets = targets / torch.Tensor([[0.1, 0.1, 0.2, 0.2]]).cuda()
                else:
                    targets = targets / torch.Tensor([[0.1, 0.1, 0.2, 0.2]])

                negative_indices = 1 + (~positive_indices)

                regression_diff = torch.abs(targets - regression[positive_indices, :])

                regression_loss = torch.where(
                    torch.le(regression_diff, 1.0 / 9.0),
                    0.5 * 9.0 * torch.pow(regression_diff, 2),
                    regression_diff - 0.5 / 9.0
                )
                regression_losses.append(regression_loss.mean())
            else:
                if torch.cuda.is_available():
                    regression_losses.append(torch.tensor(0).float().cuda())
                else:
                    regression_losses.append(torch.tensor(0).float())

        return torch.stack(classification_losses).mean(dim=0, keepdim=True), torch.stack(regression_losses).mean(dim=0, keepdim=True)
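calc_iou above works on (x1, y1, x2, y2) boxes and broadcasts anchors against annotations, returning a (num_anchors, num_annotations) IoU matrix. A quick self-contained check, not part of the commit:

# Sketch: IoU of two anchors against one ground-truth box.
import torch
from retinanet.losses import calc_iou

anchors = torch.tensor([[0., 0., 10., 10.],
                        [5., 5., 15., 15.]])
gt_boxes = torch.tensor([[0., 0., 10., 10.]])

print(calc_iou(anchors, gt_boxes))
# tensor([[1.0000],
#         [0.1429]])   # 25 / (100 + 100 - 25) for the second anchor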
retinanet/model.py
ADDED
@@ -0,0 +1,353 @@
import torch.nn as nn
import torch
import math
import torch.utils.model_zoo as model_zoo
from torchvision.ops import nms
from retinanet.utils import BasicBlock, Bottleneck, BBoxTransform, ClipBoxes
from retinanet.anchors import Anchors
from retinanet import losses

model_urls = {
    'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
    'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
    'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
    'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
    'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
}


class PyramidFeatures(nn.Module):
    def __init__(self, C3_size, C4_size, C5_size, feature_size=256):
        super(PyramidFeatures, self).__init__()

        # upsample C5 to get P5 from the FPN paper
        self.P5_1 = nn.Conv2d(C5_size, feature_size, kernel_size=1, stride=1, padding=0)
        self.P5_upsampled = nn.Upsample(scale_factor=2, mode='nearest')
        self.P5_2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, stride=1, padding=1)

        # add P5 elementwise to C4
        self.P4_1 = nn.Conv2d(C4_size, feature_size, kernel_size=1, stride=1, padding=0)
        self.P4_upsampled = nn.Upsample(scale_factor=2, mode='nearest')
        self.P4_2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, stride=1, padding=1)

        # add P4 elementwise to C3
        self.P3_1 = nn.Conv2d(C3_size, feature_size, kernel_size=1, stride=1, padding=0)
        self.P3_2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, stride=1, padding=1)

        # "P6 is obtained via a 3x3 stride-2 conv on C5"
        self.P6 = nn.Conv2d(C5_size, feature_size, kernel_size=3, stride=2, padding=1)

        # "P7 is computed by applying ReLU followed by a 3x3 stride-2 conv on P6"
        self.P7_1 = nn.ReLU()
        self.P7_2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, stride=2, padding=1)

    def forward(self, inputs):
        C3, C4, C5 = inputs

        P5_x = self.P5_1(C5)
        P5_upsampled_x = self.P5_upsampled(P5_x)
        P5_x = self.P5_2(P5_x)

        P4_x = self.P4_1(C4)
        P4_x = P5_upsampled_x + P4_x
        P4_upsampled_x = self.P4_upsampled(P4_x)
        P4_x = self.P4_2(P4_x)

        P3_x = self.P3_1(C3)
        P3_x = P3_x + P4_upsampled_x
        P3_x = self.P3_2(P3_x)

        P6_x = self.P6(C5)

        P7_x = self.P7_1(P6_x)
        P7_x = self.P7_2(P7_x)

        return [P3_x, P4_x, P5_x, P6_x, P7_x]


class RegressionModel(nn.Module):
    def __init__(self, num_features_in, num_anchors=9, feature_size=256):
        super(RegressionModel, self).__init__()

        self.conv1 = nn.Conv2d(num_features_in, feature_size, kernel_size=3, padding=1)
        self.act1 = nn.ReLU()

        self.conv2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1)
        self.act2 = nn.ReLU()

        self.conv3 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1)
        self.act3 = nn.ReLU()

        self.conv4 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1)
        self.act4 = nn.ReLU()

        self.output = nn.Conv2d(feature_size, num_anchors * 4, kernel_size=3, padding=1)

    def forward(self, x):
        out = self.conv1(x)
        out = self.act1(out)

        out = self.conv2(out)
        out = self.act2(out)

        out = self.conv3(out)
        out = self.act3(out)

        out = self.conv4(out)
        out = self.act4(out)

        out = self.output(out)

        # out is B x C x W x H, with C = 4*num_anchors
        out = out.permute(0, 2, 3, 1)

        return out.contiguous().view(out.shape[0], -1, 4)


class ClassificationModel(nn.Module):
    def __init__(self, num_features_in, num_anchors=9, num_classes=80, prior=0.01, feature_size=256):
        super(ClassificationModel, self).__init__()

        self.num_classes = num_classes
        self.num_anchors = num_anchors

        self.conv1 = nn.Conv2d(num_features_in, feature_size, kernel_size=3, padding=1)
        self.act1 = nn.ReLU()

        self.conv2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1)
        self.act2 = nn.ReLU()

        self.conv3 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1)
        self.act3 = nn.ReLU()

        self.conv4 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1)
        self.act4 = nn.ReLU()

        self.output = nn.Conv2d(feature_size, num_anchors * num_classes, kernel_size=3, padding=1)
        self.output_act = nn.Sigmoid()

    def forward(self, x):
        out = self.conv1(x)
        out = self.act1(out)

        out = self.conv2(out)
        out = self.act2(out)

        out = self.conv3(out)
        out = self.act3(out)

        out = self.conv4(out)
        out = self.act4(out)

        out = self.output(out)
        out = self.output_act(out)

        # out is B x C x W x H, with C = n_classes + n_anchors
        out1 = out.permute(0, 2, 3, 1)

        batch_size, width, height, channels = out1.shape

        out2 = out1.view(batch_size, width, height, self.num_anchors, self.num_classes)

        return out2.contiguous().view(x.shape[0], -1, self.num_classes)


class ResNet(nn.Module):

    def __init__(self, num_classes, block, layers):
        self.inplanes = 64
        super(ResNet, self).__init__()
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)

        if block == BasicBlock:
            fpn_sizes = [self.layer2[layers[1] - 1].conv2.out_channels, self.layer3[layers[2] - 1].conv2.out_channels,
                         self.layer4[layers[3] - 1].conv2.out_channels]
        elif block == Bottleneck:
            fpn_sizes = [self.layer2[layers[1] - 1].conv3.out_channels, self.layer3[layers[2] - 1].conv3.out_channels,
                         self.layer4[layers[3] - 1].conv3.out_channels]
        else:
            raise ValueError(f"Block type {block} not understood")

        self.fpn = PyramidFeatures(fpn_sizes[0], fpn_sizes[1], fpn_sizes[2])

        self.regressionModel = RegressionModel(256)
        self.classificationModel = ClassificationModel(256, num_classes=num_classes)

        self.anchors = Anchors()

        self.regressBoxes = BBoxTransform()

        self.clipBoxes = ClipBoxes()

        self.focalLoss = losses.FocalLoss()

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

        prior = 0.01

        self.classificationModel.output.weight.data.fill_(0)
        self.classificationModel.output.bias.data.fill_(-math.log((1.0 - prior) / prior))

        self.regressionModel.output.weight.data.fill_(0)
        self.regressionModel.output.bias.data.fill_(0)

        self.freeze_bn()

    def _make_layer(self, block, planes, blocks, stride=1):
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes * block.expansion),
            )

        layers = [block(self.inplanes, planes, stride, downsample)]
        self.inplanes = planes * block.expansion
        for i in range(1, blocks):
            layers.append(block(self.inplanes, planes))

        return nn.Sequential(*layers)

    def freeze_bn(self):
        '''Freeze BatchNorm layers.'''
        for layer in self.modules():
            if isinstance(layer, nn.BatchNorm2d):
                layer.eval()

    def forward(self, inputs):

        if self.training:
            img_batch, annotations = inputs
        else:
            img_batch = inputs

        x = self.conv1(img_batch)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x1 = self.layer1(x)
        x2 = self.layer2(x1)
        x3 = self.layer3(x2)
        x4 = self.layer4(x3)

        features = self.fpn([x2, x3, x4])

        regression = torch.cat([self.regressionModel(feature) for feature in features], dim=1)

        classification = torch.cat([self.classificationModel(feature) for feature in features], dim=1)

        anchors = self.anchors(img_batch)

        if self.training:
            return self.focalLoss(classification, regression, anchors, annotations)
        else:
            transformed_anchors = self.regressBoxes(anchors, regression)
            transformed_anchors = self.clipBoxes(transformed_anchors, img_batch)

            finalResult = [[], [], []]

            finalScores = torch.Tensor([])
            finalAnchorBoxesIndexes = torch.Tensor([]).long()
            finalAnchorBoxesCoordinates = torch.Tensor([])

            if torch.cuda.is_available():
                finalScores = finalScores.cuda()
                finalAnchorBoxesIndexes = finalAnchorBoxesIndexes.cuda()
                finalAnchorBoxesCoordinates = finalAnchorBoxesCoordinates.cuda()

            for i in range(classification.shape[2]):
                scores = torch.squeeze(classification[:, :, i])
                scores_over_thresh = (scores > 0.05)
                if scores_over_thresh.sum() == 0:
                    # no boxes to NMS, just continue
                    continue

                scores = scores[scores_over_thresh]
                anchorBoxes = torch.squeeze(transformed_anchors)
                anchorBoxes = anchorBoxes[scores_over_thresh]
                anchors_nms_idx = nms(anchorBoxes, scores, 0.5)

                finalResult[0].extend(scores[anchors_nms_idx])
                finalResult[1].extend(torch.tensor([i] * anchors_nms_idx.shape[0]))
                finalResult[2].extend(anchorBoxes[anchors_nms_idx])

                finalScores = torch.cat((finalScores, scores[anchors_nms_idx]))
                finalAnchorBoxesIndexesValue = torch.tensor([i] * anchors_nms_idx.shape[0])
                if torch.cuda.is_available():
                    finalAnchorBoxesIndexesValue = finalAnchorBoxesIndexesValue.cuda()

                finalAnchorBoxesIndexes = torch.cat((finalAnchorBoxesIndexes, finalAnchorBoxesIndexesValue))
                finalAnchorBoxesCoordinates = torch.cat((finalAnchorBoxesCoordinates, anchorBoxes[anchors_nms_idx]))

            return [finalScores, finalAnchorBoxesIndexes, finalAnchorBoxesCoordinates]


def resnet18(num_classes, pretrained=False, **kwargs):
    """Constructs a ResNet-18 model.
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = ResNet(num_classes, BasicBlock, [2, 2, 2, 2], **kwargs)
    if pretrained:
        model.load_state_dict(model_zoo.load_url(model_urls['resnet18'], model_dir='.'), strict=False)
    return model


def resnet34(num_classes, pretrained=False, **kwargs):
    """Constructs a ResNet-34 model.
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
|
316 |
+
"""
|
317 |
+
model = ResNet(num_classes, BasicBlock, [3, 4, 6, 3], **kwargs)
|
318 |
+
if pretrained:
|
319 |
+
model.load_state_dict(model_zoo.load_url(model_urls['resnet34'], model_dir='.'), strict=False)
|
320 |
+
return model
|
321 |
+
|
322 |
+
|
323 |
+
def resnet50(num_classes, pretrained=False, **kwargs):
|
324 |
+
"""Constructs a ResNet-50 model.
|
325 |
+
Args:
|
326 |
+
pretrained (bool): If True, returns a model pre-trained on ImageNet
|
327 |
+
"""
|
328 |
+
model = ResNet(num_classes, Bottleneck, [3, 4, 6, 3], **kwargs)
|
329 |
+
if pretrained:
|
330 |
+
model.load_state_dict(model_zoo.load_url(model_urls['resnet50'], model_dir='.'), strict=False)
|
331 |
+
return model
|
332 |
+
|
333 |
+
|
334 |
+
def resnet101(num_classes, pretrained=False, **kwargs):
|
335 |
+
"""Constructs a ResNet-101 model.
|
336 |
+
Args:
|
337 |
+
pretrained (bool): If True, returns a model pre-trained on ImageNet
|
338 |
+
"""
|
339 |
+
model = ResNet(num_classes, Bottleneck, [3, 4, 23, 3], **kwargs)
|
340 |
+
if pretrained:
|
341 |
+
model.load_state_dict(model_zoo.load_url(model_urls['resnet101'], model_dir='.'), strict=False)
|
342 |
+
return model
|
343 |
+
|
344 |
+
|
345 |
+
def resnet152(num_classes, pretrained=False, **kwargs):
|
346 |
+
"""Constructs a ResNet-152 model.
|
347 |
+
Args:
|
348 |
+
pretrained (bool): If True, returns a model pre-trained on ImageNet
|
349 |
+
"""
|
350 |
+
model = ResNet(num_classes, Bottleneck, [3, 8, 36, 3], **kwargs)
|
351 |
+
if pretrained:
|
352 |
+
model.load_state_dict(model_zoo.load_url(model_urls['resnet152'], model_dir='.'), strict=False)
|
353 |
+
return model
|
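The detector built by these constructors returns focal-loss terms in training mode and a [scores, class indices, boxes] list in eval mode. A minimal usage sketch follows (not part of the commit; the `retinanet` import path, the class count, and the annotation layout are assumptions):

# Usage sketch, assuming the package imports as `retinanet` and 80 classes (COCO-style).
import torch
from retinanet import model

retinanet = model.resnet50(num_classes=80, pretrained=False)  # pretrained=True would pull ImageNet backbone weights
retinanet.eval()  # eval mode: forward() returns detections instead of losses

img_batch = torch.randn(1, 3, 608, 608)  # B x 3 x H x W, already normalized
if torch.cuda.is_available():
    # Anchors/BBoxTransform place their buffers on the GPU when CUDA is present,
    # so keep the model and inputs there as well.
    retinanet, img_batch = retinanet.cuda(), img_batch.cuda()

with torch.no_grad():
    scores, labels, boxes = retinanet(img_batch)  # [finalScores, finalAnchorBoxesIndexes, finalAnchorBoxesCoordinates]

# Training mode instead expects an (img_batch, annotations) pair, where annotations
# is assumed to be a B x N x 5 tensor of [x1, y1, x2, y2, class_id] rows, and the
# forward pass returns the loss terms computed by losses.FocalLoss.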
retinanet/oid_dataset.py
ADDED
@@ -0,0 +1,260 @@
from __future__ import print_function, division

import csv
import json
import os
import warnings

import numpy as np
import skimage
import skimage.color
import skimage.io
import skimage.transform
from PIL import Image
from torch.utils.data import Dataset


def get_labels(metadata_dir, version='v4'):
    if version == 'v4' or version == 'challenge2018':
        csv_file = 'class-descriptions-boxable.csv' if version == 'v4' else 'challenge-2018-class-descriptions-500.csv'

        boxable_classes_descriptions = os.path.join(metadata_dir, csv_file)
        id_to_labels = {}
        cls_index = {}

        i = 0
        with open(boxable_classes_descriptions) as f:
            for row in csv.reader(f):
                # make sure the csv row is not empty (usually the last one)
                if len(row):
                    label = row[0]
                    description = row[1].replace("\"", "").replace("'", "").replace('`', '')

                    id_to_labels[i] = description
                    cls_index[label] = i

                    i += 1
    else:
        trainable_classes_path = os.path.join(metadata_dir, 'classes-bbox-trainable.txt')
        description_path = os.path.join(metadata_dir, 'class-descriptions.csv')

        description_table = {}
        with open(description_path) as f:
            for row in csv.reader(f):
                # make sure the csv row is not empty (usually the last one)
                if len(row):
                    description_table[row[0]] = row[1].replace("\"", "").replace("'", "").replace('`', '')

        # open in text mode so Python 3 yields str (not bytes); splitlines() also drops a trailing empty entry
        with open(trainable_classes_path, 'r') as f:
            trainable_classes = f.read().splitlines()

        id_to_labels = dict([(i, description_table[c]) for i, c in enumerate(trainable_classes)])
        cls_index = dict([(c, i) for i, c in enumerate(trainable_classes)])

    return id_to_labels, cls_index


def generate_images_annotations_json(main_dir, metadata_dir, subset, cls_index, version='v4'):
    validation_image_ids = {}

    if version == 'v4':
        annotations_path = os.path.join(metadata_dir, subset, '{}-annotations-bbox.csv'.format(subset))
    elif version == 'challenge2018':
        validation_image_ids_path = os.path.join(metadata_dir, 'challenge-2018-image-ids-valset-od.csv')

        with open(validation_image_ids_path, 'r') as csv_file:
            reader = csv.DictReader(csv_file, fieldnames=['ImageID'])
            next(reader)  # skip the header row (Python 3 csv readers have no .next() method)
            for line, row in enumerate(reader):
                image_id = row['ImageID']
                validation_image_ids[image_id] = True

        annotations_path = os.path.join(metadata_dir, 'challenge-2018-train-annotations-bbox.csv')
    else:
        annotations_path = os.path.join(metadata_dir, subset, 'annotations-human-bbox.csv')

    fieldnames = ['ImageID', 'Source', 'LabelName', 'Confidence',
                  'XMin', 'XMax', 'YMin', 'YMax',
                  'IsOccluded', 'IsTruncated', 'IsGroupOf', 'IsDepiction', 'IsInside']

    id_annotations = dict()
    with open(annotations_path, 'r') as csv_file:
        reader = csv.DictReader(csv_file, fieldnames=fieldnames)
        next(reader)

        images_sizes = {}
        for line, row in enumerate(reader):
            frame = row['ImageID']

            if version == 'challenge2018':
                if subset == 'train':
                    if frame in validation_image_ids:
                        continue
                elif subset == 'validation':
                    if frame not in validation_image_ids:
                        continue
                else:
                    raise NotImplementedError('This generator handles only the train and validation subsets')

            class_name = row['LabelName']

            if class_name not in cls_index:
                continue

            cls_id = cls_index[class_name]

            if version == 'challenge2018':
                # We recommend participants to use the provided subset of the training set as a validation set.
                # This is preferable over using the V4 val/test sets, as the training set is more densely annotated.
                img_path = os.path.join(main_dir, 'images', 'train', frame + '.jpg')
            else:
                img_path = os.path.join(main_dir, 'images', subset, frame + '.jpg')

            if frame in images_sizes:
                width, height = images_sizes[frame]
            else:
                try:
                    with Image.open(img_path) as img:
                        width, height = img.width, img.height
                        images_sizes[frame] = (width, height)
                except Exception as ex:
                    if version == 'challenge2018':
                        raise ex
                    continue

            x1 = float(row['XMin'])
            x2 = float(row['XMax'])
            y1 = float(row['YMin'])
            y2 = float(row['YMax'])

            x1_int = int(round(x1 * width))
            x2_int = int(round(x2 * width))
            y1_int = int(round(y1 * height))
            y2_int = int(round(y2 * height))

            # Check that the bounding box is valid.
            if x2 <= x1:
                raise ValueError('line {}: x2 ({}) must be higher than x1 ({})'.format(line, x2, x1))
            if y2 <= y1:
                raise ValueError('line {}: y2 ({}) must be higher than y1 ({})'.format(line, y2, y1))

            if y2_int == y1_int:
                warnings.warn('filtering line {}: rounding y2 ({}) and y1 ({}) makes them equal'.format(line, y2, y1))
                continue

            if x2_int == x1_int:
                warnings.warn('filtering line {}: rounding x2 ({}) and x1 ({}) makes them equal'.format(line, x2, x1))
                continue

            img_id = row['ImageID']
            annotation = {'cls_id': cls_id, 'x1': x1, 'x2': x2, 'y1': y1, 'y2': y2}

            if img_id in id_annotations:
                annotations = id_annotations[img_id]
                annotations['boxes'].append(annotation)
            else:
                id_annotations[img_id] = {'w': width, 'h': height, 'boxes': [annotation]}
    return id_annotations


class OidDataset(Dataset):
    """Oid dataset."""

    def __init__(self, main_dir, subset, version='v4', annotation_cache_dir='.', transform=None):
        if version == 'v4':
            metadata = '2018_04'
        elif version == 'challenge2018':
            metadata = 'challenge2018'
        elif version == 'v3':
            metadata = '2017_11'
        else:
            raise NotImplementedError('There is currently no implementation for versions older than v3')

        self.transform = transform

        if version == 'challenge2018':
            self.base_dir = os.path.join(main_dir, 'images', 'train')
        else:
            self.base_dir = os.path.join(main_dir, 'images', subset)

        metadata_dir = os.path.join(main_dir, metadata)
        annotation_cache_json = os.path.join(annotation_cache_dir, subset + '.json')

        self.id_to_labels, cls_index = get_labels(metadata_dir, version=version)

        if os.path.exists(annotation_cache_json):
            with open(annotation_cache_json, 'r') as f:
                self.annotations = json.loads(f.read())
        else:
            self.annotations = generate_images_annotations_json(main_dir, metadata_dir, subset, cls_index,
                                                                version=version)
            json.dump(self.annotations, open(annotation_cache_json, "w"))

        self.id_to_image_id = dict([(i, k) for i, k in enumerate(self.annotations)])

        # (label -> name)
        self.labels = self.id_to_labels

    def __len__(self):
        return len(self.annotations)

    def __getitem__(self, idx):

        img = self.load_image(idx)
        annot = self.load_annotations(idx)
        sample = {'img': img, 'annot': annot}
        if self.transform:
            sample = self.transform(sample)

        return sample

    def image_path(self, image_index):
        path = os.path.join(self.base_dir, self.id_to_image_id[image_index] + '.jpg')
        return path

    def load_image(self, image_index):
        path = self.image_path(image_index)
        img = skimage.io.imread(path)

        if len(img.shape) == 1:
            img = img[0]

        if len(img.shape) == 2:
            img = skimage.color.gray2rgb(img)

        try:
            return img.astype(np.float32) / 255.0
        except Exception:
            print(path)
            exit(0)

    def load_annotations(self, image_index):
        # get ground truth annotations
        image_annotations = self.annotations[self.id_to_image_id[image_index]]

        labels = image_annotations['boxes']
        height, width = image_annotations['h'], image_annotations['w']

        boxes = np.zeros((len(labels), 5))
        for idx, ann in enumerate(labels):
            cls_id = ann['cls_id']
            x1 = ann['x1'] * width
            x2 = ann['x2'] * width
            y1 = ann['y1'] * height
            y2 = ann['y2'] * height

            boxes[idx, 0] = x1
            boxes[idx, 1] = y1
            boxes[idx, 2] = x2
            boxes[idx, 3] = y2
            boxes[idx, 4] = cls_id

        return boxes

    def image_aspect_ratio(self, image_index):
        img_annotations = self.annotations[self.id_to_image_id[image_index]]
        height, width = img_annotations['h'], img_annotations['w']
        return float(width) / float(height)

    def num_classes(self):
        return len(self.id_to_labels)
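A small sketch of how this dataset class might be driven (not part of the commit; the root directory below is hypothetical and is only meant to match the images/ and metadata paths used by get_labels and generate_images_annotations_json):

# Usage sketch for OidDataset; '/data/open-images' is an assumed root containing
# images/train/*.jpg and the 2018_04 metadata CSVs read above.
from retinanet.oid_dataset import OidDataset

dataset = OidDataset(main_dir='/data/open-images', subset='train',
                     version='v4', annotation_cache_dir='.')

print(dataset.num_classes(), 'classes;', len(dataset), 'annotated images')

sample = dataset[0]
print(sample['img'].shape)    # H x W x 3 float32 image scaled to [0, 1]
print(sample['annot'].shape)  # N x 5 rows of [x1, y1, x2, y2, class_id] in pixels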
retinanet/utils.py
ADDED
@@ -0,0 +1,144 @@
import torch
import torch.nn as nn
import numpy as np


def conv3x3(in_planes, out_planes, stride=1):
    """3x3 convolution with padding"""
    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
                     padding=1, bias=False)


class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out


class Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
                               padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes * 4)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out


class BBoxTransform(nn.Module):

    def __init__(self, mean=None, std=None):
        super(BBoxTransform, self).__init__()
        if mean is None:
            if torch.cuda.is_available():
                self.mean = torch.from_numpy(np.array([0, 0, 0, 0]).astype(np.float32)).cuda()
            else:
                self.mean = torch.from_numpy(np.array([0, 0, 0, 0]).astype(np.float32))
        else:
            self.mean = mean
        if std is None:
            if torch.cuda.is_available():
                self.std = torch.from_numpy(np.array([0.1, 0.1, 0.2, 0.2]).astype(np.float32)).cuda()
            else:
                self.std = torch.from_numpy(np.array([0.1, 0.1, 0.2, 0.2]).astype(np.float32))
        else:
            self.std = std

    def forward(self, boxes, deltas):

        widths = boxes[:, :, 2] - boxes[:, :, 0]
        heights = boxes[:, :, 3] - boxes[:, :, 1]
        ctr_x = boxes[:, :, 0] + 0.5 * widths
        ctr_y = boxes[:, :, 1] + 0.5 * heights

        dx = deltas[:, :, 0] * self.std[0] + self.mean[0]
        dy = deltas[:, :, 1] * self.std[1] + self.mean[1]
        dw = deltas[:, :, 2] * self.std[2] + self.mean[2]
        dh = deltas[:, :, 3] * self.std[3] + self.mean[3]

        pred_ctr_x = ctr_x + dx * widths
        pred_ctr_y = ctr_y + dy * heights
        pred_w = torch.exp(dw) * widths
        pred_h = torch.exp(dh) * heights

        pred_boxes_x1 = pred_ctr_x - 0.5 * pred_w
        pred_boxes_y1 = pred_ctr_y - 0.5 * pred_h
        pred_boxes_x2 = pred_ctr_x + 0.5 * pred_w
        pred_boxes_y2 = pred_ctr_y + 0.5 * pred_h

        pred_boxes = torch.stack([pred_boxes_x1, pred_boxes_y1, pred_boxes_x2, pred_boxes_y2], dim=2)

        return pred_boxes


class ClipBoxes(nn.Module):

    def __init__(self, width=None, height=None):
        super(ClipBoxes, self).__init__()

    def forward(self, boxes, img):

        batch_size, num_channels, height, width = img.shape

        boxes[:, :, 0] = torch.clamp(boxes[:, :, 0], min=0)
        boxes[:, :, 1] = torch.clamp(boxes[:, :, 1], min=0)

        boxes[:, :, 2] = torch.clamp(boxes[:, :, 2], max=width)
        boxes[:, :, 3] = torch.clamp(boxes[:, :, 3], max=height)

        return boxes
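To make the decode step concrete, here is a minimal sketch (not part of the commit; the import path is an assumption) that runs BBoxTransform and ClipBoxes on a single dummy anchor, mirroring the eval branch of ResNet.forward in retinanet/model.py:

import torch
from retinanet.utils import BBoxTransform, ClipBoxes  # assumed import path

device = 'cuda' if torch.cuda.is_available() else 'cpu'  # BBoxTransform keeps mean/std on the GPU when available

regressBoxes = BBoxTransform()
clipBoxes = ClipBoxes()

img = torch.zeros(1, 3, 480, 640, device=device)                 # B x C x H x W
anchors = torch.tensor([[[10., 10., 50., 90.]]], device=device)  # B x N x 4 in (x1, y1, x2, y2)
deltas = torch.zeros(1, 1, 4, device=device)                     # zero deltas leave the anchor unchanged

boxes = regressBoxes(anchors, deltas)  # decode (dx, dy, dw, dh) against anchor centres and sizes
boxes = clipBoxes(boxes, img)          # clamp to [0, width] x [0, height]
print(boxes)                           # tensor([[[10., 10., 50., 90.]]])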