Gosula committed on
Commit
0afe8bf
1 Parent(s): 996bd16

Upload 3 files

Files changed (3)
  1. dataset.py +181 -0
  2. dataset_org.py +127 -0
  3. loss.py +79 -0
dataset.py ADDED
@@ -0,0 +1,181 @@
+ """
+ Creates a Pytorch dataset to load the Pascal VOC & MS COCO datasets
+ """
+
+ import config
+ import numpy as np
+ import os
+ import pandas as pd
+ import torch
+ from utils import xywhn2xyxy, xyxy2xywhn
+ import random
+
+ from PIL import Image, ImageFile
+ from torch.utils.data import Dataset, DataLoader
+ from utils import (
+     cells_to_bboxes,
+     iou_width_height as iou,
+     non_max_suppression as nms,
+     plot_image
+ )
+
+ ImageFile.LOAD_TRUNCATED_IMAGES = True
+
+ class YOLODataset(Dataset):
+     def __init__(
+         self,
+         csv_file,
+         img_dir,
+         label_dir,
+         anchors,
+         image_size=416,
+         S=[13, 26, 52],
+         C=20,
+         transform=None,
+     ):
+         self.annotations = pd.read_csv(csv_file)
+         self.img_dir = img_dir
+         self.label_dir = label_dir
+         self.image_size = image_size
+         self.mosaic_border = [image_size // 2, image_size // 2]
+         self.transform = transform
+         self.S = S
+         self.anchors = torch.tensor(anchors[0] + anchors[1] + anchors[2])  # for all 3 scales
+         self.num_anchors = self.anchors.shape[0]
+         self.num_anchors_per_scale = self.num_anchors // 3
+         self.C = C
+         self.ignore_iou_thresh = 0.5
+
+     def __len__(self):
+         return len(self.annotations)
+
+     def load_mosaic(self, index):
+         # YOLOv5 4-mosaic loader. Loads 1 image + 3 random images into a 4-image mosaic
+         labels4 = []
+         s = self.image_size
+         yc, xc = (int(random.uniform(x, 2 * s - x)) for x in self.mosaic_border)  # mosaic center x, y
+         indices = [index] + random.choices(range(len(self)), k=3)  # 3 additional image indices
+         random.shuffle(indices)
+         for i, index in enumerate(indices):
+             # Load image
+             label_path = os.path.join(self.label_dir, self.annotations.iloc[index, 1])
+             bboxes = np.roll(np.loadtxt(fname=label_path, delimiter=" ", ndmin=2), 4, axis=1).tolist()
+             img_path = os.path.join(self.img_dir, self.annotations.iloc[index, 0])
+             img = np.array(Image.open(img_path).convert("RGB"))
+
+             h, w = img.shape[0], img.shape[1]
+             labels = np.array(bboxes)
+
+             # place img in img4
+             if i == 0:  # top left
+                 img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8)  # base image with 4 tiles
+                 x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc  # xmin, ymin, xmax, ymax (large image)
+                 x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h  # xmin, ymin, xmax, ymax (small image)
+             elif i == 1:  # top right
+                 x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc
+                 x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
+             elif i == 2:  # bottom left
+                 x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h)
+                 x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h)
+             elif i == 3:  # bottom right
+                 x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h)
+                 x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)
+
+             img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b]  # img4[ymin:ymax, xmin:xmax]
+             padw = x1a - x1b
+             padh = y1a - y1b
+
+             # Labels
+             if labels.size:
+                 labels[:, :-1] = xywhn2xyxy(labels[:, :-1], w, h, padw, padh)  # normalized xywh to pixel xyxy format
+             labels4.append(labels)
+
+         # Concat/clip labels
+         labels4 = np.concatenate(labels4, 0)
+         for x in (labels4[:, :-1],):
+             np.clip(x, 0, 2 * s, out=x)  # clip when using random_perspective()
+         # img4, labels4 = replicate(img4, labels4)  # replicate
+         labels4[:, :-1] = xyxy2xywhn(labels4[:, :-1], 2 * s, 2 * s)
+         labels4[:, :-1] = np.clip(labels4[:, :-1], 0, 1)
+         labels4 = labels4[labels4[:, 2] > 0]
+         labels4 = labels4[labels4[:, 3] > 0]
+         return img4, labels4
+
+     def __getitem__(self, index):
+         image, bboxes = self.load_mosaic(index)
+
+         if self.transform:
+             augmentations = self.transform(image=image, bboxes=bboxes)
+             image = augmentations["image"]
+             bboxes = augmentations["bboxes"]
+
+         # Below assumes 3 scale predictions (as paper) and same num of anchors per scale
+         targets = [torch.zeros((self.num_anchors // 3, S, S, 6)) for S in self.S]
+         for box in bboxes:
+             iou_anchors = iou(torch.tensor(box[2:4]), self.anchors)
+             anchor_indices = iou_anchors.argsort(descending=True, dim=0)
+             x, y, width, height, class_label = box
+             has_anchor = [False] * 3  # each scale should have one anchor
+             for anchor_idx in anchor_indices:
+                 scale_idx = anchor_idx // self.num_anchors_per_scale
+                 anchor_on_scale = anchor_idx % self.num_anchors_per_scale
+                 S = self.S[scale_idx]
+                 i, j = int(S * y), int(S * x)  # which cell
+                 anchor_taken = targets[scale_idx][anchor_on_scale, i, j, 0]
+                 if not anchor_taken and not has_anchor[scale_idx]:
+                     targets[scale_idx][anchor_on_scale, i, j, 0] = 1
+                     x_cell, y_cell = S * x - j, S * y - i  # both between [0,1]
+                     width_cell, height_cell = (
+                         width * S,
+                         height * S,
+                     )  # can be greater than 1 since it's relative to cell
+                     box_coordinates = torch.tensor(
+                         [x_cell, y_cell, width_cell, height_cell]
+                     )
+                     targets[scale_idx][anchor_on_scale, i, j, 1:5] = box_coordinates
+                     targets[scale_idx][anchor_on_scale, i, j, 5] = int(class_label)
+                     has_anchor[scale_idx] = True
+
+                 elif not anchor_taken and iou_anchors[anchor_idx] > self.ignore_iou_thresh:
+                     targets[scale_idx][anchor_on_scale, i, j, 0] = -1  # ignore prediction
+
+         return image, tuple(targets)
+
+
+ def test():
+     anchors = config.ANCHORS
+
+     transform = config.test_transforms
+
+     dataset = YOLODataset(
+         "COCO/train.csv",
+         "COCO/images/images/",
+         "COCO/labels/labels_new/",
+         S=[13, 26, 52],
+         anchors=anchors,
+         transform=transform,
+     )
+     S = [13, 26, 52]
+     scaled_anchors = torch.tensor(anchors) / (
+         1 / torch.tensor(S).unsqueeze(1).unsqueeze(1).repeat(1, 3, 2)
+     )
+     loader = DataLoader(dataset=dataset, batch_size=1, shuffle=True)
+     for x, y in loader:
+         boxes = []
+
+         for i in range(y[0].shape[1]):
+             anchor = scaled_anchors[i]
+             print(anchor.shape)
+             print(y[i].shape)
+             boxes += cells_to_bboxes(
+                 y[i], is_preds=False, S=y[i].shape[2], anchors=anchor
+             )[0]
+         boxes = nms(boxes, iou_threshold=1, threshold=0.7, box_format="midpoint")
+         print(boxes)
+         plot_image(x[0].permute(1, 2, 0).to("cpu"), boxes)
+
+
+ if __name__ == "__main__":
+     test()
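
The target-building loop in `__getitem__` assigns each box to a grid cell and stores the in-cell offset plus a cell-relative width and height. A minimal sketch of that arithmetic for a single box, using made-up coordinates and a 13x13 grid purely for illustration:

import torch

S = 13                                   # grid size at the coarsest scale
x, y, w, h = 0.55, 0.30, 0.20, 0.40      # normalized box centre and size (hypothetical values)

i, j = int(S * y), int(S * x)            # which cell: row 3, column 7
x_cell, y_cell = S * x - j, S * y - i    # offsets inside that cell, both in [0, 1)
width_cell, height_cell = S * w, S * h   # sizes in cell units, may exceed 1

print(i, j, torch.tensor([x_cell, y_cell, width_cell, height_cell]))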
dataset_org.py ADDED
@@ -0,0 +1,127 @@
+ """
+ Creates a Pytorch dataset to load the Pascal VOC & MS COCO datasets
+ """
+
+ import config
+ import numpy as np
+ import os
+ import pandas as pd
+ import torch
+
+ from PIL import Image, ImageFile
+ from torch.utils.data import Dataset, DataLoader
+ from utils import (
+     cells_to_bboxes,
+     iou_width_height as iou,
+     non_max_suppression as nms,
+     plot_image
+ )
+
+ ImageFile.LOAD_TRUNCATED_IMAGES = True
+
+ class YOLODataset(Dataset):
+     def __init__(
+         self,
+         csv_file,
+         img_dir,
+         label_dir,
+         anchors,
+         image_size=416,
+         S=[13, 26, 52],
+         C=20,
+         transform=None,
+     ):
+         self.annotations = pd.read_csv(csv_file)
+         self.img_dir = img_dir
+         self.label_dir = label_dir
+         self.image_size = image_size
+         self.transform = transform
+         self.S = S
+         self.anchors = torch.tensor(anchors[0] + anchors[1] + anchors[2])  # for all 3 scales
+         self.num_anchors = self.anchors.shape[0]
+         self.num_anchors_per_scale = self.num_anchors // 3
+         self.C = C
+         self.ignore_iou_thresh = 0.5
+
+     def __len__(self):
+         return len(self.annotations)
+
+     def __getitem__(self, index):
+         label_path = os.path.join(self.label_dir, self.annotations.iloc[index, 1])
+         bboxes = np.roll(np.loadtxt(fname=label_path, delimiter=" ", ndmin=2), 4, axis=1).tolist()
+         img_path = os.path.join(self.img_dir, self.annotations.iloc[index, 0])
+         image = np.array(Image.open(img_path).convert("RGB"))
+
+         if self.transform:
+             augmentations = self.transform(image=image, bboxes=bboxes)
+             image = augmentations["image"]
+             bboxes = augmentations["bboxes"]
+
+         # Below assumes 3 scale predictions (as paper) and same num of anchors per scale
+         targets = [torch.zeros((self.num_anchors // 3, S, S, 6)) for S in self.S]
+         for box in bboxes:
+             iou_anchors = iou(torch.tensor(box[2:4]), self.anchors)
+             anchor_indices = iou_anchors.argsort(descending=True, dim=0)
+             x, y, width, height, class_label = box
+             has_anchor = [False] * 3  # each scale should have one anchor
+             for anchor_idx in anchor_indices:
+                 scale_idx = anchor_idx // self.num_anchors_per_scale
+                 anchor_on_scale = anchor_idx % self.num_anchors_per_scale
+                 S = self.S[scale_idx]
+                 i, j = int(S * y), int(S * x)  # which cell
+                 anchor_taken = targets[scale_idx][anchor_on_scale, i, j, 0]
+                 if not anchor_taken and not has_anchor[scale_idx]:
+                     targets[scale_idx][anchor_on_scale, i, j, 0] = 1
+                     x_cell, y_cell = S * x - j, S * y - i  # both between [0,1]
+                     width_cell, height_cell = (
+                         width * S,
+                         height * S,
+                     )  # can be greater than 1 since it's relative to cell
+                     box_coordinates = torch.tensor(
+                         [x_cell, y_cell, width_cell, height_cell]
+                     )
+                     targets[scale_idx][anchor_on_scale, i, j, 1:5] = box_coordinates
+                     targets[scale_idx][anchor_on_scale, i, j, 5] = int(class_label)
+                     has_anchor[scale_idx] = True
+
+                 elif not anchor_taken and iou_anchors[anchor_idx] > self.ignore_iou_thresh:
+                     targets[scale_idx][anchor_on_scale, i, j, 0] = -1  # ignore prediction
+
+         return image, tuple(targets)
+
+
+ def test():
+     anchors = config.ANCHORS
+
+     transform = config.test_transforms
+
+     dataset = YOLODataset(
+         "COCO/train.csv",
+         "COCO/images/images/",
+         "COCO/labels/labels_new/",
+         S=[13, 26, 52],
+         anchors=anchors,
+         transform=transform,
+     )
+     S = [13, 26, 52]
+     scaled_anchors = torch.tensor(anchors) / (
+         1 / torch.tensor(S).unsqueeze(1).unsqueeze(1).repeat(1, 3, 2)
+     )
+     loader = DataLoader(dataset=dataset, batch_size=1, shuffle=True)
+     for x, y in loader:
+         boxes = []
+
+         for i in range(y[0].shape[1]):
+             anchor = scaled_anchors[i]
+             print(anchor.shape)
+             print(y[i].shape)
+             boxes += cells_to_bboxes(
+                 y[i], is_preds=False, S=y[i].shape[2], anchors=anchor
+             )[0]
+         boxes = nms(boxes, iou_threshold=1, threshold=0.7, box_format="midpoint")
+         print(boxes)
+         plot_image(x[0].permute(1, 2, 0).to("cpu"), boxes)
+
+
+ if __name__ == "__main__":
+     test()
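
Both dataset variants read YOLO-format label files in which each row is `class_id x y w h`; the `np.roll(..., 4, axis=1)` call moves the class id to the last column so a row becomes `x y w h class_id`, the order the target-building loop unpacks. A tiny sketch with an invented row:

import numpy as np

row = np.array([[11, 0.48, 0.63, 0.22, 0.35]])   # [class_id, x, y, w, h] (illustrative values)
print(np.roll(row, 4, axis=1))                   # [[ 0.48  0.63  0.22  0.35 11.  ]] -> [x, y, w, h, class_id]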
loss.py ADDED
@@ -0,0 +1,79 @@
+ """
+ Implementation of Yolo Loss Function similar to the one in Yolov3 paper,
+ the difference from what I can tell is I use CrossEntropy for the classes
+ instead of BinaryCrossEntropy.
+ """
+ import random
+ import torch
+ import torch.nn as nn
+
+ from utils import intersection_over_union
+
+
+ class YoloLoss(nn.Module):
+     def __init__(self):
+         super().__init__()
+         self.mse = nn.MSELoss()
+         self.bce = nn.BCEWithLogitsLoss()
+         self.entropy = nn.CrossEntropyLoss()
+         self.sigmoid = nn.Sigmoid()
+
+         # Constants signifying how much to pay for each respective part of the loss
+         self.lambda_class = 1
+         self.lambda_noobj = 10
+         self.lambda_obj = 1
+         self.lambda_box = 10
+
+     def forward(self, predictions, target, anchors):
+         # Check where obj and noobj (we ignore if target == -1)
+         obj = target[..., 0] == 1  # in paper this is Iobj_i
+         noobj = target[..., 0] == 0  # in paper this is Inoobj_i
+
+         # ======================= #
+         #   FOR NO OBJECT LOSS    #
+         # ======================= #
+
+         no_object_loss = self.bce(
+             (predictions[..., 0:1][noobj]), (target[..., 0:1][noobj]),
+         )
+
+         # ==================== #
+         #   FOR OBJECT LOSS    #
+         # ==================== #
+
+         anchors = anchors.reshape(1, 3, 1, 1, 2)
+         box_preds = torch.cat([self.sigmoid(predictions[..., 1:3]), torch.exp(predictions[..., 3:5]) * anchors], dim=-1)
+         ious = intersection_over_union(box_preds[obj], target[..., 1:5][obj]).detach()
+         object_loss = self.mse(self.sigmoid(predictions[..., 0:1][obj]), ious * target[..., 0:1][obj])
+
+         # ======================== #
+         #   FOR BOX COORDINATES    #
+         # ======================== #
+
+         predictions[..., 1:3] = self.sigmoid(predictions[..., 1:3])  # x,y coordinates
+         target[..., 3:5] = torch.log(
+             (1e-16 + target[..., 3:5] / anchors)
+         )  # width, height coordinates
+         box_loss = self.mse(predictions[..., 1:5][obj], target[..., 1:5][obj])
+
+         # ================== #
+         #   FOR CLASS LOSS   #
+         # ================== #
+
+         class_loss = self.entropy(
+             (predictions[..., 5:][obj]), (target[..., 5][obj].long()),
+         )
+
+         # print("__________________________________")
+         # print(self.lambda_box * box_loss)
+         # print(self.lambda_obj * object_loss)
+         # print(self.lambda_noobj * no_object_loss)
+         # print(self.lambda_class * class_loss)
+         # print("\n")
+
+         return (
+             self.lambda_box * box_loss
+             + self.lambda_obj * object_loss
+             + self.lambda_noobj * no_object_loss
+             + self.lambda_class * class_loss
+         )
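
For reference, a shape sketch of how this loss might be called at a single prediction scale. The batch size, grid size, class count and dummy target values below are assumptions for illustration only, and the snippet assumes the repo's `utils.intersection_over_union` is importable (loss.py already depends on it):

import torch
from loss import YoloLoss

N, S, C = 2, 13, 20                              # assumed batch size, grid size, number of classes
predictions = torch.randn(N, 3, S, S, 5 + C)     # per anchor: [obj, x, y, w, h, class scores]
target = torch.zeros(N, 3, S, S, 6)              # per anchor: [obj, x, y, w, h, class idx]
target[:, 0, 5, 5, 0] = 1.0                      # mark one cell per image as containing an object
target[:, 0, 5, 5, 1:5] = torch.tensor([0.4, 0.6, 2.0, 3.0])  # in-cell offsets and cell-relative size
target[:, 0, 5, 5, 5] = 7                        # class index for that object
anchors = torch.rand(3, 2) * S                   # 3 (w, h) anchors already scaled to this grid

loss_fn = YoloLoss()
print(loss_fn(predictions, target, anchors))     # scalar tensor combining the four weighted terms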