RashiAgarwal committed
Commit
5e987a7
1 Parent(s): db4f44b

Upload 2 files

Files changed (2)
  1. dataset.py +277 -0
  2. loss.py +87 -0
dataset.py ADDED
@@ -0,0 +1,277 @@
+ """
+ Creates a PyTorch dataset to load the Pascal VOC and MS COCO datasets.
+ """
+
+ import os
+ import random
+
+ import cv2  # used by load_image_cv2 and letterbox below
+ import numpy as np
+ import pandas as pd
+ import torch
+ from PIL import Image, ImageFile
+ from torch.utils.data import Dataset, DataLoader
+
+ import config
+ from utils import (
+     cells_to_bboxes,
+     iou_width_height as iou,
+     non_max_suppression as nms,
+     plot_image,
+     xywhn2xyxy,
+     xyxy2xywhn,
+ )
+
+ ImageFile.LOAD_TRUNCATED_IMAGES = True
+
+
+ class YOLODataset(Dataset):
+     def __init__(
+         self,
+         csv_file,
+         img_dir,
+         label_dir,
+         anchors,
+         image_size=416,
+         S=[13, 26, 52],
+         C=20,
+         transform=None,
+     ):
+         self.annotations = pd.read_csv(csv_file)
+         self.img_dir = img_dir
+         self.label_dir = label_dir
+         self.image_size = image_size
+         self.mosaic_border = [image_size // 2, image_size // 2]
+         self.transform = transform
+         self.S = S
+         self.anchors = torch.tensor(anchors[0] + anchors[1] + anchors[2])  # for all 3 scales
+         self.num_anchors = self.anchors.shape[0]
+         self.num_anchors_per_scale = self.num_anchors // 3
+         self.C = C
+         self.ignore_iou_thresh = 0.5
+
+     def __len__(self):
+         return len(self.annotations)
+
+     def load_image(self, index):
+         label_path = os.path.join(self.label_dir, self.annotations.iloc[index, 1])
+
+         # Load the label file; each row is [class, x, y, w, h] (normalized)
+         data = np.loadtxt(fname=label_path, delimiter=" ", ndmin=2)
+
+         # Shift each row 4 positions to the right: [class, x, y, w, h] -> [x, y, w, h, class]
+         shifted_data = np.roll(data, 4, axis=1)
+
+         # Convert the shifted data to a Python list
+         bboxes = shifted_data.tolist()
+
+         img_path = os.path.join(self.img_dir, self.annotations.iloc[index, 0])
+         image = np.array(Image.open(img_path).convert("RGB"))
+
+         return image, bboxes
+
+     def load_mosaic(self, index, p=0.75):
+         """Apply mosaic augmentation 75% of the time; otherwise load a single image."""
+         if random.random() > p:
+             return self.load_image(index)
+
+         # YOLOv5 4-mosaic loader: loads 1 image + 3 random images into a 4-image mosaic
+         labels4 = []
+         s = self.image_size
+         yc, xc = (int(random.uniform(x, 2 * s - x)) for x in self.mosaic_border)  # mosaic center x, y
+         indices = [index] + random.choices(range(len(self)), k=3)  # 3 additional image indices
+         random.shuffle(indices)
+         for i, index in enumerate(indices):
+             # Load image and labels
+             label_path = os.path.join(self.label_dir, self.annotations.iloc[index, 1])
+             bboxes = np.roll(np.loadtxt(fname=label_path, delimiter=" ", ndmin=2), 4, axis=1).tolist()
+             img_path = os.path.join(self.img_dir, self.annotations.iloc[index, 0])
+             img = np.array(Image.open(img_path).convert("RGB"))
+
+             h, w = img.shape[0], img.shape[1]
+             labels = np.array(bboxes)
+
+             # Place img in img4
+             if i == 0:  # top left
+                 img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8)  # base image with 4 tiles
+                 x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc  # xmin, ymin, xmax, ymax (large image)
+                 x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h  # xmin, ymin, xmax, ymax (small image)
+             elif i == 1:  # top right
+                 x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc
+                 x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
+             elif i == 2:  # bottom left
+                 x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h)
+                 x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h)
+             elif i == 3:  # bottom right
+                 x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h)
+                 x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)
+
+             img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b]  # img4[ymin:ymax, xmin:xmax]
+             padw = x1a - x1b
+             padh = y1a - y1b
+
+             # Labels
+             if labels.size:
+                 labels[:, :-1] = xywhn2xyxy(labels[:, :-1], w, h, padw, padh)  # normalized xywh to pixel xyxy format
+             labels4.append(labels)
+
+         # Concat/clip labels
+         labels4 = np.concatenate(labels4, 0)
+         for x in (labels4[:, :-1],):
+             np.clip(x, 0, 2 * s, out=x)  # clip when using random_perspective()
+         # img4, labels4 = replicate(img4, labels4)  # replicate
+         labels4[:, :-1] = xyxy2xywhn(labels4[:, :-1], 2 * s, 2 * s)
+         labels4[:, :-1] = np.clip(labels4[:, :-1], 0, 1)
+         labels4 = labels4[labels4[:, 2] > 0]  # drop boxes clipped to zero width
+         labels4 = labels4[labels4[:, 3] > 0]  # drop boxes clipped to zero height
+         return img4, labels4
+
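+     # Worked offsets example for the mosaic above (illustrative numbers, not from
+     # the dataset): with s = 416 the canvas is 832 x 832. If the sampled center is
+     # (xc, yc) = (500, 400) and tile i == 0 is itself 416 x 416, the canvas slot is
+     # x1a:x2a = 84:500, y1a:y2a = 0:400, the source crop is x1b:x2b = 0:416,
+     # y1b:y2b = 16:416, and padw = 84, padh = -16 then shift that tile's pixel
+     # coordinates onto the canvas inside xywhn2xyxy.
+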
+     def __getitem__(self, index):
+         # Letterbox path (kept for reference); the current pipeline always goes
+         # through load_mosaic, which falls back to load_image 25% of the time.
+         # k = np.random.rand(1)
+         # if k >= 0.75:
+         #     image, (h0, w0), (h, w) = self.load_image_cv2(index)
+         #
+         #     # Letterbox
+         #     shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size  # final letterboxed shape
+         #     image, ratio, pad = letterbox(image, shape, auto=False, scaleup=self.augment)
+         #     shapes = (h0, w0), ((h / h0, w / w0), pad)  # for COCO mAP rescaling
+         #
+         #     # Load labels
+         #     bboxes = []
+         #     x = self.bboxes[index]
+         #     if x is not None and x.size > 0:
+         #         # Normalized xywh to pixel xyxy format
+         #         bboxes = x.copy()
+         #         bboxes[:, 1] = ratio[0] * w * (x[:, 1] - x[:, 3] / 2) + pad[0]  # pad width
+         #         bboxes[:, 2] = ratio[1] * h * (x[:, 2] - x[:, 4] / 2) + pad[1]  # pad height
+         #         bboxes[:, 3] = ratio[0] * w * (x[:, 1] + x[:, 3] / 2) + pad[0]
+         #         bboxes[:, 4] = ratio[1] * h * (x[:, 2] + x[:, 4] / 2) + pad[1]
+         # else:
+         image, bboxes = self.load_mosaic(index)
+
+         if self.transform:
+             augmentations = self.transform(image=image, bboxes=bboxes)
+             image = augmentations["image"]
+             bboxes = augmentations["bboxes"]
+
+         # Below assumes 3 scale predictions (as in the paper) and the same number of anchors per scale
+         targets = [torch.zeros((self.num_anchors // 3, S, S, 6)) for S in self.S]
+         for box in bboxes:
+             iou_anchors = iou(torch.tensor(box[2:4]), self.anchors)
+             anchor_indices = iou_anchors.argsort(descending=True, dim=0)
+             x, y, width, height, class_label = box
+             has_anchor = [False] * 3  # each scale should have one anchor
+             for anchor_idx in anchor_indices:
+                 scale_idx = anchor_idx // self.num_anchors_per_scale
+                 anchor_on_scale = anchor_idx % self.num_anchors_per_scale
+                 S = self.S[scale_idx]
+                 i, j = int(S * y), int(S * x)  # which cell
+                 anchor_taken = targets[scale_idx][anchor_on_scale, i, j, 0]
+                 if not anchor_taken and not has_anchor[scale_idx]:
+                     targets[scale_idx][anchor_on_scale, i, j, 0] = 1
+                     x_cell, y_cell = S * x - j, S * y - i  # both between [0,1]
+                     width_cell, height_cell = (
+                         width * S,
+                         height * S,
+                     )  # can be greater than 1 since it's relative to cell
+                     box_coordinates = torch.tensor(
+                         [x_cell, y_cell, width_cell, height_cell]
+                     )
+                     targets[scale_idx][anchor_on_scale, i, j, 1:5] = box_coordinates
+                     targets[scale_idx][anchor_on_scale, i, j, 5] = int(class_label)
+                     has_anchor[scale_idx] = True
+                 elif not anchor_taken and iou_anchors[anchor_idx] > self.ignore_iou_thresh:
+                     targets[scale_idx][anchor_on_scale, i, j, 0] = -1  # ignore prediction
+
+         return image, tuple(targets)
+
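+     # Worked example of the target encoding above (illustrative numbers): a box
+     # centered at (x, y) = (0.5, 0.62) on the S = 13 grid lands in cell
+     # i = int(13 * 0.62) = 8, j = int(13 * 0.5) = 6, with cell-relative offsets
+     # x_cell = 13 * 0.5 - 6 = 0.5 and y_cell = 13 * 0.62 - 8 = 0.06. The 6 channels
+     # per anchor are [objectness, x_cell, y_cell, width_cell, height_cell, class].
+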
+     def load_image_cv2(self, index):
+         # Loads 1 image via the YOLOv5-style cache; returns img, original hw, resized hw.
+         # NOTE: renamed from load_image so it no longer shadows the loader above;
+         # it belongs to the commented-out letterbox path in __getitem__ and expects
+         # cache attributes (self.imgs, self.img_files, self.img_hw0, self.img_hw,
+         # self.img_size, self.augment) that __init__ does not currently set up.
+         img = self.imgs[index]
+         if img is None:  # not cached
+             img_path = self.img_files[index]
+             img = cv2.imread(img_path)  # BGR
+             assert img is not None, "Image Not Found " + img_path
+             h0, w0 = img.shape[:2]  # orig hw
+             r = self.img_size / max(h0, w0)  # resize image to img_size
+             if r < 1 or (self.augment and r != 1):  # always resize down, only resize up if training with augmentation
+                 interp = cv2.INTER_AREA if r < 1 and not self.augment else cv2.INTER_LINEAR
+                 img = cv2.resize(img, (int(w0 * r), int(h0 * r)), interpolation=interp)
+             return img, (h0, w0), img.shape[:2]  # img, hw_original, hw_resized
+         else:
+             return self.imgs[index], self.img_hw0[index], self.img_hw[index]  # img, hw_original, hw_resized
+
+
+ def letterbox(img, new_shape=(416, 416), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True):
+     # Resize image to a 32-pixel-multiple rectangle: https://github.com/ultralytics/yolov3/issues/232
+     # (module-level helper: it takes the image as its first argument, not self)
+     shape = img.shape[:2]  # current shape [height, width]
+     if isinstance(new_shape, int):
+         new_shape = (new_shape, new_shape)
+
+     # Scale ratio (new / old)
+     r = max(new_shape) / max(shape)
+     if not scaleup:  # only scale down, do not scale up (for better test mAP)
+         r = min(r, 1.0)
+
+     # Compute padding
+     ratio = r, r  # width, height ratios
+     new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
+     dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding
+     if auto:  # minimum rectangle
+         dw, dh = np.mod(dw, 64), np.mod(dh, 64)  # wh padding
+     elif scaleFill:  # stretch
+         dw, dh = 0.0, 0.0
+         new_unpad = new_shape
+         ratio = new_shape[0] / shape[1], new_shape[1] / shape[0]  # width, height ratios
+
+     dw /= 2  # divide padding into 2 sides
+     dh /= 2
+
+     if shape[::-1] != new_unpad:  # resize
+         img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)
+     top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
+     left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
+     img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border
+     return img, ratio, (dw, dh)
+
+
+ def test():
+     anchors = config.ANCHORS
+     transform = config.test_transforms
+
+     dataset = YOLODataset(
+         "COCO/train.csv",
+         "COCO/images/images/",
+         "COCO/labels/labels_new/",
+         S=[13, 26, 52],
+         anchors=anchors,
+         transform=transform,
+     )
+     S = [13, 26, 52]
+     scaled_anchors = torch.tensor(anchors) / (
+         1 / torch.tensor(S).unsqueeze(1).unsqueeze(1).repeat(1, 3, 2)
+     )  # equivalent to anchors * S: scales normalized anchors up to each grid size
+     loader = DataLoader(dataset=dataset, batch_size=1, shuffle=True)
+     for x, y in loader:
+         boxes = []
+         for i in range(y[0].shape[1]):
+             anchor = scaled_anchors[i]
+             print(anchor.shape)
+             print(y[i].shape)
+             boxes += cells_to_bboxes(
+                 y[i], is_preds=False, S=y[i].shape[2], anchors=anchor
+             )[0]
+         boxes = nms(boxes, iou_threshold=1, threshold=0.7, box_format="midpoint")
+         print(boxes)
+         plot_image(x[0].permute(1, 2, 0).to("cpu"), boxes)
+
+
+ if __name__ == "__main__":
+     test()
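
For reference, the label files read by load_image are assumed to hold one object per row as [class, x, y, w, h] with coordinates normalized to [0, 1]; np.roll(data, 4, axis=1) moves the class id to the last column so each row matches the [x, y, w, h, class] order the transforms and the mosaic loader expect. A minimal sketch of that conversion (the values are made up for illustration):

import numpy as np

data = np.array([[11, 0.5, 0.5, 0.25, 0.4]])  # one row: class 11, centered 0.25 x 0.4 box
bboxes = np.roll(data, 4, axis=1).tolist()    # class id moves to the last column
print(bboxes)                                 # [[0.5, 0.5, 0.25, 0.4, 11.0]]
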
loss.py ADDED
@@ -0,0 +1,87 @@
+ """
+ Implementation of a YOLO loss function similar to the one in the YOLOv3 paper;
+ the difference, from what I can tell, is that I use CrossEntropy for the classes
+ instead of BinaryCrossEntropy.
+ """
+
+ import torch
+ import torch.nn as nn
+ from pytorch_lightning import LightningModule
+
+ from utils import intersection_over_union
+
+
+ class YoloLoss_basic(LightningModule):
+     def __init__(self):
+         super(YoloLoss_basic, self).__init__()
+         self.mse = nn.MSELoss()
+         self.bce = nn.BCEWithLogitsLoss()
+         self.entropy = nn.CrossEntropyLoss()
+         self.sigmoid = nn.Sigmoid()
+
+         # Constants signifying how much to pay for each respective part of the loss
+         self.lambda_class = 1
+         self.lambda_noobj = 10
+         self.lambda_obj = 1
+         self.lambda_box = 10
+
+     def cal_loss(self, predictions, target, anchors):
+         # Check where obj and noobj (we ignore if target == -1)
+         obj = target[..., 0] == 1  # in paper this is Iobj_i
+         noobj = target[..., 0] == 0  # in paper this is Inoobj_i
+
+         # ======================= #
+         #   FOR NO OBJECT LOSS    #
+         # ======================= #
+
+         no_object_loss = self.bce(
+             predictions[..., 0:1][noobj], target[..., 0:1][noobj],
+         )
+
+         # ==================== #
+         #   FOR OBJECT LOSS    #
+         # ==================== #
+
+         anchors = anchors.reshape(1, 3, 1, 1, 2).to(predictions.device)  # follow the input's device instead of hard-coding cuda
+         box_preds = torch.cat([self.sigmoid(predictions[..., 1:3]), torch.exp(predictions[..., 3:5]) * anchors], dim=-1)
+         ious = intersection_over_union(box_preds[obj], target[..., 1:5][obj]).detach()
+         object_loss = self.mse(self.sigmoid(predictions[..., 0:1][obj]), ious * target[..., 0:1][obj])
+
+         # ======================== #
+         #   FOR BOX COORDINATES    #
+         # ======================== #
+
+         # NOTE: the next two assignments modify predictions and target in place,
+         # as in the reference implementation.
+         predictions[..., 1:3] = self.sigmoid(predictions[..., 1:3])  # x, y coordinates
+         target[..., 3:5] = torch.log(
+             (1e-16 + target[..., 3:5] / anchors)
+         )  # width, height coordinates
+         box_loss = self.mse(predictions[..., 1:5][obj], target[..., 1:5][obj])
+
+         # ================== #
+         #   FOR CLASS LOSS   #
+         # ================== #
+
+         class_loss = self.entropy(
+             predictions[..., 5:][obj], target[..., 5][obj].long(),
+         )
+
+         return (
+             self.lambda_box * box_loss
+             + self.lambda_obj * object_loss
+             + self.lambda_noobj * no_object_loss
+             + self.lambda_class * class_loss
+         )
+
+     def forward(self, predictions, target, anchors):
+         return self.cal_loss(predictions, target, anchors)
+
+
+ class YoloLoss(LightningModule):
+     def __init__(self):
+         super(YoloLoss, self).__init__()
+         self.yolo_basic = YoloLoss_basic()
+
+     def forward(self, predictions, target, scaled_anchors):
+         # Sum the per-scale losses over the 3 prediction scales
+         tot_loss = 0
+         for i in range(len(target)):
+             tot_loss += self.yolo_basic(predictions[i], target[i], scaled_anchors[i])
+         return tot_loss
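
A minimal shape sketch of how these classes are consumed (the dummy sizes and values below are illustrative, and assume utils.intersection_over_union accepts midpoint-format (K, 4) tensors as elsewhere in this repo): predictions carry one (N, 3, S, S, 5 + C) tensor per scale, targets are the (N, 3, S, S, 6) tensors built by YOLODataset, and scaled_anchors is (3, 3, 2).

import torch
from loss import YoloLoss

batch, C = 2, 20
scales = [13, 26, 52]
predictions = [torch.randn(batch, 3, s, s, 5 + C) for s in scales]
targets = [torch.zeros(batch, 3, s, s, 6) for s in scales]
for t in targets:  # mark one positive cell per scale so the obj mask is non-empty
    t[:, 0, 5, 5, 0] = 1.0
    t[:, 0, 5, 5, 1:5] = torch.tensor([0.5, 0.5, 2.0, 2.0])  # x, y, w, h in cell units
    t[:, 0, 5, 5, 5] = 3  # class id
scaled_anchors = torch.rand(3, 3, 2)  # (scale, anchor, w/h), as built in dataset.test()

total = YoloLoss()(predictions, targets, scaled_anchors)
print(total)  # scalar: sum of the per-scale losses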