RashiAgarwal committed
Commit
5e987a7
1 Parent(s): db4f44b

Upload 2 files

Files changed (2)
  1. dataset.py +277 -0
  2. loss.py +87 -0
dataset.py ADDED
@@ -0,0 +1,277 @@
+ """
+ Creates a PyTorch dataset to load the Pascal VOC and MS COCO datasets.
+ """
+
+ import os
+ import random
+
+ import cv2  # used by load_image_cv2 and letterbox below
+ import numpy as np
+ import pandas as pd
+ import torch
+ from PIL import Image, ImageFile
+ from torch.utils.data import Dataset, DataLoader
+
+ import config
+ from utils import (
+     cells_to_bboxes,
+     iou_width_height as iou,
+     non_max_suppression as nms,
+     plot_image,
+     xywhn2xyxy,
+     xyxy2xywhn,
+ )
+
+ ImageFile.LOAD_TRUNCATED_IMAGES = True
+
+
+ class YOLODataset(Dataset):
+     def __init__(
+         self,
+         csv_file,
+         img_dir,
+         label_dir,
+         anchors,
+         image_size=416,
+         S=[13, 26, 52],
+         C=20,
+         transform=None,
+     ):
+         self.annotations = pd.read_csv(csv_file)
+         self.img_dir = img_dir
+         self.label_dir = label_dir
+         self.image_size = image_size
+         self.mosaic_border = [image_size // 2, image_size // 2]
+         self.transform = transform
+         self.S = S
+         self.anchors = torch.tensor(anchors[0] + anchors[1] + anchors[2])  # for all 3 scales
+         self.num_anchors = self.anchors.shape[0]
+         self.num_anchors_per_scale = self.num_anchors // 3
+         self.C = C
+         self.ignore_iou_thresh = 0.5
+
+     def __len__(self):
+         return len(self.annotations)
+
+     def load_image(self, index):
+         label_path = os.path.join(self.label_dir, self.annotations.iloc[index, 1])
+
+         # Load the label file; each row is [class, x, y, w, h] (normalized)
+         data = np.loadtxt(fname=label_path, delimiter=" ", ndmin=2)
+
+         # Shift each row 4 positions to the right: [class, x, y, w, h] -> [x, y, w, h, class]
+         shifted_data = np.roll(data, 4, axis=1)
+
+         # Convert the shifted data to a Python list
+         bboxes = shifted_data.tolist()
+
+         img_path = os.path.join(self.img_dir, self.annotations.iloc[index, 0])
+         image = np.array(Image.open(img_path).convert("RGB"))
+
+         return image, bboxes
+
+     def load_mosaic(self, index, p=0.75):
+         """Apply mosaic augmentation 75% of the time; otherwise load a single image."""
+         if random.random() > p:
+             return self.load_image(index)
+
+         # YOLOv5 4-mosaic loader: loads 1 image + 3 random images into a 4-image mosaic
+         labels4 = []
+         s = self.image_size
+         yc, xc = (int(random.uniform(x, 2 * s - x)) for x in self.mosaic_border)  # mosaic center x, y
+         indices = [index] + random.choices(range(len(self)), k=3)  # 3 additional image indices
+         random.shuffle(indices)
+         for i, index in enumerate(indices):
+             # Load image and labels
+             label_path = os.path.join(self.label_dir, self.annotations.iloc[index, 1])
+             bboxes = np.roll(np.loadtxt(fname=label_path, delimiter=" ", ndmin=2), 4, axis=1).tolist()
+             img_path = os.path.join(self.img_dir, self.annotations.iloc[index, 0])
+             img = np.array(Image.open(img_path).convert("RGB"))
+
+             h, w = img.shape[0], img.shape[1]
+             labels = np.array(bboxes)
+
+             # Place img in img4
+             if i == 0:  # top left
+                 img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8)  # base image with 4 tiles
+                 x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc  # xmin, ymin, xmax, ymax (large image)
+                 x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h  # xmin, ymin, xmax, ymax (small image)
+             elif i == 1:  # top right
+                 x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc
+                 x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
+             elif i == 2:  # bottom left
+                 x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h)
+                 x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h)
+             elif i == 3:  # bottom right
+                 x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h)
+                 x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)
+
+             img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b]  # img4[ymin:ymax, xmin:xmax]
+             padw = x1a - x1b
+             padh = y1a - y1b
+
+             # Labels
+             if labels.size:
+                 labels[:, :-1] = xywhn2xyxy(labels[:, :-1], w, h, padw, padh)  # normalized xywh to pixel xyxy format
+             labels4.append(labels)
+
+         # Concat/clip labels
+         labels4 = np.concatenate(labels4, 0)
+         for x in (labels4[:, :-1],):
+             np.clip(x, 0, 2 * s, out=x)  # clip when using random_perspective()
+         # img4, labels4 = replicate(img4, labels4)  # replicate
+         labels4[:, :-1] = xyxy2xywhn(labels4[:, :-1], 2 * s, 2 * s)
+         labels4[:, :-1] = np.clip(labels4[:, :-1], 0, 1)
+         labels4 = labels4[labels4[:, 2] > 0]  # drop boxes clipped to zero width
+         labels4 = labels4[labels4[:, 3] > 0]  # drop boxes clipped to zero height
+         return img4, labels4
+
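+     # Worked offsets example for the mosaic above (illustrative numbers, not from
+     # the dataset): with s = 416 the canvas is 832 x 832. If the sampled center is
+     # (xc, yc) = (500, 400) and tile i == 0 is itself 416 x 416, the canvas slot is
+     # x1a:x2a = 84:500, y1a:y2a = 0:400, the source crop is x1b:x2b = 0:416,
+     # y1b:y2b = 16:416, and padw = 84, padh = -16 then shift that tile's pixel
+     # coordinates onto the canvas inside xywhn2xyxy.
+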
+     def __getitem__(self, index):
+         # Letterbox path (kept for reference); the current pipeline always goes
+         # through load_mosaic, which falls back to load_image 25% of the time.
+         # k = np.random.rand(1)
+         # if k >= 0.75:
+         #     image, (h0, w0), (h, w) = self.load_image_cv2(index)
+         #
+         #     # Letterbox
+         #     shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size  # final letterboxed shape
+         #     image, ratio, pad = letterbox(image, shape, auto=False, scaleup=self.augment)
+         #     shapes = (h0, w0), ((h / h0, w / w0), pad)  # for COCO mAP rescaling
+         #
+         #     # Load labels
+         #     bboxes = []
+         #     x = self.bboxes[index]
+         #     if x is not None and x.size > 0:
+         #         # Normalized xywh to pixel xyxy format
+         #         bboxes = x.copy()
+         #         bboxes[:, 1] = ratio[0] * w * (x[:, 1] - x[:, 3] / 2) + pad[0]  # pad width
+         #         bboxes[:, 2] = ratio[1] * h * (x[:, 2] - x[:, 4] / 2) + pad[1]  # pad height
+         #         bboxes[:, 3] = ratio[0] * w * (x[:, 1] + x[:, 3] / 2) + pad[0]
+         #         bboxes[:, 4] = ratio[1] * h * (x[:, 2] + x[:, 4] / 2) + pad[1]
+         # else:
+         image, bboxes = self.load_mosaic(index)
+
+         if self.transform:
+             augmentations = self.transform(image=image, bboxes=bboxes)
+             image = augmentations["image"]
+             bboxes = augmentations["bboxes"]
+
+         # Below assumes 3 scale predictions (as in the paper) and the same number of anchors per scale
+         targets = [torch.zeros((self.num_anchors // 3, S, S, 6)) for S in self.S]
+         for box in bboxes:
+             iou_anchors = iou(torch.tensor(box[2:4]), self.anchors)
+             anchor_indices = iou_anchors.argsort(descending=True, dim=0)
+             x, y, width, height, class_label = box
+             has_anchor = [False] * 3  # each scale should have one anchor
+             for anchor_idx in anchor_indices:
+                 scale_idx = anchor_idx // self.num_anchors_per_scale
+                 anchor_on_scale = anchor_idx % self.num_anchors_per_scale
+                 S = self.S[scale_idx]
+                 i, j = int(S * y), int(S * x)  # which cell
+                 anchor_taken = targets[scale_idx][anchor_on_scale, i, j, 0]
+                 if not anchor_taken and not has_anchor[scale_idx]:
+                     targets[scale_idx][anchor_on_scale, i, j, 0] = 1
+                     x_cell, y_cell = S * x - j, S * y - i  # both between [0,1]
+                     width_cell, height_cell = (
+                         width * S,
+                         height * S,
+                     )  # can be greater than 1 since it's relative to cell
+                     box_coordinates = torch.tensor(
+                         [x_cell, y_cell, width_cell, height_cell]
+                     )
+                     targets[scale_idx][anchor_on_scale, i, j, 1:5] = box_coordinates
+                     targets[scale_idx][anchor_on_scale, i, j, 5] = int(class_label)
+                     has_anchor[scale_idx] = True
+                 elif not anchor_taken and iou_anchors[anchor_idx] > self.ignore_iou_thresh:
+                     targets[scale_idx][anchor_on_scale, i, j, 0] = -1  # ignore prediction
+
+         return image, tuple(targets)
+
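+     # Worked example of the target encoding above (illustrative numbers): a box
+     # centered at (x, y) = (0.5, 0.62) on the S = 13 grid lands in cell
+     # i = int(13 * 0.62) = 8, j = int(13 * 0.5) = 6, with cell-relative offsets
+     # x_cell = 13 * 0.5 - 6 = 0.5 and y_cell = 13 * 0.62 - 8 = 0.06. The 6 channels
+     # per anchor are [objectness, x_cell, y_cell, width_cell, height_cell, class].
+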
+     def load_image_cv2(self, index):
+         # Loads 1 image via the YOLOv5-style cache; returns img, original hw, resized hw.
+         # NOTE: renamed from load_image so it no longer shadows the loader above;
+         # it belongs to the commented-out letterbox path in __getitem__ and expects
+         # cache attributes (self.imgs, self.img_files, self.img_hw0, self.img_hw,
+         # self.img_size, self.augment) that __init__ does not currently set up.
+         img = self.imgs[index]
+         if img is None:  # not cached
+             img_path = self.img_files[index]
+             img = cv2.imread(img_path)  # BGR
+             assert img is not None, "Image Not Found " + img_path
+             h0, w0 = img.shape[:2]  # orig hw
+             r = self.img_size / max(h0, w0)  # resize image to img_size
+             if r < 1 or (self.augment and r != 1):  # always resize down, only resize up if training with augmentation
+                 interp = cv2.INTER_AREA if r < 1 and not self.augment else cv2.INTER_LINEAR
+                 img = cv2.resize(img, (int(w0 * r), int(h0 * r)), interpolation=interp)
+             return img, (h0, w0), img.shape[:2]  # img, hw_original, hw_resized
+         else:
+             return self.imgs[index], self.img_hw0[index], self.img_hw[index]  # img, hw_original, hw_resized
+
+
+ def letterbox(img, new_shape=(416, 416), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True):
+     # Resize image to a 32-pixel-multiple rectangle: https://github.com/ultralytics/yolov3/issues/232
+     # (module-level helper: it takes the image as its first argument, not self)
+     shape = img.shape[:2]  # current shape [height, width]
+     if isinstance(new_shape, int):
+         new_shape = (new_shape, new_shape)
+
+     # Scale ratio (new / old)
+     r = max(new_shape) / max(shape)
+     if not scaleup:  # only scale down, do not scale up (for better test mAP)
+         r = min(r, 1.0)
+
+     # Compute padding
+     ratio = r, r  # width, height ratios
+     new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
+     dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding
+     if auto:  # minimum rectangle
+         dw, dh = np.mod(dw, 64), np.mod(dh, 64)  # wh padding
+     elif scaleFill:  # stretch
+         dw, dh = 0.0, 0.0
+         new_unpad = new_shape
+         ratio = new_shape[0] / shape[1], new_shape[1] / shape[0]  # width, height ratios
+
+     dw /= 2  # divide padding into 2 sides
+     dh /= 2
+
+     if shape[::-1] != new_unpad:  # resize
+         img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)
+     top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
+     left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
+     img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border
+     return img, ratio, (dw, dh)
+
+
+ def test():
+     anchors = config.ANCHORS
+     transform = config.test_transforms
+
+     dataset = YOLODataset(
+         "COCO/train.csv",
+         "COCO/images/images/",
+         "COCO/labels/labels_new/",
+         S=[13, 26, 52],
+         anchors=anchors,
+         transform=transform,
+     )
+     S = [13, 26, 52]
+     scaled_anchors = torch.tensor(anchors) / (
+         1 / torch.tensor(S).unsqueeze(1).unsqueeze(1).repeat(1, 3, 2)
+     )  # equivalent to anchors * S: scales normalized anchors up to each grid size
+     loader = DataLoader(dataset=dataset, batch_size=1, shuffle=True)
+     for x, y in loader:
+         boxes = []
+         for i in range(y[0].shape[1]):
+             anchor = scaled_anchors[i]
+             print(anchor.shape)
+             print(y[i].shape)
+             boxes += cells_to_bboxes(
+                 y[i], is_preds=False, S=y[i].shape[2], anchors=anchor
+             )[0]
+         boxes = nms(boxes, iou_threshold=1, threshold=0.7, box_format="midpoint")
+         print(boxes)
+         plot_image(x[0].permute(1, 2, 0).to("cpu"), boxes)
+
+
+ if __name__ == "__main__":
+     test()
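
For reference, the label files read by load_image are assumed to hold one object per row as [class, x, y, w, h] with coordinates normalized to [0, 1]; np.roll(data, 4, axis=1) moves the class id to the last column so each row matches the [x, y, w, h, class] order the transforms and the mosaic loader expect. A minimal sketch of that conversion (the values are made up for illustration):

import numpy as np

data = np.array([[11, 0.5, 0.5, 0.25, 0.4]])  # one row: class 11, centered 0.25 x 0.4 box
bboxes = np.roll(data, 4, axis=1).tolist()    # class id moves to the last column
print(bboxes)                                 # [[0.5, 0.5, 0.25, 0.4, 11.0]]
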
loss.py ADDED
@@ -0,0 +1,87 @@
+ """
+ Implementation of a YOLO loss function similar to the one in the YOLOv3 paper;
+ the difference, from what I can tell, is that I use CrossEntropy for the classes
+ instead of BinaryCrossEntropy.
+ """
+
+ import torch
+ import torch.nn as nn
+ from pytorch_lightning import LightningModule
+
+ from utils import intersection_over_union
+
+
+ class YoloLoss_basic(LightningModule):
+     def __init__(self):
+         super(YoloLoss_basic, self).__init__()
+         self.mse = nn.MSELoss()
+         self.bce = nn.BCEWithLogitsLoss()
+         self.entropy = nn.CrossEntropyLoss()
+         self.sigmoid = nn.Sigmoid()
+
+         # Constants signifying how much to pay for each respective part of the loss
+         self.lambda_class = 1
+         self.lambda_noobj = 10
+         self.lambda_obj = 1
+         self.lambda_box = 10
+
+     def cal_loss(self, predictions, target, anchors):
+         # Check where obj and noobj (we ignore if target == -1)
+         obj = target[..., 0] == 1  # in paper this is Iobj_i
+         noobj = target[..., 0] == 0  # in paper this is Inoobj_i
+
+         # ======================= #
+         #   FOR NO OBJECT LOSS    #
+         # ======================= #
+
+         no_object_loss = self.bce(
+             predictions[..., 0:1][noobj], target[..., 0:1][noobj],
+         )
+
+         # ==================== #
+         #   FOR OBJECT LOSS    #
+         # ==================== #
+
+         anchors = anchors.reshape(1, 3, 1, 1, 2).to(predictions.device)  # follow the input's device instead of hard-coding cuda
+         box_preds = torch.cat([self.sigmoid(predictions[..., 1:3]), torch.exp(predictions[..., 3:5]) * anchors], dim=-1)
+         ious = intersection_over_union(box_preds[obj], target[..., 1:5][obj]).detach()
+         object_loss = self.mse(self.sigmoid(predictions[..., 0:1][obj]), ious * target[..., 0:1][obj])
+
+         # ======================== #
+         #   FOR BOX COORDINATES    #
+         # ======================== #
+
+         # NOTE: the next two assignments modify predictions and target in place,
+         # as in the reference implementation.
+         predictions[..., 1:3] = self.sigmoid(predictions[..., 1:3])  # x, y coordinates
+         target[..., 3:5] = torch.log(
+             (1e-16 + target[..., 3:5] / anchors)
+         )  # width, height coordinates
+         box_loss = self.mse(predictions[..., 1:5][obj], target[..., 1:5][obj])
+
+         # ================== #
+         #   FOR CLASS LOSS   #
+         # ================== #
+
+         class_loss = self.entropy(
+             predictions[..., 5:][obj], target[..., 5][obj].long(),
+         )
+
+         return (
+             self.lambda_box * box_loss
+             + self.lambda_obj * object_loss
+             + self.lambda_noobj * no_object_loss
+             + self.lambda_class * class_loss
+         )
+
+     def forward(self, predictions, target, anchors):
+         return self.cal_loss(predictions, target, anchors)
+
+
+ class YoloLoss(LightningModule):
+     def __init__(self):
+         super(YoloLoss, self).__init__()
+         self.yolo_basic = YoloLoss_basic()
+
+     def forward(self, predictions, target, scaled_anchors):
+         # Sum the per-scale losses over the 3 prediction scales
+         tot_loss = 0
+         for i in range(len(target)):
+             tot_loss += self.yolo_basic(predictions[i], target[i], scaled_anchors[i])
+         return tot_loss
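
A minimal shape sketch of how these classes are consumed (the dummy sizes and values below are illustrative, and assume utils.intersection_over_union accepts midpoint-format (K, 4) tensors as elsewhere in this repo): predictions carry one (N, 3, S, S, 5 + C) tensor per scale, targets are the (N, 3, S, S, 6) tensors built by YOLODataset, and scaled_anchors is (3, 3, 2).

import torch
from loss import YoloLoss

batch, C = 2, 20
scales = [13, 26, 52]
predictions = [torch.randn(batch, 3, s, s, 5 + C) for s in scales]
targets = [torch.zeros(batch, 3, s, s, 6) for s in scales]
for t in targets:  # mark one positive cell per scale so the obj mask is non-empty
    t[:, 0, 5, 5, 0] = 1.0
    t[:, 0, 5, 5, 1:5] = torch.tensor([0.5, 0.5, 2.0, 2.0])  # x, y, w, h in cell units
    t[:, 0, 5, 5, 5] = 3  # class id
scaled_anchors = torch.rand(3, 3, 2)  # (scale, anchor, w/h), as built in dataset.test()

total = YoloLoss()(predictions, targets, scaled_anchors)
print(total)  # scalar: sum of the per-scale losses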