# Copyright 2021 - Valeo Comfort and Driving Assistance - Oriane Siméoni @ valeo.ai
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Code adapted from the previous method LOST: https://github.com/valeoai/LOST
"""

import os
import math
import json

import torch
import torchvision
import numpy as np
import skimage.io

from PIL import Image
from tqdm import tqdm
from torchvision import transforms as pth_transforms

# Image transformation applied to all images (ImageNet normalization)
transform = pth_transforms.Compose(
    [
        pth_transforms.ToTensor(),
        pth_transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
    ]
)


class ImageDataset:
    def __init__(self, image_path):
        self.image_path = image_path
        self.name = image_path.split("/")[-1]

        # Read the image
        with open(image_path, "rb") as f:
            img = Image.open(f)
            img = img.convert("RGB")

        # Build a single-element dataloader
        img = transform(img)
        self.dataloader = [[img, image_path]]

    def get_image_name(self, *args, **kwargs):
        return self.image_path.split("/")[-1].split(".")[0]

    def load_image(self, *args, **kwargs):
        return skimage.io.imread(self.image_path)
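

# The helper below is an illustrative usage sketch, not part of the original
# LOST/FOUND code: the path "examples/dog.jpg" is a placeholder, and any RGB
# image readable by PIL works. It simply runs one image through the
# single-element dataloader built above.
def _demo_image_dataset(image_path="examples/dog.jpg"):
    dataset = ImageDataset(image_path)
    for img, path in dataset.dataloader:
        # img is a normalized 3xHxW float tensor, ready for a ViT backbone
        print(dataset.get_image_name(), tuple(img.shape), path)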


class UODDataset:
    def __init__(
        self,
        dataset_name,
        dataset_set,
        root_dir,
        remove_hards: bool = False,
    ):
        """
        Build the dataloader
        """
        self.dataset_name = dataset_name
        self.set = dataset_set
        self.root_dir = root_dir

        if dataset_name == "VOC07":
            self.root_path = f"{root_dir}/VOC2007"
            self.year = "2007"
        elif dataset_name == "VOC12":
            self.root_path = f"{root_dir}/VOC2012"
            self.year = "2012"
        elif dataset_name == "COCO20k":
            self.year = "2014"
            self.root_path = f"{root_dir}/COCO/images/{dataset_set}{self.year}"
            self.sel20k = 'data/coco_20k_filenames.txt'
            # JSON file constructed based on the COCO train2014 gt
            self.all_annfile = f"{root_dir}/COCO/annotations/instances_train2014.json"
            self.annfile = f"{root_dir}/instances_train2014_sel20k.json"
            if not os.path.exists(self.annfile):
                select_coco_20k(self.sel20k, self.all_annfile, self.annfile)
            # Map COCO image ids back to file paths (used in load_image),
            # assuming each line of the selection file stores a relative image
            # path, parsed the same way as in select_coco_20k below
            with open(self.sel20k, "r") as f:
                self.path_20k = [line.strip() for line in f]
            self.sel_20k = [
                str(int(p.split("_")[-1].split(".")[0])) for p in self.path_20k
            ]
        else:
            raise ValueError("Unknown dataset.")

        if not os.path.exists(self.root_path):
            raise ValueError("Please follow the README to setup the datasets.")

        self.name = f"{self.dataset_name}_{self.set}"

        # Build the dataloader
        if "VOC" in dataset_name:
            self.dataloader = torchvision.datasets.VOCDetection(
                self.root_path,
                year=self.year,
                image_set=self.set,
                transform=transform,
                download=False,
            )
        elif "COCO20k" == dataset_name:
            self.dataloader = torchvision.datasets.CocoDetection(
                self.root_path, annFile=self.annfile, transform=transform
            )
        else:
            raise ValueError("Unknown dataset.")

        # Hard images that are excluded from the evaluation
        self.remove_hards = remove_hards
        self.hards = []
        if remove_hards:
            self.name += "-nohards"
            self.hards = self.get_hards()
            print(f"Nb images discarded {len(self.hards)}")

    def __len__(self) -> int:
        return len(self.dataloader)

    def load_image(self, im_name):
        """
        Load the image corresponding to im_name
        """
        if "VOC" in self.dataset_name:
            image = skimage.io.imread(
                f"{self.root_dir}/VOC{self.year}/JPEGImages/{im_name}"
            )
        elif "COCO" in self.dataset_name:
            im_path = self.path_20k[self.sel_20k.index(im_name)]
            image = skimage.io.imread(f"{self.root_dir}/COCO/images/{im_path}")
        else:
            raise ValueError("Unknown dataset.")
        return image

    def get_image_name(self, inp):
        """
        Return the image name
        """
        if "VOC" in self.dataset_name:
            im_name = inp["annotation"]["filename"]
        elif "COCO" in self.dataset_name:
            im_name = str(inp[0]["image_id"])
        return im_name

    def extract_gt(self, targets, im_name):
        if "VOC" in self.dataset_name:
            return extract_gt_VOC(targets, remove_hards=self.remove_hards)
        elif "COCO" in self.dataset_name:
            return extract_gt_COCO(targets, remove_iscrowd=True)
        else:
            raise ValueError("Unknown dataset.")

    def extract_classes(self):
        if "VOC" in self.dataset_name:
            cls_path = f"classes_{self.set}_{self.year}.txt"
        elif "COCO" in self.dataset_name:
            cls_path = f"classes_{self.dataset_name}_{self.set}_{self.year}.txt"
        else:
            raise ValueError("Unknown dataset.")

        # Load the class list if it was already extracted
        if os.path.exists(cls_path):
            all_classes = []
            with open(cls_path, "r") as f:
                for line in f:
                    all_classes.append(line.strip())
        else:
            print("Extract all classes from the dataset")
            if "VOC" in self.dataset_name:
                all_classes = self.extract_classes_VOC()
            elif "COCO" in self.dataset_name:
                all_classes = self.extract_classes_COCO()
            with open(cls_path, "w") as f:
                for s in all_classes:
                    f.write(str(s) + "\n")
        return all_classes

    def extract_classes_VOC(self):
        all_classes = []
        for im_id, inp in enumerate(tqdm(self.dataloader)):
            objects = inp[1]["annotation"]["object"]
            for o in range(len(objects)):
                if objects[o]["name"] not in all_classes:
                    all_classes.append(objects[o]["name"])
        return all_classes

    def extract_classes_COCO(self):
        all_classes = []
        for im_id, inp in enumerate(tqdm(self.dataloader)):
            objects = inp[1]
            for o in range(len(objects)):
                if objects[o]["category_id"] not in all_classes:
                    all_classes.append(objects[o]["category_id"])
        return all_classes

    def get_hards(self):
        hard_path = "datasets/hard_%s_%s_%s.txt" % (
            self.dataset_name,
            self.set,
            self.year,
        )
        hards = []
        if os.path.exists(hard_path):
            with open(hard_path, "r") as f:
                for line in f:
                    hards.append(int(line.strip()))
        else:
            print("Discover hard images that should be discarded")
            if "VOC" in self.dataset_name:
                hards = discard_hard_voc(self.dataloader)
            with open(hard_path, "w") as f:
                for s in hards:
                    f.write(str(s) + "\n")
        return hards


def discard_hard_voc(dataloader):
    hards = []
    for im_id, inp in enumerate(tqdm(dataloader)):
        objects = inp[1]["annotation"]["object"]
        nb_obj = len(objects)
        hard = np.zeros(nb_obj)
        for o in range(nb_obj):
            hard[o] = (
                1
                if (objects[o]["truncated"] == "1" or objects[o]["difficult"] == "1")
                else 0
            )
        # Discard images with only truncated or difficult objects
        if np.sum(hard) == nb_obj:
            hards.append(im_id)
    return hards


def extract_gt_COCO(targets, remove_iscrowd=True):
    objects = targets
    nb_obj = len(objects)
    gt_bbxs = []
    gt_clss = []
    for o in range(nb_obj):
        # Remove iscrowd boxes
        if remove_iscrowd and objects[o]["iscrowd"] == 1:
            continue
        gt_cls = objects[o]["category_id"]
        gt_clss.append(gt_cls)
        bbx = objects[o]["bbox"]  # COCO format: (x, y, w, h)
        x1y1x2y2 = [bbx[0], bbx[1], bbx[0] + bbx[2], bbx[1] + bbx[3]]
        x1y1x2y2 = [int(round(x)) for x in x1y1x2y2]
        gt_bbxs.append(x1y1x2y2)
    return np.asarray(gt_bbxs), gt_clss


def extract_gt_VOC(targets, remove_hards=False):
    objects = targets["annotation"]["object"]
    nb_obj = len(objects)
    gt_bbxs = []
    gt_clss = []
    for o in range(nb_obj):
        if remove_hards and (
            objects[o]["truncated"] == "1" or objects[o]["difficult"] == "1"
        ):
            continue
        gt_cls = objects[o]["name"]
        gt_clss.append(gt_cls)
        obj = objects[o]["bndbox"]
        x1y1x2y2 = [
            int(obj["xmin"]),
            int(obj["ymin"]),
            int(obj["xmax"]),
            int(obj["ymax"]),
        ]
        # Original annotations are integers in the range [1, W or H].
        # Assuming they mean 1-based pixel indices (inclusive), a box with
        # annotation (xmin=1, xmax=W) covers the whole image. In coordinate
        # space this is represented by (xmin=0, xmax=W).
        x1y1x2y2[0] -= 1
        x1y1x2y2[1] -= 1
        gt_bbxs.append(x1y1x2y2)
    return np.asarray(gt_bbxs), gt_clss
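

# Illustrative sanity check of the VOC coordinate convention handled in
# extract_gt_VOC above; the toy annotation record is hypothetical and only
# mimics the target structure returned by torchvision's VOCDetection.
def _demo_voc_coordinates():
    toy_target = {
        "annotation": {
            "object": [
                {
                    "name": "dog",
                    "truncated": "0",
                    "difficult": "0",
                    "bndbox": {"xmin": "1", "ymin": "1", "xmax": "100", "ymax": "80"},
                }
            ]
        }
    }
    boxes, classes = extract_gt_VOC(toy_target)
    # A full-image 1-based box (1, 1, 100, 80) becomes (0, 0, 100, 80)
    print(boxes, classes)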


def bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False, eps=1e-7):
    # Adapted from https://github.com/ultralytics/yolov5/blob/develop/utils/general.py
    # Returns the IoU of box1 to box2. box1 is 4, box2 is nx4
    box2 = box2.T

    # Get the coordinates of bounding boxes
    if x1y1x2y2:  # x1, y1, x2, y2 = box1
        b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3]
        b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3]
    else:  # transform from xywh to xyxy
        b1_x1, b1_x2 = box1[0] - box1[2] / 2, box1[0] + box1[2] / 2
        b1_y1, b1_y2 = box1[1] - box1[3] / 2, box1[1] + box1[3] / 2
        b2_x1, b2_x2 = box2[0] - box2[2] / 2, box2[0] + box2[2] / 2
        b2_y1, b2_y2 = box2[1] - box2[3] / 2, box2[1] + box2[3] / 2

    # Intersection area
    inter = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0) * (
        torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)
    ).clamp(0)

    # Union area
    w1, h1 = b1_x2 - b1_x1, b1_y2 - b1_y1 + eps
    w2, h2 = b2_x2 - b2_x1, b2_y2 - b2_y1 + eps
    union = w1 * h1 + w2 * h2 - inter + eps

    iou = inter / union
    if GIoU or DIoU or CIoU:
        # Convex (smallest enclosing box) width and height
        cw = torch.max(b1_x2, b2_x2) - torch.min(b1_x1, b2_x1)
        ch = torch.max(b1_y2, b2_y2) - torch.min(b1_y1, b2_y1)
        if CIoU or DIoU:  # Distance or Complete IoU https://arxiv.org/abs/1911.08287v1
            c2 = cw ** 2 + ch ** 2 + eps  # convex diagonal squared
            rho2 = (
                (b2_x1 + b2_x2 - b1_x1 - b1_x2) ** 2
                + (b2_y1 + b2_y2 - b1_y1 - b1_y2) ** 2
            ) / 4  # center distance squared
            if DIoU:
                return iou - rho2 / c2  # DIoU
            elif CIoU:
                # https://github.com/Zzh-tju/DIoU-SSD-pytorch/blob/master/utils/box/box_utils.py#L47
                v = (4 / math.pi ** 2) * torch.pow(
                    torch.atan(w2 / h2) - torch.atan(w1 / h1), 2
                )
                with torch.no_grad():
                    alpha = v / (v - iou + (1 + eps))
                return iou - (rho2 / c2 + v * alpha)  # CIoU
        else:  # GIoU https://arxiv.org/pdf/1902.09630.pdf
            c_area = cw * ch + eps  # convex area
            return iou - (c_area - union) / c_area  # GIoU
    else:
        return iou  # IoU


def select_coco_20k(
    sel_file, all_annotations_file, out_file="datasets/instances_train2014_sel20k.json"
):
    print("Building COCO 20k dataset.")

    # Load all annotations
    with open(all_annotations_file, "r") as f:
        train2014 = json.load(f)

    # Load the names of the selected images
    with open(sel_file, "r") as f:
        sel_20k = f.readlines()
    sel_20k = [s.replace("\n", "") for s in sel_20k]
    im20k = [str(int(s.split("_")[-1].split(".")[0])) for s in sel_20k]

    new_anno = []
    new_images = []
    for i in tqdm(im20k):
        new_anno.extend(
            [a for a in train2014["annotations"] if a["image_id"] == int(i)]
        )
        new_images.extend([a for a in train2014["images"] if a["id"] == int(i)])

    train2014_20k = {}
    train2014_20k["images"] = new_images
    train2014_20k["annotations"] = new_anno
    train2014_20k["categories"] = train2014["categories"]

    # Write the filtered annotation file; the output path is configurable so
    # that it matches the annfile expected by UODDataset
    with open(out_file, "w") as outfile:
        json.dump(train2014_20k, outfile)
    print("Done.")
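

if __name__ == "__main__":
    # Minimal sanity check for bbox_iou (illustrative only): toy boxes in
    # (x1, y1, x2, y2) format, box1 against a batch of three candidates.
    box1 = torch.tensor([10.0, 10.0, 50.0, 50.0])
    boxes2 = torch.tensor(
        [
            [10.0, 10.0, 50.0, 50.0],  # identical box   -> IoU ~ 1
            [30.0, 30.0, 70.0, 70.0],  # partial overlap -> IoU ~ 0.14
            [60.0, 60.0, 90.0, 90.0],  # disjoint box    -> IoU = 0
        ]
    )
    print("IoU: ", bbox_iou(box1, boxes2))
    print("GIoU:", bbox_iou(box1, boxes2, GIoU=True))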