""" |
|
Code adapted from previous method LOST: https://github.com/valeoai/LOST |
|
""" |
|
|
|
import os |
|
import math |
|
import torch |
|
import json |
|
import torchvision |
|
import numpy as np |
|
import skimage.io |
|
|
|
from PIL import Image |
|
from tqdm import tqdm |
|
from torchvision import transforms as pth_transforms |
|
|
|
|
|
# Standard ImageNet mean/std normalization applied to every loaded image.
transform = pth_transforms.Compose(
    [
        pth_transforms.ToTensor(),
        pth_transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
    ]
)
|
|
class ImageDataset:
    """
    Wrap a single image behind the same interface (dataloader, get_image_name,
    load_image) as the dataset class below.
    """

    def __init__(self, image_path):
        self.image_path = image_path
        self.name = image_path.split("/")[-1]

        # Load the image as RGB and apply the normalization transform.
        with open(image_path, "rb") as f:
            img = Image.open(f)
            img = img.convert("RGB")

        img = transform(img)
        # Mimic a one-element dataloader yielding (image, path) pairs.
        self.dataloader = [[img, image_path]]

    def get_image_name(self, *args, **kwargs):
        """Return the image name without its extension."""
        return self.image_path.split("/")[-1].split(".")[0]

    def load_image(self, *args, **kwargs):
        """Load the raw (untransformed) image."""
        return skimage.io.imread(self.image_path)
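
# Example usage (a minimal sketch; the file path is hypothetical):
#
#     dataset = ImageDataset("examples/dog.jpg")
#     img, path = dataset.dataloader[0]   # normalized tensor and its path
#     name = dataset.get_image_name()     # "dog"
#     raw = dataset.load_image()          # HxWx3 uint8 array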
|
|
|
class UODDataset:
    def __init__(
        self,
        dataset_name,
        dataset_set,
        root_dir,
        remove_hards: bool = False,
    ):
        """
        Build the dataloader for the requested dataset and split.
        """

        self.dataset_name = dataset_name
        self.set = dataset_set
        self.root_dir = root_dir

        if dataset_name == "VOC07":
            self.root_path = f"{root_dir}/VOC2007"
            self.year = "2007"
        elif dataset_name == "VOC12":
            self.root_path = f"{root_dir}/VOC2012"
            self.year = "2012"
        elif dataset_name == "COCO20k":
            self.year = "2014"
            self.root_path = f"{root_dir}/COCO/images/{dataset_set}{self.year}"
            self.sel20k = "data/coco_20k_filenames.txt"

            # Annotation file restricted to the 20k selected images; it is
            # built from the full train2014 annotations if missing.
            self.all_annfile = f"{root_dir}/COCO/annotations/instances_train2014.json"
            self.annfile = f"{root_dir}/instances_train2014_sel20k.json"
            if not os.path.exists(self.annfile):
                select_coco_20k(self.sel20k, self.all_annfile, self.annfile)

            # Mapping from image id to selected file path, used by load_image
            # (each line of the selection file is assumed to be a path,
            # relative to COCO/images, ending in COCO_<set><year>_<id>.jpg).
            with open(self.sel20k, "r") as f:
                self.path_20k = [line.strip() for line in f.readlines()]
            self.sel_20k = [
                str(int(p.split("_")[-1].split(".")[0])) for p in self.path_20k
            ]
        else:
            raise ValueError("Unknown dataset.")

        if not os.path.exists(self.root_path):
            raise ValueError("Please follow the README to setup the datasets.")

        self.name = f"{self.dataset_name}_{self.set}"

        if "VOC" in dataset_name:
            self.dataloader = torchvision.datasets.VOCDetection(
                self.root_path,
                year=self.year,
                image_set=self.set,
                transform=transform,
                download=False,
            )
        elif dataset_name == "COCO20k":
            self.dataloader = torchvision.datasets.CocoDetection(
                self.root_path, annFile=self.annfile, transform=transform
            )
        else:
            raise ValueError("Unknown dataset.")

        # Optionally discard images in which every object is truncated or difficult.
        self.remove_hards = remove_hards
        self.hards = []
        if remove_hards:
            self.name += "-nohards"
            self.hards = self.get_hards()
            print(f"Nb images discarded {len(self.hards)}")
|
    def __len__(self) -> int:
        return len(self.dataloader)

    def load_image(self, im_name):
        """
        Load the raw image corresponding to im_name.
        """
        if "VOC" in self.dataset_name:
            image = skimage.io.imread(
                f"{self.root_dir}/VOC{self.year}/JPEGImages/{im_name}"
            )
        elif "COCO" in self.dataset_name:
            # im_name is the image id; map it back to the selected file path.
            im_path = self.path_20k[self.sel_20k.index(im_name)]
            image = skimage.io.imread(f"{self.root_dir}/COCO/images/{im_path}")
        else:
            raise ValueError("Unknown dataset.")
        return image

    def get_image_name(self, inp):
        """
        Return the image name from a dataloader target.
        """
        if "VOC" in self.dataset_name:
            im_name = inp["annotation"]["filename"]
        elif "COCO" in self.dataset_name:
            im_name = str(inp[0]["image_id"])
        else:
            raise ValueError("Unknown dataset.")

        return im_name

    def extract_gt(self, targets, im_name):
        """
        Extract ground-truth boxes and classes from a dataloader target.
        """
        if "VOC" in self.dataset_name:
            return extract_gt_VOC(targets, remove_hards=self.remove_hards)
        elif "COCO" in self.dataset_name:
            return extract_gt_COCO(targets, remove_iscrowd=True)
        else:
            raise ValueError("Unknown dataset.")
|
    def extract_classes(self):
        """
        Return the list of classes in the dataset, caching it to a text file.
        """
        if "VOC" in self.dataset_name:
            cls_path = f"classes_{self.set}_{self.year}.txt"
        elif "COCO" in self.dataset_name:
            cls_path = f"classes_{self.dataset_name}_{self.set}_{self.year}.txt"

        if os.path.exists(cls_path):
            all_classes = []
            with open(cls_path, "r") as f:
                for line in f:
                    all_classes.append(line.strip())
        else:
            print("Extract all classes from the dataset")
            if "VOC" in self.dataset_name:
                all_classes = self.extract_classes_VOC()
            elif "COCO" in self.dataset_name:
                all_classes = self.extract_classes_COCO()

            with open(cls_path, "w") as f:
                for s in all_classes:
                    f.write(str(s) + "\n")

        return all_classes
|
    def extract_classes_VOC(self):
        all_classes = []
        for im_id, inp in enumerate(tqdm(self.dataloader)):
            objects = inp[1]["annotation"]["object"]

            for o in range(len(objects)):
                if objects[o]["name"] not in all_classes:
                    all_classes.append(objects[o]["name"])

        return all_classes

    def extract_classes_COCO(self):
        all_classes = []
        for im_id, inp in enumerate(tqdm(self.dataloader)):
            objects = inp[1]

            for o in range(len(objects)):
                if objects[o]["category_id"] not in all_classes:
                    all_classes.append(objects[o]["category_id"])

        return all_classes
|
    def get_hards(self):
        """
        Return ids of images whose objects are all truncated or difficult,
        caching the result to a text file.
        """
        hard_path = "datasets/hard_%s_%s_%s.txt" % (self.dataset_name, self.set, self.year)
        if os.path.exists(hard_path):
            hards = []
            with open(hard_path, "r") as f:
                for line in f:
                    hards.append(int(line.strip()))
        else:
            print("Discover hard images that should be discarded")

            # Only VOC annotations carry the truncated/difficult flags.
            hards = []
            if "VOC" in self.dataset_name:
                hards = discard_hard_voc(self.dataloader)

            with open(hard_path, "w") as f:
                for s in hards:
                    f.write(str(s) + "\n")

        return hards
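
# Example usage (a minimal sketch; "datasets" is a hypothetical root directory
# laid out as described in the README):
#
#     dataset = UODDataset("VOC07", "trainval", root_dir="datasets", remove_hards=True)
#     for im_id, (img, target) in enumerate(dataset.dataloader):
#         if im_id in dataset.hards:
#             continue  # skip images discarded by remove_hards
#         im_name = dataset.get_image_name(target)
#         gt_bbxs, gt_clss = dataset.extract_gt(target, im_name)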
|
|
|
|
|
def discard_hard_voc(dataloader):
    """
    Return ids of VOC images in which every object is truncated or difficult.
    """
    hards = []
    for im_id, inp in enumerate(tqdm(dataloader)):
        objects = inp[1]["annotation"]["object"]
        nb_obj = len(objects)

        # Flag each object that is marked truncated or difficult.
        hard = np.zeros(nb_obj)
        for o in range(nb_obj):
            hard[o] = (
                1
                if (objects[o]["truncated"] == "1" or objects[o]["difficult"] == "1")
                else 0
            )

        # Discard the image only if all of its objects are hard.
        if np.sum(hard) == nb_obj:
            hards.append(im_id)
    return hards

|
|
def extract_gt_COCO(targets, remove_iscrowd=True):
    """
    Extract ground-truth boxes (x1, y1, x2, y2) and class ids from COCO targets.
    """
    objects = targets
    nb_obj = len(objects)

    gt_bbxs = []
    gt_clss = []
    for o in range(nb_obj):
        # Optionally skip crowd annotations.
        if remove_iscrowd and objects[o]["iscrowd"] == 1:
            continue
        gt_cls = objects[o]["category_id"]
        gt_clss.append(gt_cls)
        # COCO boxes are (x, y, w, h); convert them to (x1, y1, x2, y2).
        bbx = objects[o]["bbox"]
        x1y1x2y2 = [bbx[0], bbx[1], bbx[0] + bbx[2], bbx[1] + bbx[3]]
        x1y1x2y2 = [int(round(x)) for x in x1y1x2y2]
        gt_bbxs.append(x1y1x2y2)

    return np.asarray(gt_bbxs), gt_clss
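
# For example (a minimal sketch of the target entries CocoDetection yields):
#
#     targets = [{"iscrowd": 0, "category_id": 18, "bbox": [10.0, 20.0, 30.0, 40.0]}]
#     boxes, classes = extract_gt_COCO(targets)
#     # boxes -> array([[10, 20, 40, 60]]), classes -> [18]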
|
|
|
|
|
def extract_gt_VOC(targets, remove_hards=False):
    """
    Extract ground-truth boxes (x1, y1, x2, y2) and class names from VOC targets.
    """
    objects = targets["annotation"]["object"]
    nb_obj = len(objects)

    gt_bbxs = []
    gt_clss = []
    for o in range(nb_obj):
        # Optionally skip truncated or difficult objects.
        if remove_hards and (
            objects[o]["truncated"] == "1" or objects[o]["difficult"] == "1"
        ):
            continue
        gt_cls = objects[o]["name"]
        gt_clss.append(gt_cls)
        obj = objects[o]["bndbox"]
        x1y1x2y2 = [
            int(obj["xmin"]),
            int(obj["ymin"]),
            int(obj["xmax"]),
            int(obj["ymax"]),
        ]

        # VOC coordinates are 1-based; shift the top-left corner to be 0-based.
        x1y1x2y2[0] -= 1
        x1y1x2y2[1] -= 1
        gt_bbxs.append(x1y1x2y2)

    return np.asarray(gt_bbxs), gt_clss

|
|
def bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False, eps=1e-7):
    """
    Compute the IoU of box1 against each box in box2.

    Boxes are given as (x1, y1, x2, y2) if x1y1x2y2 is True, otherwise as
    (cx, cy, w, h). If GIoU, DIoU or CIoU is set, the corresponding
    generalized/distance/complete IoU is returned instead.
    """
    # Transpose box2 so that coordinates can be unpacked per column.
    box2 = box2.T

    # Get the coordinates of the bounding boxes.
    if x1y1x2y2:
        b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3]
        b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3]
    else:
        # Convert from (cx, cy, w, h) to (x1, y1, x2, y2).
        b1_x1, b1_x2 = box1[0] - box1[2] / 2, box1[0] + box1[2] / 2
        b1_y1, b1_y2 = box1[1] - box1[3] / 2, box1[1] + box1[3] / 2
        b2_x1, b2_x2 = box2[0] - box2[2] / 2, box2[0] + box2[2] / 2
        b2_y1, b2_y2 = box2[1] - box2[3] / 2, box2[1] + box2[3] / 2

    # Intersection area.
    inter = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0) * (
        torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)
    ).clamp(0)

    # Union area.
    w1, h1 = b1_x2 - b1_x1, b1_y2 - b1_y1 + eps
    w2, h2 = b2_x2 - b2_x1, b2_y2 - b2_y1 + eps
    union = w1 * h1 + w2 * h2 - inter + eps

    iou = inter / union
    if GIoU or DIoU or CIoU:
        # Width and height of the smallest enclosing box.
        cw = torch.max(b1_x2, b2_x2) - torch.min(b1_x1, b2_x1)
        ch = torch.max(b1_y2, b2_y2) - torch.min(b1_y1, b2_y1)
        if CIoU or DIoU:
            c2 = cw ** 2 + ch ** 2 + eps  # squared diagonal of the enclosing box
            rho2 = (
                (b2_x1 + b2_x2 - b1_x1 - b1_x2) ** 2
                + (b2_y1 + b2_y2 - b1_y1 - b1_y2) ** 2
            ) / 4  # squared distance between box centers
            if DIoU:
                return iou - rho2 / c2
            elif CIoU:
                # Aspect-ratio consistency term.
                v = (4 / math.pi ** 2) * torch.pow(
                    torch.atan(w2 / h2) - torch.atan(w1 / h1), 2
                )
                with torch.no_grad():
                    alpha = v / (v - iou + (1 + eps))
                return iou - (rho2 / c2 + v * alpha)
        else:
            # GIoU penalizes the empty area of the enclosing box.
            c_area = cw * ch + eps
            return iou - (c_area - union) / c_area
    else:
        return iou
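
# Example (a minimal sketch): IoU of one box against two boxes, all given in
# (x1, y1, x2, y2) format.
#
#     box1 = torch.tensor([0.0, 0.0, 10.0, 10.0])
#     boxes = torch.tensor([[0.0, 0.0, 10.0, 10.0], [5.0, 5.0, 15.0, 15.0]])
#     ious = bbox_iou(box1, boxes)  # approximately tensor([1.0000, 0.1429])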
|
|
|
def select_coco_20k(
    sel_file, all_annotations_file, out_file="datasets/instances_train2014_sel20k.json"
):
    """
    Build the COCO 20k annotation file by filtering the full train2014
    annotations down to the images listed in sel_file, and save it to out_file.
    """
    print("Building COCO 20k dataset.")

    # Load the full train2014 annotations.
    with open(all_annotations_file, "r") as f:
        train2014 = json.load(f)

    # Load the selected file paths and derive their image ids.
    with open(sel_file, "r") as f:
        sel_20k = f.readlines()
        sel_20k = [s.replace("\n", "") for s in sel_20k]
    im20k = [str(int(s.split("_")[-1].split(".")[0])) for s in sel_20k]

    new_anno = []
    new_images = []

    # Keep only the annotations and image entries of the selected ids.
    for i in tqdm(im20k):
        new_anno.extend(
            [a for a in train2014["annotations"] if a["image_id"] == int(i)]
        )
        new_images.extend([a for a in train2014["images"] if a["id"] == int(i)])

    train2014_20k = {}
    train2014_20k["images"] = new_images
    train2014_20k["annotations"] = new_anno
    train2014_20k["categories"] = train2014["categories"]

    with open(out_file, "w") as outfile:
        json.dump(train2014_20k, outfile)

    print("Done.")
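
# Each line of sel_file is expected to name one train2014 image, e.g. (a
# hypothetical entry following the standard COCO 2014 naming scheme):
#
#     train2014/COCO_train2014_000000000009.jpg
#
# from which the image id is recovered as str(int("000000000009")) == "9".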