import json
import os
from collections import defaultdict

import numpy as np
import PIL.Image
import PIL.ImageOps
import torchvision.transforms as T
from torch.utils.data import Dataset

PREDICATES_VALUES = ["left of", "right of", "above", "below", "inside", "surrounding"]
MEAN = [0.5, 0.5, 0.5]
STD = [0.5, 0.5, 0.5]


def imagenet_preprocess():
    # With MEAN = STD = [0.5, 0.5, 0.5] this maps tensors in [0, 1] to roughly [-1, 1].
    return T.Normalize(mean=MEAN, std=STD)


class Resize(object):
    """Resize a PIL image to a fixed size, given either an int or an (H, W) tuple."""

    def __init__(self, size, interp=PIL.Image.BILINEAR):
        if isinstance(size, tuple):
            H, W = size
            # PIL's Image.resize expects (W, H).
            self.size = (W, H)
        else:
            self.size = (size, size)
        self.interp = interp

    def __call__(self, img):
        return img.resize(self.size, self.interp)
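

# A minimal sketch (not part of the original pipeline) of how the helpers above are
# composed by CocoStuff.set_image_size below: resize, convert to a CHW float tensor,
# then normalize to roughly [-1, 1]. The 640x480 input size is only illustrative.
#
#   transform = T.Compose([Resize((64, 64)), T.ToTensor(), imagenet_preprocess()])
#   img = PIL.Image.new("RGB", (640, 480))
#   x = transform(img)   # FloatTensor of shape (3, 64, 64), values in [-1, 1]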


class CocoStuff(Dataset):
    """
    A PyTorch Dataset for loading COCO and COCO-Stuff annotations.

    Parameters
    ----------
    image_dir: str
        Path to a directory where images are held.
    instances_json: str
        Path to a JSON file giving COCO instance annotations.
    stuff_json: str, optional
        Path to a JSON file giving COCO-Stuff annotations.
    stuff_only: bool, optional
        If True then only iterate over images which appear in stuff_json;
        if False then iterate over all images in instances_json.
    image_size: int or tuple, optional
        Size at which to load images, either a single int or an (H, W) tuple.
        Default 64.
    normalize_images: bool, optional
        If True then normalize images with the MEAN and STD defined above.
    max_samples: int, optional
        If None use all images. Otherwise only use images in the range
        [0, max_samples). Default None.
    min_object_size: float, optional
        Ignore objects whose bounding box takes up less than this fraction of the image.
    min_objects_per_image: int, optional
        Ignore images which have fewer than this many object annotations.
    max_objects_per_image: int, optional
        Ignore images which have more than this many object annotations.
    instance_whitelist: list, optional
        None means use all instance categories. Otherwise a list giving a whitelist of
        instance category names to use.
    stuff_whitelist: list, optional
        None means use all stuff categories. Otherwise a list giving a whitelist of
        stuff category names to use.
    no__img__: bool, optional
        Flag stored on the dataset; not used directly in this class. Default False.
    test_part: bool, optional
        When split is "val", use the images after the first 1024 instead of the
        first 1024. Default False.
    split: str, optional
        Either "train" or "val". When "val", the image list is truncated as
        described under test_part. Default "train".
    iscrowd: bool, optional
        If True, skip annotations marked with iscrowd=1. Default True.
    mode: str, optional
        When "train", indices past the end of the image list wrap around and
        return a horizontally flipped image. Default "train".
    """

    def __init__(
        self,
        image_dir,
        instances_json,
        stuff_json=None,
        stuff_only=True,
        image_size=64,
        normalize_images=True,
        max_samples=None,
        min_object_size=0.02,
        min_objects_per_image=3,
        max_objects_per_image=8,
        instance_whitelist=None,
        stuff_whitelist=None,
        no__img__=False,
        test_part=False,
        split="train",
        iscrowd=True,
        mode="train",
        **kwargs
    ):
        super().__init__()
        if stuff_only and stuff_json is None:
            print("WARNING: Got stuff_only=True but stuff_json=None.")
            print("Falling back to stuff_only=False.")
            stuff_only = False

        self.image_dir = image_dir
        self.max_samples = max_samples
        self.normalize_images = normalize_images
        self.iscrowd = iscrowd

        self.left_right_flip = False
        self.max_objects_per_image = max_objects_per_image
        self.mode = mode

        if image_size is not None:
            self.set_image_size(image_size)
            print(self.transform)
        self.no__img__ = no__img__

        with open(instances_json, "r") as f:
            instances_data = json.load(f)

        self.image_id_to_sentences = {}
        stuff_data = None
        if stuff_json is not None and stuff_json != "":
            with open(stuff_json, "r") as f:
                stuff_data = json.load(f)
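
        # For reference (a summary, not an exhaustive schema): the code below only
        # relies on these standard COCO fields being present in the loaded JSON:
        #   instances_data["images"]:  [{"id", "file_name", "width", "height"}, ...]
        #   *_data["categories"]:      [{"id", "name"}, ...]
        #   *_data["annotations"]:     [{"image_id", "bbox": [x, y, w, h],
        #                                "category_id", "iscrowd"}, ...]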

        self.image_ids = []
        self.image_id_to_filename = {}
        self.image_id_to_size = {}
        for image_data in instances_data["images"]:
            image_id = image_data["id"]
            filename = image_data["file_name"]
            width = image_data["width"]
            height = image_data["height"]
            self.image_ids.append(image_id)
            self.image_id_to_filename[image_id] = filename
            self.image_id_to_size[image_id] = (width, height)

        object_idx_to_name = {}

        all_instance_categories = self.populate_categories(
            instances_data, object_idx_to_name
        )
        all_stuff_categories = self.populate_categories(stuff_data, object_idx_to_name)

        if instance_whitelist is None:
            instance_whitelist = all_instance_categories
        if stuff_whitelist is None:
            stuff_whitelist = all_stuff_categories

        category_whitelist = set(instance_whitelist) | set(stuff_whitelist)

        self.image_id_to_objects = defaultdict(list)
        self.add_object_instances(
            instances_data, min_object_size, object_idx_to_name, category_whitelist
        )
        image_ids_with_stuff = self.add_object_instances(
            stuff_data, min_object_size, object_idx_to_name, category_whitelist
        )
        if stuff_only:
            new_image_ids = []
            for image_id in self.image_ids:
                if image_id in image_ids_with_stuff:
                    new_image_ids.append(image_id)
            self.image_ids = new_image_ids

            all_image_ids = set(self.image_id_to_filename.keys())
            image_ids_to_remove = all_image_ids - image_ids_with_stuff
            for image_id in image_ids_to_remove:
                self.image_id_to_filename.pop(image_id, None)
                self.image_id_to_size.pop(image_id, None)
                self.image_id_to_objects.pop(image_id, None)

        # Keep only images whose object count falls in the allowed range.
        new_image_ids = []
        total_objs = 0
        for image_id in self.image_ids:
            num_objs = len(self.image_id_to_objects[image_id])
            total_objs += num_objs
            if min_objects_per_image <= num_objs <= max_objects_per_image:
                new_image_ids.append(image_id)
        self.image_ids = new_image_ids

        if split == "val":
            if test_part:
                self.image_ids = self.image_ids[1024:]
            else:
                print("Entering validation part")
                self.image_ids = self.image_ids[:1024]

    def populate_categories(self, data, object_idx_to_name):
        """Collect category names from a COCO-style dict; returns [] if data is None."""
        all_categories = []
        if data is None:
            return all_categories
        for category_data in data["categories"]:
            category_id = category_data["id"]
            category_name = category_data["name"]
            all_categories.append(category_name)
            object_idx_to_name[category_id] = category_name
        return all_categories

    def add_object_instances(
        self, data, min_object_size, object_idx_to_name, category_whitelist
    ):
        """Filter annotations and group them by image id; returns the image ids seen."""
        image_ids_present = set()
        if data is None:
            return image_ids_present
        for object_data in data["annotations"]:
            image_id = object_data["image_id"]
            _, _, w, h = object_data["bbox"]
            image_ids_present.add(image_id)
            W, H = self.image_id_to_size[image_id]
            # Fraction of the image covered by the box, e.g. a 64x64 box in a
            # 640x480 image covers 4096 / 307200 ~= 0.013 and would be dropped
            # with the default min_object_size of 0.02.
            box_area = (w * h) / (W * H)
            box_ok = box_area > min_object_size
            object_name = object_idx_to_name[object_data["category_id"]]
            category_ok = object_name in category_whitelist
            other_ok = object_name != "other"

            condition = box_ok and category_ok and other_ok
            if self.iscrowd:
                condition = condition and (object_data["iscrowd"] != 1)
            if condition:
                self.image_id_to_objects[image_id].append(object_data)
        return image_ids_present

    def set_image_size(self, image_size):
        print("called set_image_size", image_size)
        transform = [Resize(image_size), T.ToTensor()]
        if self.normalize_images:
            transform.append(imagenet_preprocess())
        self.transform = T.Compose(transform)
        self.image_size = image_size

    def total_objects(self):
        total_objs = 0
        for i, image_id in enumerate(self.image_ids):
            if self.max_samples and i >= self.max_samples:
                break
            num_objs = len(self.image_id_to_objects[image_id])
            total_objs += num_objs
        return total_objs

    def __len__(self):
        if self.max_samples is None:
            if self.left_right_flip:
                return len(self.image_ids) * 2
            return len(self.image_ids)
        return min(len(self.image_ids), self.max_samples)
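
    # Note: with left_right_flip=True (it is hard-coded to False in __init__),
    # __len__ reports twice as many samples and, in train mode, __getitem__ maps
    # the second half of the indices to horizontally mirrored images.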

    def __getitem__(self, index):
        """Get an image, a dummy label (always 0) and the image id.

        Returns a tuple of: image (FloatTensor of shape (C, H, W)), the label 0,
        and the COCO image id.
        """
        flip = False
        if self.mode == "train":
            if index >= len(self.image_ids):
                index = index - len(self.image_ids)
                flip = True

        image_id = self.image_ids[index]

        filename = self.image_id_to_filename[image_id]
        image_path = os.path.join(self.image_dir, filename)
        with open(image_path, "rb") as f:
            with PIL.Image.open(f) as image:
                if flip and self.mode == "train":
                    image = PIL.ImageOps.mirror(image)
                image = self.transform(image.convert("RGB"))

        return image, 0, image_id
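

if __name__ == "__main__":
    # Minimal usage sketch (not part of the original module). The annotation and
    # image paths below are placeholders for a standard COCO/COCO-Stuff layout;
    # adjust them to wherever the dataset actually lives.
    from torch.utils.data import DataLoader

    dataset = CocoStuff(
        image_dir="data/coco/images/val2017",                            # hypothetical path
        instances_json="data/coco/annotations/instances_val2017.json",   # hypothetical path
        stuff_json="data/coco/annotations/stuff_val2017.json",           # hypothetical path
        image_size=(64, 64),
        split="val",
    )
    print("images kept after filtering:", len(dataset))
    print("total annotated objects:", dataset.total_objects())

    loader = DataLoader(dataset, batch_size=4, shuffle=False, num_workers=0)
    images, labels, image_ids = next(iter(loader))
    print(images.shape)   # torch.Size([4, 3, 64, 64])
    print(image_ids)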