# Copyright (c) Facebook, Inc. and its affiliates.
import functools
import json
import logging
import multiprocessing as mp
import os
from itertools import chain

import numpy as np
import pycocotools.mask as mask_util
from detectron2.structures import BoxMode
from detectron2.utils.comm import get_world_size
from detectron2.utils.file_io import PathManager
from detectron2.utils.logger import setup_logger
from PIL import Image

try:
    import cv2  # noqa
except ImportError:
    # OpenCV is an optional dependency at the moment
    pass


logger = logging.getLogger(__name__)


def _get_cityscapes_files(image_dir, gt_dir):
    """
    Collect the image / annotation file paths of a cityscapes split.

    Every sub-directory of `image_dir` is treated as a city, and every file in
    it must end with "leftImg8bit.png". The matching annotation paths are built
    from the same basename inside `gt_dir/<city>`.

    Args:
        image_dir (str): directory containing one sub-directory of images per city.
        gt_dir (str): directory containing one sub-directory of annotations per city.

    Returns:
        list[tuple]: one (image_file, instance_file, label_file, json_file)
        tuple of paths per image.

    Raises:
        AssertionError: if an image name lacks the expected suffix, if no image
            is found, or if a file of the first tuple does not exist.
    """
    suffix = "leftImg8bit.png"
    files = []
    # scan through the directory
    cities = PathManager.ls(image_dir)
    logger.info(f"{len(cities)} cities found in '{image_dir}'.")
    for city in cities:
        city_img_dir = os.path.join(image_dir, city)
        city_gt_dir = os.path.join(gt_dir, city)
        for basename in PathManager.ls(city_img_dir):
            image_file = os.path.join(city_img_dir, basename)

            assert basename.endswith(suffix), basename
            basename = basename[: -len(suffix)]

            instance_file = os.path.join(
                city_gt_dir, basename + "gtFine_instanceIds.png"
            )
            label_file = os.path.join(city_gt_dir, basename + "gtFine_labelIds.png")
            json_file = os.path.join(city_gt_dir, basename + "gtFine_polygons.json")

            files.append((image_file, instance_file, label_file, json_file))
    assert len(files), "No images found in {}".format(image_dir)
    # Only the first tuple is checked: a cheap sanity test of the directory layout.
    for f in files[0]:
        assert PathManager.isfile(f), f
    return files


def load_cityscapes_instances(image_dir, gt_dir, from_json=True, to_polygons=True):
    """
    Args:
        image_dir (str): path to the raw dataset. e.g., "~/cityscapes/leftImg8bit/train".
        gt_dir (str): path to the raw annotations. e.g., "~/cityscapes/gtFine/train".
        from_json (bool): whether to read annotations from the raw json file or the png files.
        to_polygons (bool): whether to represent the segmentation as polygons
            (COCO's format) instead of masks (cityscapes's format).

    Returns:
        list[dict]: a list of dicts in Detectron2 standard format. (See
        `Using Custom Datasets </tutorials/datasets.html>`_ )
    """
    if from_json:
        assert to_polygons, (
            "Cityscapes's json annotations are in polygon format. "
            "Converting to mask format is not supported now."
        )
    files = _get_cityscapes_files(image_dir, gt_dir)

    logger.info("Preprocessing cityscapes annotations ...")
    # This is still not fast: all workers will execute duplicate works and will
    # take up to 10m on a 8GPU server.
    num_workers = max(mp.cpu_count() // get_world_size() // 2, 4)
    # The context manager tears the worker processes down once `map` has
    # returned, so they do not outlive this call.
    with mp.Pool(processes=num_workers) as pool:
        ret = pool.map(
            functools.partial(
                _cityscapes_files_to_dict, from_json=from_json, to_polygons=to_polygons
            ),
            files,
        )
    logger.info("Loaded {} images from {}".format(len(ret), image_dir))

    # Map cityscape ids to contiguous ids
    from cityscapesscripts.helpers.labels import labels

    labels = [
        label for label in labels if label.hasInstances and not label.ignoreInEval
    ]
    dataset_id_to_contiguous_id = {
        label.id: idx for idx, label in enumerate(labels)
    }
    for dict_per_image in ret:
        for anno in dict_per_image["annotations"]:
            anno["category_id"] = dataset_id_to_contiguous_id[anno["category_id"]]
    return ret


def load_cityscapes_semantic(image_dir, gt_dir):
    """
    Args:
        image_dir (str): path to the raw dataset. e.g., "~/cityscapes/leftImg8bit/train".
        gt_dir (str): path to the raw annotations. e.g., "~/cityscapes/gtFine/train".

    Returns:
        list[dict]: a list of dict, each has "file_name" and
            "sem_seg_file_name".
    """
    ret = []
    # gt_dir is small and contains many small files. make sense to fetch to local first
    gt_dir = PathManager.get_local_path(gt_dir)
    for image_file, _, label_file, json_file in _get_cityscapes_files(
        image_dir, gt_dir
    ):
        label_file = label_file.replace("labelIds", "labelTrainIds")

        # The json file is only read for the image size.
        with PathManager.open(json_file, "r") as f:
            jsonobj = json.load(f)
        ret.append(
            {
                "file_name": image_file,
                "sem_seg_file_name": label_file,
                "height": jsonobj["imgHeight"],
                "width": jsonobj["imgWidth"],
            }
        )
    assert len(ret), f"No images found in {image_dir}!"
    assert PathManager.isfile(
        ret[0]["sem_seg_file_name"]
    ), "Please generate labelTrainIds.png with cityscapesscripts/preparation/createTrainIdLabelImgs.py"  # noqa
    return ret


def _cityscapes_files_to_dict(files, from_json, to_polygons):
    """
    Parse cityscapes annotation files to an instance segmentation dataset dict.

    Args:
        files (tuple): consists of (image_file, instance_id_file, label_id_file, json_file)
        from_json (bool): whether to read annotations from the raw json file or the png files.
        to_polygons (bool): whether to represent the segmentation as polygons
            (COCO's format) instead of masks (cityscapes's format).

    Returns:
        A dict in Detectron2 Dataset format.
    """
    from cityscapesscripts.helpers.labels import id2label, name2label

    image_file, instance_id_file, _, json_file = files

    annos = []

    if from_json:
        from shapely.geometry import MultiPolygon, Polygon

        with PathManager.open(json_file, "r") as f:
            jsonobj = json.load(f)
        ret = {
            "file_name": image_file,
            "image_id": os.path.basename(image_file),
            "height": jsonobj["imgHeight"],
            "width": jsonobj["imgWidth"],
        }

        # `polygons_union` contains the union of all valid polygons.
        polygons_union = Polygon()

        # CityscapesScripts draw the polygons in sequential order
        # and each polygon *overwrites* existing ones. See
        # (https://github.com/mcordts/cityscapesScripts/blob/master/cityscapesscripts/preparation/json2instanceImg.py) # noqa
        # We use reverse order, and each polygon *avoids* early ones.
        # This will resolve the polygon overlaps in the same way as CityscapesScripts.
        for obj in jsonobj["objects"][::-1]:
            if "deleted" in obj:  # cityscapes data format specific
                continue
            label_name = obj["label"]

            try:
                label = name2label[label_name]
            except KeyError:
                if label_name.endswith("group"):  # crowd area
                    label = name2label[label_name[: -len("group")]]
                else:
                    raise
            if label.id < 0:  # cityscapes data format
                continue

            # Cityscapes's raw annotations uses integer coordinates
            # Therefore +0.5 here
            poly_coord = np.asarray(obj["polygon"], dtype="f4") + 0.5
            # CityscapesScript uses PIL.ImageDraw.polygon to rasterize
            # polygons for evaluation. This function operates in integer space
            # and draws each pixel whose center falls into the polygon.
            # Therefore it draws a polygon which is 0.5 "fatter" in expectation.
            # We therefore dilate the input polygon by 0.5 as our input.
            poly = Polygon(poly_coord).buffer(0.5, resolution=4)

            if not label.hasInstances or label.ignoreInEval:
                # even if we won't store the polygon it still contributes to overlaps resolution
                polygons_union = polygons_union.union(poly)
                continue

            # Take non-overlapping part of the polygon
            poly_wo_overlaps = poly.difference(polygons_union)
            if poly_wo_overlaps.is_empty:
                continue
            polygons_union = polygons_union.union(poly)

            anno = {}
            anno["iscrowd"] = label_name.endswith("group")
            anno["category_id"] = label.id

            if isinstance(poly_wo_overlaps, Polygon):
                poly_list = [poly_wo_overlaps]
            elif isinstance(poly_wo_overlaps, MultiPolygon):
                poly_list = poly_wo_overlaps.geoms
            else:
                raise NotImplementedError(
                    "Unknown geometric structure {}".format(poly_wo_overlaps)
                )

            poly_coord = []
            for poly_el in poly_list:
                # COCO API can work only with exterior boundaries now, hence we store only them.
                # TODO: store both exterior and interior boundaries once other parts of the
                # codebase support holes in polygons.
                poly_coord.append(list(chain(*poly_el.exterior.coords)))
            anno["segmentation"] = poly_coord
            (xmin, ymin, xmax, ymax) = poly_wo_overlaps.bounds

            anno["bbox"] = (xmin, ymin, xmax, ymax)
            anno["bbox_mode"] = BoxMode.XYXY_ABS

            annos.append(anno)
    else:
        # See also the official annotation parsing scripts at
        # https://github.com/mcordts/cityscapesScripts/blob/master/cityscapesscripts/evaluation/instances2dict.py  # noqa
        with PathManager.open(instance_id_file, "rb") as f:
            inst_image = np.asarray(Image.open(f), order="F")
        # ids < 24 are stuff labels (filtering them first is about 5% faster)
        flattened_ids = np.unique(inst_image[inst_image >= 24])

        ret = {
            "file_name": image_file,
            "image_id": os.path.basename(image_file),
            "height": inst_image.shape[0],
            "width": inst_image.shape[1],
        }

        for instance_id in flattened_ids:
            # For non-crowd annotations, instance_id // 1000 is the label_id
            # Crowd annotations have <1000 instance ids
            label_id = instance_id // 1000 if instance_id >= 1000 else instance_id
            label = id2label[label_id]
            if not label.hasInstances or label.ignoreInEval:
                continue

            anno = {}
            anno["iscrowd"] = instance_id < 1000
            anno["category_id"] = label.id

            mask = np.asarray(inst_image == instance_id, dtype=np.uint8, order="F")

            # `inds` holds the (row, col) indices of the instance's pixels; the
            # box is built from their minimum and maximum.
            inds = np.nonzero(mask)
            ymin, ymax = inds[0].min(), inds[0].max()
            xmin, xmax = inds[1].min(), inds[1].max()
            anno["bbox"] = (xmin, ymin, xmax, ymax)
            # Skip instances that span a single row or a single column of pixels.
            if xmax <= xmin or ymax <= ymin:
                continue
            anno["bbox_mode"] = BoxMode.XYXY_ABS
            if to_polygons:
                # This conversion comes from D4809743 and D5171122,
                # when Mask-RCNN was first developed.
                contours = cv2.findContours(
                    mask.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE
                )[-2]
                polygons = [c.reshape(-1).tolist() for c in contours if len(c) >= 3]
                # opencv's can produce invalid polygons
                if len(polygons) == 0:
                    continue
                anno["segmentation"] = polygons
            else:
                anno["segmentation"] = mask_util.encode(mask[:, :, None])[0]
            annos.append(anno)
    ret["annotations"] = annos
    return ret


def main() -> None:
    r"""
    Test the cityscapes dataset loader.

    Loads the requested split and writes one visualization per image into the
    "cityscapes-data-vis" directory.

    Usage:
        python -m detectron2.data.datasets.cityscapes \
            cityscapes/leftImg8bit/train cityscapes/gtFine/train
    """
    # Both names are rebound below at module scope (the logger is replaced and
    # `labels` is imported), hence the `global` declaration.
    global logger, labels
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("image_dir")
    parser.add_argument("gt_dir")
    parser.add_argument("--type", choices=["instance", "semantic"], default="instance")
    args = parser.parse_args()
    from cityscapesscripts.helpers.labels import labels
    from detectron2.data.catalog import Metadata
    from detectron2.utils.visualizer import Visualizer

    logger = setup_logger(name=__name__)

    dirname = "cityscapes-data-vis"
    os.makedirs(dirname, exist_ok=True)

    if args.type == "instance":
        dicts = load_cityscapes_instances(
            args.image_dir, args.gt_dir, from_json=True, to_polygons=True
        )
        logger.info("Done loading {} samples.".format(len(dicts)))

        thing_classes = [
            k.name for k in labels if k.hasInstances and not k.ignoreInEval
        ]
        meta = Metadata().set(thing_classes=thing_classes)

    else:
        dicts = load_cityscapes_semantic(args.image_dir, args.gt_dir)
        logger.info("Done loading {} samples.".format(len(dicts)))

        stuff_classes = [k.name for k in labels if k.trainId != 255]
        stuff_colors = [k.color for k in labels if k.trainId != 255]
        meta = Metadata().set(stuff_classes=stuff_classes, stuff_colors=stuff_colors)

    for d in dicts:
        # Decode the image while the file is still open, then release the
        # handle before moving on to the next image.
        with PathManager.open(d["file_name"], "rb") as f:
            img = np.array(Image.open(f))
        visualizer = Visualizer(img, metadata=meta)
        vis = visualizer.draw_dataset_dict(d)
        # cv2.imshow("a", vis.get_image()[:, :, ::-1])
        # cv2.waitKey()
        fpath = os.path.join(dirname, os.path.basename(d["file_name"]))
        vis.save(fpath)


if __name__ == "__main__":
    main()  # pragma: no cover