rlawjdghek's picture
prep (#1)
61c2d32 verified
raw
history blame
4.46 kB
# Copyright (c) Facebook, Inc. and its affiliates.
import copy
import logging
import numpy as np
from typing import Callable, List, Union
import torch
from panopticapi.utils import rgb2id
from detectron2.config import configurable
from detectron2.data import MetadataCatalog
from detectron2.data import detection_utils as utils
from detectron2.data import transforms as T
from .target_generator import PanopticDeepLabTargetGenerator
# Public API of this module: only the dataset mapper class is exported by `import *`.
__all__ = ["PanopticDeeplabDatasetMapper"]
class PanopticDeeplabDatasetMapper:
    """
    Callable that maps one dataset record to the inputs used for Panoptic-DeepLab training.

    Each call performs the following steps:

    1. Reads the image from "file_name" and the panoptic label from "pan_seg_file_name".
    2. Runs the configured augmentations on the image and the label together.
    3. Converts the image to a Tensor and generates training targets from the label.
    """

    @configurable
    def __init__(
        self,
        *,
        augmentations: List[Union[T.Augmentation, T.Transform]],
        image_format: str,
        panoptic_target_generator: Callable,
    ):
        """
        NOTE: this interface is experimental.

        Args:
            augmentations: a list of augmentations or deterministic transforms to apply
            image_format: an image format supported by :func:`detection_utils.read_image`.
            panoptic_target_generator: a callable that takes "panoptic_seg" and
                "segments_info" to generate training targets for the model.
        """
        # Wrap the individual augmentations so they can be applied with a single call.
        self.augmentations = T.AugmentationList(augmentations)
        self.image_format = image_format

        message = "Augmentations used in training: " + str(augmentations)
        logging.getLogger(__name__).info(message)

        self.panoptic_target_generator = panoptic_target_generator

    @classmethod
    def from_config(cls, cfg):
        """
        Translate a config object into the keyword arguments of :meth:`__init__`.

        Args:
            cfg: config object providing the ``INPUT`` and ``DATASETS`` nodes read below.

        Returns:
            dict: with keys "augmentations", "image_format" and "panoptic_target_generator".
        """
        input_cfg = cfg.INPUT

        # Resize first, optionally crop, and always finish with a random flip.
        augs = [
            T.ResizeShortestEdge(
                input_cfg.MIN_SIZE_TRAIN,
                input_cfg.MAX_SIZE_TRAIN,
                input_cfg.MIN_SIZE_TRAIN_SAMPLING,
            )
        ]
        if input_cfg.CROP.ENABLED:
            crop_cfg = input_cfg.CROP
            augs += [T.RandomCrop(crop_cfg.TYPE, crop_cfg.SIZE)]
        augs += [T.RandomFlip()]

        # Assume always applies to the training set: metadata is taken from the
        # first training dataset only.
        meta = MetadataCatalog.get(cfg.DATASETS.TRAIN[0])

        generator_kwargs = {
            "ignore_label": meta.ignore_label,
            "thing_ids": list(meta.thing_dataset_id_to_contiguous_id.values()),
            "sigma": input_cfg.GAUSSIAN_SIGMA,
            "ignore_stuff_in_offset": input_cfg.IGNORE_STUFF_IN_OFFSET,
            "small_instance_area": input_cfg.SMALL_INSTANCE_AREA,
            "small_instance_weight": input_cfg.SMALL_INSTANCE_WEIGHT,
            "ignore_crowd_in_semantic": input_cfg.IGNORE_CROWD_IN_SEMANTIC,
        }
        target_generator = PanopticDeepLabTargetGenerator(**generator_kwargs)

        return {
            "augmentations": augs,
            "image_format": input_cfg.FORMAT,
            "panoptic_target_generator": target_generator,
        }

    def __call__(self, dataset_dict):
        """
        Args:
            dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

        Returns:
            dict: a format that builtin models in detectron2 accept
        """
        # Work on a private copy; the caller's record is left untouched.
        record = copy.deepcopy(dataset_dict)

        # Load the image and validate it against the record.
        image = utils.read_image(record["file_name"], format=self.image_format)
        utils.check_image_size(record, image)

        # The panoptic label is stored as an RGB image; its path is removed from the record.
        pan_seg_path = record.pop("pan_seg_file_name")
        pan_seg_rgb = utils.read_image(pan_seg_path, "RGB")

        # The panoptic label rides along in the semantic-segmentation slot so that it
        # receives the same transforms as the image.
        aug_input = T.AugInput(image, sem_seg=pan_seg_rgb)
        self.augmentations(aug_input)
        image = aug_input.image
        pan_seg_rgb = aug_input.sem_seg

        # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
        # but not efficient on large generic data structures due to the use of
        # pickle & mp.Queue. Therefore it's important to use torch.Tensor.
        # The last axis is moved to the front (presumably HWC -> CHW).
        channels_first = np.ascontiguousarray(image.transpose(2, 0, 1))
        record["image"] = torch.as_tensor(channels_first)

        # Generate the Panoptic-DeepLab training targets and merge them into the record.
        pan_seg_ids = rgb2id(pan_seg_rgb)
        targets = self.panoptic_target_generator(pan_seg_ids, record["segments_info"])
        record.update(targets)
        return record