# coding=utf-8
# Copyright 2021 The Deeplab2 Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Provides data from segmentation datasets.
Currently, we support the following datasets:
1. Cityscapes dataset (https://www.cityscapes-dataset.com).
The Cityscapes dataset contains 19 semantic labels (such as road, person, car,
and so on) for urban street scenes.
2. KITTI-STEP (http://www.cvlibs.net/datasets/kitti/).
The KITTI-STEP enriches the KITTI-MOTS data with additional `stuff'
anntotations.
3. MOTChallenge-STEP (https://motchallenge.net/).
The MOTChallenge-STEP enriches the MOTSChallenge data with additional `stuff'
annotations.
4. MSCOCO panoptic segmentation (http://cocodataset.org/#panoptic-2018).
Panoptic segmentation annotations for MSCOCO dataset. Note that we convert the
provided MSCOCO panoptic segmentation format to the following one:
panoptic label = semantic label * 256 + instance id.
5. Cityscapes-DVPS (https://github.com/joe-siyuan-qiao/ViP-DeepLab)
The Cityscapes-DVPS dataset augments Cityscapes-VPS
(https://github.com/mcahny/vps) with depth annotations.

References:

- Marius Cordts, Mohamed Omran, Sebastian Ramos, Timo Rehfeld, Markus
  Enzweiler, Rodrigo Benenson, Uwe Franke, Stefan Roth, and Bernt Schiele,
  "The Cityscapes Dataset for Semantic Urban Scene Understanding." In CVPR,
  2016.

- Andreas Geiger, Philip Lenz, and Raquel Urtasun, "Are we ready for
  Autonomous Driving? The KITTI Vision Benchmark Suite." In CVPR, 2012.

- Alexander Kirillov, Kaiming He, Ross Girshick, Carsten Rother, and Piotr
  Dollar, "Panoptic Segmentation." In CVPR, 2019.

- Tsung-Yi Lin, Michael Maire, Serge J. Belongie, Lubomir D. Bourdev, Ross B.
  Girshick, James Hays, Pietro Perona, Deva Ramanan, Piotr Dollar, and C.
  Lawrence Zitnick, "Microsoft COCO: Common Objects in Context." In ECCV,
  2014.

- Anton Milan, Laura Leal-Taixe, Ian Reid, Stefan Roth, and Konrad Schindler,
  "MOT16: A Benchmark for Multi-Object Tracking." arXiv:1603.00831, 2016.

- Paul Voigtlaender, Michael Krause, Aljosa Osep, Jonathon Luiten, Berin
  Balachandar Gnana Sekar, Andreas Geiger, and Bastian Leibe, "MOTS:
  Multi-Object Tracking and Segmentation." In CVPR, 2019.

- Mark Weber, Jun Xie, Maxwell Collins, Yukun Zhu, Paul Voigtlaender, Hartwig
  Adam, Bradley Green, Andreas Geiger, Bastian Leibe, Daniel Cremers, Aljosa
  Osep, Laura Leal-Taixe, and Liang-Chieh Chen, "STEP: Segmenting and
  Tracking Every Pixel." arXiv:2102.11859, 2021.

- Dahun Kim, Sanghyun Woo, Joon-Young Lee, and In So Kweon, "Video Panoptic
  Segmentation." In CVPR, 2020.

- Siyuan Qiao, Yukun Zhu, Hartwig Adam, Alan Yuille, and Liang-Chieh Chen,
  "ViP-DeepLab: Learning Visual Perception with Depth-aware Video Panoptic
  Segmentation." In CVPR, 2021.
"""
import collections
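
# The MSCOCO conversion described in the docstring packs the semantic label
# and the instance id into a single panoptic label. Below is a minimal,
# illustrative sketch of that encoding and its inverse; the helper names are
# assumptions for this example and are not used elsewhere in the module.


def _encode_panoptic(semantic_label, instance_id, label_divisor=256):
  """Packs a semantic label and an instance id into one panoptic label."""
  return semantic_label * label_divisor + instance_id


def _decode_panoptic(panoptic_label, label_divisor=256):
  """Recovers (semantic_label, instance_id) from a panoptic label."""
  return panoptic_label // label_divisor, panoptic_label % label_divisor


# For example, with the MSCOCO divisor of 256, _encode_panoptic(17, 3) yields
# 17 * 256 + 3 = 4355, and _decode_panoptic(4355) gives back (17, 3).
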
# Dataset names.
_CITYSCAPES = 'cityscapes'
_CITYSCAPES_PANOPTIC = 'cityscapes_panoptic'
_KITTI_STEP = 'kitti_step'
_MOTCHALLENGE_STEP = 'motchallenge_step'
_CITYSCAPES_DVPS = 'cityscapes_dvps'
_COCO_PANOPTIC = 'coco_panoptic'

# Colormap names.
_CITYSCAPES_COLORMAP = 'cityscapes'
_MOTCHALLENGE_COLORMAP = 'motchallenge'
_COCO_COLORMAP = 'coco'

# Named tuple to describe dataset properties.
DatasetDescriptor = collections.namedtuple(
    'DatasetDescriptor', [
        'dataset_name',  # Dataset name.
        # A dict mapping each split name (e.g., 'train', 'val', 'test') to
        # the number of samples in that split.
        'splits_to_sizes',
        'num_classes',  # Number of semantic classes.
        'ignore_label',  # Ignore label value used for semantic segmentation.

        # Fields below are used for panoptic segmentation and will be None
        # for semantic segmentation datasets.

        # Label divisor used in panoptic segmentation annotations to infer
        # the semantic label and the instance id from a panoptic label:
        #   semantic_label = panoptic_label // panoptic_label_divisor
        #   instance_id = panoptic_label % panoptic_label_divisor
        # (a usage sketch follows the Cityscapes panoptic descriptor below).
        'panoptic_label_divisor',
        # A tuple of class IDs that have instance annotations. For example,
        # the 'person' class has instance annotations while 'sky' does not.
        'class_has_instances_list',
        # A flag indicating whether the dataset is a video dataset that
        # contains sequence IDs and frame IDs.
        'is_video_dataset',
        # A string specifying the colormap that should be used for
        # visualization, e.g. 'cityscapes'.
        'colormap',
        # A flag indicating whether the dataset contains depth annotations.
        'is_depth_dataset',
    ]
)

CITYSCAPES_INFORMATION = DatasetDescriptor(
    dataset_name=_CITYSCAPES,
    splits_to_sizes={'train_fine': 2975,
                     'train_coarse': 22973,
                     'trainval_fine': 3475,
                     'trainval_coarse': 23473,
                     'val_fine': 500,
                     'test_fine': 1525},
    num_classes=19,
    ignore_label=255,
    panoptic_label_divisor=None,
    class_has_instances_list=None,
    is_video_dataset=False,
    colormap=_CITYSCAPES_COLORMAP,
    is_depth_dataset=False,
)

CITYSCAPES_PANOPTIC_INFORMATION = DatasetDescriptor(
    dataset_name=_CITYSCAPES_PANOPTIC,
    splits_to_sizes={'train_fine': 2975,
                     'val_fine': 500,
                     'trainval_fine': 3475,
                     'test_fine': 1525},
    num_classes=19,
    ignore_label=255,
    panoptic_label_divisor=1000,
    class_has_instances_list=tuple(range(11, 19)),
    is_video_dataset=False,
    colormap=_CITYSCAPES_COLORMAP,
    is_depth_dataset=False,
)
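
# A hedged usage sketch for the panoptic fields above: a panoptic label is
# split with panoptic_label_divisor, and class_has_instances_list separates
# `thing' classes (with instances) from `stuff' classes. The helper name is
# an assumption for this example and is not used elsewhere in the module.


def _split_panoptic_label(panoptic_label, dataset_info):
  """Returns (semantic_label, instance_id, is_thing) for a panoptic label."""
  semantic_label = panoptic_label // dataset_info.panoptic_label_divisor
  instance_id = panoptic_label % dataset_info.panoptic_label_divisor
  is_thing = semantic_label in dataset_info.class_has_instances_list
  return semantic_label, instance_id, is_thing


# Example with CITYSCAPES_PANOPTIC_INFORMATION (divisor 1000): label 13007
# decodes to semantic class 13 ('car', a thing class) with instance id 7.
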

KITTI_STEP_INFORMATION = DatasetDescriptor(
    dataset_name=_KITTI_STEP,
    splits_to_sizes={'train': 5027,
                     'val': 2981,
                     'test': 11095},
    num_classes=19,
    ignore_label=255,
    panoptic_label_divisor=1000,
    class_has_instances_list=(11, 13),
    is_video_dataset=True,
    colormap=_CITYSCAPES_COLORMAP,
    is_depth_dataset=False,
)

MOTCHALLENGE_STEP_INFORMATION = DatasetDescriptor(
    dataset_name=_MOTCHALLENGE_STEP,
    splits_to_sizes={'train': 525,  # Sequence 9.
                     'val': 600,  # Sequence 2.
                     'test': 0},
    num_classes=7,
    ignore_label=255,
    panoptic_label_divisor=1000,
    class_has_instances_list=(4,),
    is_video_dataset=True,
    colormap=_MOTCHALLENGE_COLORMAP,
    is_depth_dataset=False,
)

CITYSCAPES_DVPS_INFORMATION = DatasetDescriptor(
    dataset_name=_CITYSCAPES_DVPS,
    # The numbers of images are 2400/300/300 for train/val/test. Here, the
    # sizes are the numbers of consecutive frame pairs. As each sequence has
    # 6 frames, the number of pairs for the train split is
    # 2400 / 6 * 5 = 2000. Similarly, we get 250 pairs for each of the val
    # and test splits (see the sanity-check sketch below this descriptor).
    splits_to_sizes={'train': 2000,
                     'val': 250,
                     'test': 250},
    num_classes=19,
    ignore_label=255,
    panoptic_label_divisor=1000,
    class_has_instances_list=tuple(range(11, 19)),
    is_video_dataset=True,
    colormap=_CITYSCAPES_COLORMAP,
    is_depth_dataset=True,
)
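
# A quick sanity check of the pair arithmetic in the comment above: with
# 6-frame sequences, each sequence contributes 5 consecutive-frame pairs, so
# 2400 train images give 2400 // 6 * 5 = 2000 pairs, and 300 val (or test)
# images give 250. The helper below is illustrative only and is an
# assumption, not part of the deeplab2 API.


def _num_consecutive_frame_pairs(num_images, frames_per_sequence=6):
  """Number of consecutive-frame pairs for fixed-length sequences."""
  num_sequences = num_images // frames_per_sequence
  return num_sequences * (frames_per_sequence - 1)
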

COCO_PANOPTIC_INFORMATION = DatasetDescriptor(
    dataset_name=_COCO_PANOPTIC,
    splits_to_sizes={'train': 118287,
                     'val': 5000,
                     'test': 40670},
    num_classes=134,
    ignore_label=0,
    panoptic_label_divisor=256,
    class_has_instances_list=tuple(range(1, 81)),
    is_video_dataset=False,
    colormap=_COCO_COLORMAP,
    is_depth_dataset=False,
)

MAP_NAME_TO_DATASET_INFO = {
    _CITYSCAPES: CITYSCAPES_INFORMATION,
    _CITYSCAPES_PANOPTIC: CITYSCAPES_PANOPTIC_INFORMATION,
    _KITTI_STEP: KITTI_STEP_INFORMATION,
    _MOTCHALLENGE_STEP: MOTCHALLENGE_STEP_INFORMATION,
    _CITYSCAPES_DVPS: CITYSCAPES_DVPS_INFORMATION,
    _COCO_PANOPTIC: COCO_PANOPTIC_INFORMATION,
}

MAP_NAMES = list(MAP_NAME_TO_DATASET_INFO.keys())
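
# A minimal sketch of how a caller might look up a descriptor by name. The
# helper and its error message are assumptions for illustration, not an API
# defined elsewhere in deeplab2.


def _get_dataset_info(dataset_name):
  """Returns the DatasetDescriptor registered under `dataset_name`."""
  if dataset_name not in MAP_NAME_TO_DATASET_INFO:
    raise ValueError(
        'Unsupported dataset %r; supported names are %s.'
        % (dataset_name, MAP_NAMES))
  return MAP_NAME_TO_DATASET_INFO[dataset_name]


# Example: _get_dataset_info('kitti_step').ignore_label == 255.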