import os.path as osp
from typing import Callable, List, Optional, Union

import numpy as np

from mmdet3d.registry import DATASETS
from mmdet3d.structures import DepthInstance3DBoxes
from .det3d_dataset import Det3DDataset


@DATASETS.register_module()
class SUNRGBDDataset(Det3DDataset):
    r"""SUNRGBD Dataset.

    This class serves as the API for experiments on the SUNRGBD Dataset.

    See the `download page <http://rgbd.cs.princeton.edu/challenge.html>`_
    for data downloading.

    Args:
        data_root (str): Path of dataset root.
        ann_file (str): Path of annotation file.
        metainfo (dict, optional): Meta information for dataset, such as class
            information. Defaults to None.
        data_prefix (dict): Prefix for data. Defaults to
            dict(pts='points', img='sunrgbd_trainval/image').
        pipeline (List[dict]): Pipeline used for data processing.
            Defaults to [].
        modality (dict): Modality to specify the sensor data used as input.
            Defaults to dict(use_camera=True, use_lidar=True).
        default_cam_key (str): The default camera name adopted.
            Defaults to 'CAM0'.
        box_type_3d (str): Type of 3D box of this dataset.
            Based on the `box_type_3d`, the dataset will encapsulate the box
            to its original format and then convert it to `box_type_3d`.
            Defaults to 'Depth' in this dataset. Available options include:

            - 'LiDAR': Box in LiDAR coordinates.
            - 'Depth': Box in depth coordinates, usually for indoor datasets.
            - 'Camera': Box in camera coordinates.
        filter_empty_gt (bool): Whether to filter empty GT.
            Defaults to True.
        test_mode (bool): Whether the dataset is in test mode.
            Defaults to False.
    """
    METAINFO = {
        'classes': ('bed', 'table', 'sofa', 'chair', 'toilet', 'desk',
                    'dresser', 'night_stand', 'bookshelf', 'bathtub'),
        'palette': [(255, 187, 120), (255, 152, 150), (140, 86, 75),
                    (188, 189, 34), (44, 160, 44), (247, 182, 210),
                    (196, 156, 148), (23, 190, 207), (148, 103, 189),
                    (227, 119, 194)]
    }
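
    # Example (a minimal sketch, not taken from the original module): a
    # typical mmdet3d config entry for this dataset might look like the
    # following. The paths, annotation file name, and pipeline below are
    # illustrative assumptions, not values required by this class.
    #
    #     train_dataset = dict(
    #         type='SUNRGBDDataset',
    #         data_root='data/sunrgbd',
    #         ann_file='sunrgbd_infos_train.pkl',
    #         data_prefix=dict(pts='points', img='sunrgbd_trainval/image'),
    #         pipeline=[...],
    #         modality=dict(use_camera=True, use_lidar=True),
    #         box_type_3d='Depth')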

    def __init__(self,
                 data_root: str,
                 ann_file: str,
                 metainfo: Optional[dict] = None,
                 data_prefix: dict = dict(
                     pts='points', img='sunrgbd_trainval/image'),
                 pipeline: List[Union[dict, Callable]] = [],
                 default_cam_key: str = 'CAM0',
                 modality: dict = dict(use_camera=True, use_lidar=True),
                 box_type_3d: str = 'Depth',
                 filter_empty_gt: bool = True,
                 test_mode: bool = False,
                 **kwargs) -> None:
        super().__init__(
            data_root=data_root,
            ann_file=ann_file,
            metainfo=metainfo,
            data_prefix=data_prefix,
            pipeline=pipeline,
            default_cam_key=default_cam_key,
            modality=modality,
            box_type_3d=box_type_3d,
            filter_empty_gt=filter_empty_gt,
            test_mode=test_mode,
            **kwargs)
        # Both modality flags must be present, and at least one of the
        # camera/lidar modalities must be enabled.
        assert 'use_camera' in self.modality and \
            'use_lidar' in self.modality
        assert self.modality['use_camera'] or self.modality['use_lidar']

    def parse_data_info(self, info: dict) -> dict:
        """Process the raw data info.

        Convert the relative paths of the required modality data files to
        absolute paths, and process the `instances` field into `ann_info`
        in the training stage.

        Args:
            info (dict): Raw info dict.

        Returns:
            dict: Data information with all paths converted to absolute
            paths, and with `ann_info` added in the training stage.
        """
        if self.modality['use_lidar']:
            # Prepend the point cloud prefix to the relative lidar path.
            info['lidar_points']['lidar_path'] = \
                osp.join(
                    self.data_prefix.get('pts', ''),
                    info['lidar_points']['lidar_path'])

        if self.modality['use_camera']:
            # Prepend the image prefix to every camera's relative image path.
            for cam_id, img_info in info['images'].items():
                if 'img_path' in img_info:
                    img_info['img_path'] = osp.join(
                        self.data_prefix.get('img', ''), img_info['img_path'])
            if self.default_cam_key is not None:
                # Expose the default camera's image path and its
                # depth-to-image projection matrix at the top level.
                info['img_path'] = info['images'][
                    self.default_cam_key]['img_path']
                info['depth2img'] = np.array(
                    info['images'][self.default_cam_key]['depth2img'],
                    dtype=np.float32)

        if not self.test_mode:
            # Annotations used for training.
            info['ann_info'] = self.parse_ann_info(info)
        if self.test_mode and self.load_eval_anns:
            # Annotations used for evaluation.
            info['eval_ann_info'] = self.parse_ann_info(info)

        return info
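
    # A hedged sketch (inferred from the fields accessed above, not from the
    # data converter) of the raw `info` dict this method expects:
    #
    #     info = {
    #         'lidar_points': {'lidar_path': <relative .bin path>, ...},
    #         'images': {'CAM0': {'img_path': <relative image path>,
    #                             'depth2img': <projection matrix>, ...}},
    #         'instances': [...],  # parsed into `ann_info` by the base class
    #     }
    #
    # After this method runs, 'lidar_path' and 'img_path' are prefixed with
    # data_prefix['pts'] and data_prefix['img'] respectively, and the default
    # camera's 'img_path' and 'depth2img' are copied to the top level.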

    def parse_ann_info(self, info: dict) -> dict:
        """Process the `instances` in data info to `ann_info`.

        Args:
            info (dict): Info dict.

        Returns:
            dict: Processed `ann_info`.
        """
        ann_info = super().parse_ann_info(info)

        # Empty GT: build placeholder arrays so downstream transforms still
        # receive consistently typed annotation fields.
        if ann_info is None:
            ann_info = dict()
            ann_info['gt_bboxes_3d'] = np.zeros((0, 6), dtype=np.float32)
            ann_info['gt_labels_3d'] = np.zeros((0, ), dtype=np.int64)

        # Convert raw boxes, whose given center is the geometric center
        # (origin=(0.5, 0.5, 0.5)), to the target box structure.
        ann_info['gt_bboxes_3d'] = DepthInstance3DBoxes(
            ann_info['gt_bboxes_3d'],
            origin=(0.5, 0.5, 0.5)).convert_to(self.box_mode_3d)

        return ann_info
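
# A minimal sketch (not part of the original module) of the box handling in
# `parse_ann_info`, assuming a single (x, y, z, dx, dy, dz, yaw) annotation
# whose (x, y, z) is the geometric center of the box:
#
#     import numpy as np
#     from mmdet3d.structures import DepthInstance3DBoxes
#
#     raw = np.array([[1.0, 2.0, 0.5, 0.8, 0.6, 1.0, 0.0]], dtype=np.float32)
#     boxes = DepthInstance3DBoxes(raw, origin=(0.5, 0.5, 0.5))
#     # DepthInstance3DBoxes stores boxes with a bottom-center origin
#     # (0.5, 0.5, 0), so z is shifted down by half the box height here
#     # (0.5 -> 0.0) before any further `convert_to` call.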