|
_base_ = ['../../../configs/_base_/default_runtime.py'] |
|
custom_imports = dict( |
|
imports=['projects.BEVFusion.bevfusion'], allow_failed_imports=False) |
|
|
|
|
|
|
|
|
|
|
|
|
|
voxel_size = [0.075, 0.075, 0.2] |
|
point_cloud_range = [-54.0, -54.0, -5.0, 54.0, 54.0, 3.0] |
|
class_names = [ |
|
'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier', |
|
'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone' |
|
] |
|
|
|
metainfo = dict(classes=class_names) |
|
dataset_type = 'NuScenesDataset' |
|
data_root = 'data/nuscenes/' |
|
data_prefix = dict( |
|
pts='samples/LIDAR_TOP', |
|
CAM_FRONT='samples/CAM_FRONT', |
|
CAM_FRONT_LEFT='samples/CAM_FRONT_LEFT', |
|
CAM_FRONT_RIGHT='samples/CAM_FRONT_RIGHT', |
|
CAM_BACK='samples/CAM_BACK', |
|
CAM_BACK_RIGHT='samples/CAM_BACK_RIGHT', |
|
CAM_BACK_LEFT='samples/CAM_BACK_LEFT', |
|
sweeps='sweeps/LIDAR_TOP') |
|
input_modality = dict(use_lidar=True, use_camera=False) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
backend_args = None |
|
|
|
model = dict( |
|
type='BEVFusion', |
|
data_preprocessor=dict( |
|
type='Det3DDataPreprocessor', |
|
pad_size_divisor=32, |
|
voxelize_cfg=dict( |
|
max_num_points=10, |
|
point_cloud_range=[-54.0, -54.0, -5.0, 54.0, 54.0, 3.0], |
|
voxel_size=[0.075, 0.075, 0.2], |
|
max_voxels=[120000, 160000], |
|
voxelize_reduce=True)), |
|
pts_voxel_encoder=dict(type='HardSimpleVFE', num_features=5), |
|
pts_middle_encoder=dict( |
|
type='BEVFusionSparseEncoder', |
|
in_channels=5, |
|
sparse_shape=[1440, 1440, 41], |
|
order=('conv', 'norm', 'act'), |
|
norm_cfg=dict(type='BN1d', eps=0.001, momentum=0.01), |
|
encoder_channels=((16, 16, 32), (32, 32, 64), (64, 64, 128), (128, |
|
128)), |
|
encoder_paddings=((0, 0, 1), (0, 0, 1), (0, 0, (1, 1, 0)), (0, 0)), |
|
block_type='basicblock'), |
|
pts_backbone=dict( |
|
type='SECOND', |
|
in_channels=256, |
|
out_channels=[128, 256], |
|
layer_nums=[5, 5], |
|
layer_strides=[1, 2], |
|
norm_cfg=dict(type='BN', eps=0.001, momentum=0.01), |
|
conv_cfg=dict(type='Conv2d', bias=False)), |
|
pts_neck=dict( |
|
type='SECONDFPN', |
|
in_channels=[128, 256], |
|
out_channels=[256, 256], |
|
upsample_strides=[1, 2], |
|
norm_cfg=dict(type='BN', eps=0.001, momentum=0.01), |
|
upsample_cfg=dict(type='deconv', bias=False), |
|
use_conv_for_no_stride=True), |
|
bbox_head=dict( |
|
type='TransFusionHead', |
|
num_proposals=200, |
|
auxiliary=True, |
|
in_channels=512, |
|
hidden_channel=128, |
|
num_classes=10, |
|
nms_kernel_size=3, |
|
bn_momentum=0.1, |
|
num_decoder_layers=1, |
|
decoder_layer=dict( |
|
type='TransformerDecoderLayer', |
|
self_attn_cfg=dict(embed_dims=128, num_heads=8, dropout=0.1), |
|
cross_attn_cfg=dict(embed_dims=128, num_heads=8, dropout=0.1), |
|
ffn_cfg=dict( |
|
embed_dims=128, |
|
feedforward_channels=256, |
|
num_fcs=2, |
|
ffn_drop=0.1, |
|
act_cfg=dict(type='ReLU', inplace=True), |
|
), |
|
norm_cfg=dict(type='LN'), |
|
pos_encoding_cfg=dict(input_channel=2, num_pos_feats=128)), |
|
train_cfg=dict( |
|
dataset='nuScenes', |
|
point_cloud_range=[-54.0, -54.0, -5.0, 54.0, 54.0, 3.0], |
|
grid_size=[1440, 1440, 41], |
|
voxel_size=[0.075, 0.075, 0.2], |
|
out_size_factor=8, |
|
gaussian_overlap=0.1, |
|
min_radius=2, |
|
pos_weight=-1, |
|
code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2], |
|
assigner=dict( |
|
type='HungarianAssigner3D', |
|
iou_calculator=dict(type='BboxOverlaps3D', coordinate='lidar'), |
|
cls_cost=dict( |
|
type='mmdet.FocalLossCost', |
|
gamma=2.0, |
|
alpha=0.25, |
|
weight=0.15), |
|
reg_cost=dict(type='BBoxBEVL1Cost', weight=0.25), |
|
iou_cost=dict(type='IoU3DCost', weight=0.25))), |
|
test_cfg=dict( |
|
dataset='nuScenes', |
|
grid_size=[1440, 1440, 41], |
|
out_size_factor=8, |
|
voxel_size=[0.075, 0.075], |
|
pc_range=[-54.0, -54.0], |
|
nms_type=None), |
|
common_heads=dict( |
|
center=[2, 2], height=[1, 2], dim=[3, 2], rot=[2, 2], vel=[2, 2]), |
|
bbox_coder=dict( |
|
type='TransFusionBBoxCoder', |
|
pc_range=[-54.0, -54.0], |
|
post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0], |
|
score_threshold=0.0, |
|
out_size_factor=8, |
|
voxel_size=[0.075, 0.075], |
|
code_size=10), |
|
loss_cls=dict( |
|
type='mmdet.FocalLoss', |
|
use_sigmoid=True, |
|
gamma=2.0, |
|
alpha=0.25, |
|
reduction='mean', |
|
loss_weight=1.0), |
|
loss_heatmap=dict( |
|
type='mmdet.GaussianFocalLoss', reduction='mean', loss_weight=1.0), |
|
loss_bbox=dict( |
|
type='mmdet.L1Loss', reduction='mean', loss_weight=0.25))) |
|
|
|
db_sampler = dict( |
|
data_root=data_root, |
|
info_path=data_root + 'nuscenes_dbinfos_train.pkl', |
|
rate=1.0, |
|
prepare=dict( |
|
filter_by_difficulty=[-1], |
|
filter_by_min_points=dict( |
|
car=5, |
|
truck=5, |
|
bus=5, |
|
trailer=5, |
|
construction_vehicle=5, |
|
traffic_cone=5, |
|
barrier=5, |
|
motorcycle=5, |
|
bicycle=5, |
|
pedestrian=5)), |
|
classes=class_names, |
|
sample_groups=dict( |
|
car=2, |
|
truck=3, |
|
construction_vehicle=7, |
|
bus=4, |
|
trailer=6, |
|
barrier=2, |
|
motorcycle=6, |
|
bicycle=6, |
|
pedestrian=2, |
|
traffic_cone=2), |
|
points_loader=dict( |
|
type='LoadPointsFromFile', |
|
coord_type='LIDAR', |
|
load_dim=5, |
|
use_dim=[0, 1, 2, 3, 4], |
|
backend_args=backend_args)) |
|
|
|
train_pipeline = [ |
|
dict( |
|
type='LoadPointsFromFile', |
|
coord_type='LIDAR', |
|
load_dim=5, |
|
use_dim=5, |
|
backend_args=backend_args), |
|
dict( |
|
type='LoadPointsFromMultiSweeps', |
|
sweeps_num=9, |
|
load_dim=5, |
|
use_dim=5, |
|
pad_empty_sweeps=True, |
|
remove_close=True, |
|
backend_args=backend_args), |
|
dict( |
|
type='LoadAnnotations3D', |
|
with_bbox_3d=True, |
|
with_label_3d=True, |
|
with_attr_label=False), |
|
dict(type='ObjectSample', db_sampler=db_sampler), |
|
dict( |
|
type='GlobalRotScaleTrans', |
|
scale_ratio_range=[0.9, 1.1], |
|
rot_range=[-0.78539816, 0.78539816], |
|
translation_std=0.5), |
|
dict(type='BEVFusionRandomFlip3D'), |
|
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range), |
|
dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range), |
|
dict( |
|
type='ObjectNameFilter', |
|
classes=[ |
|
'car', 'truck', 'construction_vehicle', 'bus', 'trailer', |
|
'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone' |
|
]), |
|
dict(type='PointShuffle'), |
|
dict( |
|
type='Pack3DDetInputs', |
|
keys=[ |
|
'points', 'img', 'gt_bboxes_3d', 'gt_labels_3d', 'gt_bboxes', |
|
'gt_labels' |
|
], |
|
meta_keys=[ |
|
'cam2img', 'ori_cam2img', 'lidar2cam', 'lidar2img', 'cam2lidar', |
|
'ori_lidar2img', 'img_aug_matrix', 'box_type_3d', 'sample_idx', |
|
'lidar_path', 'img_path', 'transformation_3d_flow', 'pcd_rotation', |
|
'pcd_scale_factor', 'pcd_trans', 'img_aug_matrix', |
|
'lidar_aug_matrix' |
|
]) |
|
] |
|
|
|
test_pipeline = [ |
|
dict( |
|
type='LoadPointsFromFile', |
|
coord_type='LIDAR', |
|
load_dim=5, |
|
use_dim=5, |
|
backend_args=backend_args), |
|
dict( |
|
type='LoadPointsFromMultiSweeps', |
|
sweeps_num=9, |
|
load_dim=5, |
|
use_dim=5, |
|
pad_empty_sweeps=True, |
|
remove_close=True, |
|
backend_args=backend_args), |
|
dict( |
|
type='PointsRangeFilter', |
|
point_cloud_range=[-54.0, -54.0, -5.0, 54.0, 54.0, 3.0]), |
|
dict( |
|
type='Pack3DDetInputs', |
|
keys=['img', 'points', 'gt_bboxes_3d', 'gt_labels_3d'], |
|
meta_keys=[ |
|
'cam2img', 'ori_cam2img', 'lidar2cam', 'lidar2img', 'cam2lidar', |
|
'ori_lidar2img', 'img_aug_matrix', 'box_type_3d', 'sample_idx', |
|
'lidar_path', 'img_path', 'num_pts_feats', 'num_views' |
|
]) |
|
] |
|
|
|
train_dataloader = dict( |
|
batch_size=4, |
|
num_workers=4, |
|
persistent_workers=True, |
|
sampler=dict(type='DefaultSampler', shuffle=True), |
|
dataset=dict( |
|
type='CBGSDataset', |
|
dataset=dict( |
|
type=dataset_type, |
|
data_root=data_root, |
|
ann_file='nuscenes_infos_train.pkl', |
|
pipeline=train_pipeline, |
|
metainfo=metainfo, |
|
modality=input_modality, |
|
test_mode=False, |
|
data_prefix=data_prefix, |
|
use_valid_flag=True, |
|
|
|
|
|
box_type_3d='LiDAR'))) |
|
val_dataloader = dict( |
|
batch_size=1, |
|
num_workers=4, |
|
persistent_workers=True, |
|
drop_last=False, |
|
sampler=dict(type='DefaultSampler', shuffle=False), |
|
dataset=dict( |
|
type=dataset_type, |
|
data_root=data_root, |
|
ann_file='nuscenes_infos_val.pkl', |
|
pipeline=test_pipeline, |
|
metainfo=metainfo, |
|
modality=input_modality, |
|
data_prefix=data_prefix, |
|
test_mode=True, |
|
box_type_3d='LiDAR', |
|
backend_args=backend_args)) |
|
test_dataloader = val_dataloader |
|
|
|
val_evaluator = dict( |
|
type='NuScenesMetric', |
|
data_root=data_root, |
|
ann_file=data_root + 'nuscenes_infos_val.pkl', |
|
metric='bbox', |
|
backend_args=backend_args) |
|
test_evaluator = val_evaluator |
|
|
|
vis_backends = [dict(type='LocalVisBackend')] |
|
visualizer = dict( |
|
type='Det3DLocalVisualizer', vis_backends=vis_backends, name='visualizer') |
|
|
|
|
|
lr = 0.0001 |
|
param_scheduler = [ |
|
|
|
|
|
|
|
|
|
dict( |
|
type='CosineAnnealingLR', |
|
T_max=8, |
|
eta_min=lr * 10, |
|
begin=0, |
|
end=8, |
|
by_epoch=True, |
|
convert_to_iter_based=True), |
|
dict( |
|
type='CosineAnnealingLR', |
|
T_max=12, |
|
eta_min=lr * 1e-4, |
|
begin=8, |
|
end=20, |
|
by_epoch=True, |
|
convert_to_iter_based=True), |
|
|
|
|
|
|
|
dict( |
|
type='CosineAnnealingMomentum', |
|
T_max=8, |
|
eta_min=0.85 / 0.95, |
|
begin=0, |
|
end=8, |
|
by_epoch=True, |
|
convert_to_iter_based=True), |
|
dict( |
|
type='CosineAnnealingMomentum', |
|
T_max=12, |
|
eta_min=1, |
|
begin=8, |
|
end=20, |
|
by_epoch=True, |
|
convert_to_iter_based=True) |
|
] |
|
|
|
|
|
train_cfg = dict(by_epoch=True, max_epochs=20, val_interval=5) |
|
val_cfg = dict() |
|
test_cfg = dict() |
|
|
|
optim_wrapper = dict( |
|
type='OptimWrapper', |
|
optimizer=dict(type='AdamW', lr=lr, weight_decay=0.01), |
|
clip_grad=dict(max_norm=35, norm_type=2)) |
|
|
|
|
|
|
|
|
|
|
|
auto_scale_lr = dict(enable=False, base_batch_size=32) |
|
log_processor = dict(window_size=50) |
|
|
|
default_hooks = dict( |
|
logger=dict(type='LoggerHook', interval=50), |
|
checkpoint=dict(type='CheckpointHook', interval=5)) |
|
custom_hooks = [dict(type='DisableObjectSampleHook', disable_after_epoch=15)] |
|
|