# Parent config files this config builds on (paths are relative to this
# file): the KITTI 3-class dataset settings, the cyclic 40-epoch schedule and
# the default runtime settings.
_base_ = [
    '../_base_/datasets/kitti-3d-3class.py',
    '../_base_/schedules/cyclic-40e.py',
    '../_base_/default_runtime.py',
]

# Voxel edge lengths, one value per axis.
# NOTE(review): presumably ordered (x, y, z) in metres - confirm.
voxel_size = [0.05, 0.05, 0.1]
# Six bounds of the processed region; shared by the range filters, the
# voxel layer and the points encoder below.
# NOTE(review): presumably [x_min, y_min, z_min, x_max, y_max, z_max] - confirm.
point_cloud_range = [0, -40, -3, 70.4, 40, 1]

# Dataset location; the trailing slash matters because paths below are built
# by plain string concatenation.
data_root = 'data/kitti/'
# The three object classes used throughout this config.
class_names = ['Pedestrian', 'Cyclist', 'Car']
# Dataset meta information passed to the dataloader datasets.
metainfo = dict(CLASSES=class_names)
# No special file backend arguments; forwarded to every loader that accepts
# `backend_args`.
backend_args = None

# Settings for the ground-truth database sampler consumed by the
# `ObjectSample` transform in the training pipeline.
db_sampler = dict(
    data_root=data_root,
    info_path=data_root + 'kitti_dbinfos_train.pkl',
    rate=1.0,
    # Pre-filtering of database entries: by difficulty flag and by a
    # per-class minimum point count.
    prepare=dict(
        filter_by_difficulty=[-1],
        filter_by_min_points=dict(Car=5, Pedestrian=5, Cyclist=5)),
    classes=class_names,
    # Per-class sample counts.
    sample_groups=dict(Car=15, Pedestrian=10, Cyclist=10),
    # Loader used for the sampled objects' points; same dimensions as the
    # scene loader in the pipelines (load_dim=4, use_dim=4).
    points_loader=dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=4,
        use_dim=4,
        backend_args=backend_args),
    backend_args=backend_args)

# Training data pipeline: load points and 3D annotations, paste sampled
# database objects, apply random flip / rotation / scaling, crop points and
# boxes to `point_cloud_range`, shuffle the points and pack the inputs.
train_pipeline = [
    dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=4,
        use_dim=4,
        backend_args=backend_args),
    dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
    dict(type='ObjectSample', db_sampler=db_sampler, use_ground_plane=True),
    dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
    dict(
        type='GlobalRotScaleTrans',
        # +/- 0.78539816 rad, i.e. +/- pi/4.
        rot_range=[-0.78539816, 0.78539816],
        scale_ratio_range=[0.95, 1.05]),
    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
    dict(type='PointShuffle'),
    dict(
        type='Pack3DDetInputs',
        keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']),
]

# Test data pipeline: load points, run them through a test-time wrapper
# configured as an identity augmentation (single scale ratio of 1, flipping
# disabled, zero rotation / unit scale / zero translation), crop to
# `point_cloud_range` and pack only the points.
test_pipeline = [
    dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=4,
        use_dim=4,
        backend_args=backend_args),
    dict(
        type='MultiScaleFlipAug3D',
        img_scale=(1333, 800),
        pts_scale_ratio=1,
        flip=False,
        transforms=[
            dict(
                type='GlobalRotScaleTrans',
                rot_range=[0, 0],
                scale_ratio_range=[1., 1.],
                translation_std=[0, 0, 0]),
            dict(type='RandomFlip3D'),
            dict(
                type='PointsRangeFilter',
                point_cloud_range=point_cloud_range),
        ]),
    dict(type='Pack3DDetInputs', keys=['points']),
]

# Two-stage detector definition: voxelization + sparse 3D encoder, keypoint
# (voxel set abstraction) encoder, BEV backbone/neck, anchor-based RPN head
# and an RoI head, followed by the training and testing settings.
model = dict(
    type='PointVoxelRCNN',
    # Voxelization is done by the data preprocessor.
    data_preprocessor=dict(
        type='Det3DDataPreprocessor',
        voxel=True,
        voxel_layer=dict(
            max_num_points=5,
            point_cloud_range=point_cloud_range,
            voxel_size=voxel_size,
            # NOTE(review): presumably (training, testing) limits - confirm.
            max_voxels=(16000, 40000))),
    voxel_encoder=dict(type='HardSimpleVFE'),
    middle_encoder=dict(
        type='SparseEncoder',
        in_channels=4,
        # Consistent with the range / voxel size above:
        # (40 - (-40)) / 0.05 = 1600, 70.4 / 0.05 = 1408, (1 - (-3)) / 0.1 = 40.
        # NOTE(review): the extra +1 on the first entry and the (z, y, x)
        # ordering are assumed from these numbers - confirm.
        sparse_shape=[41, 1600, 1408],
        order=('conv', 'norm', 'act'),
        encoder_paddings=((0, 0, 0), ((1, 1, 1), 0, 0), ((1, 1, 1), 0, 0),
                          ((0, 1, 1), 0, 0)),
        return_middle_feats=True),
    points_encoder=dict(
        type='VoxelSetAbstraction',
        num_keypoints=2048,
        fused_out_channel=128,
        voxel_size=voxel_size,
        point_cloud_range=point_cloud_range,
        # One set-abstraction config per scale factor (1, 2, 4, 8).
        voxel_sa_cfgs_list=[
            dict(
                type='StackedSAModuleMSG',
                in_channels=16,
                scale_factor=1,
                radius=(0.4, 0.8),
                sample_nums=(16, 16),
                mlp_channels=((16, 16), (16, 16)),
                use_xyz=True),
            dict(
                type='StackedSAModuleMSG',
                in_channels=32,
                scale_factor=2,
                radius=(0.8, 1.2),
                sample_nums=(16, 32),
                mlp_channels=((32, 32), (32, 32)),
                use_xyz=True),
            dict(
                type='StackedSAModuleMSG',
                in_channels=64,
                scale_factor=4,
                radius=(1.2, 2.4),
                sample_nums=(16, 32),
                mlp_channels=((64, 64), (64, 64)),
                use_xyz=True),
            dict(
                type='StackedSAModuleMSG',
                in_channels=64,
                scale_factor=8,
                radius=(2.4, 4.8),
                sample_nums=(16, 32),
                mlp_channels=((64, 64), (64, 64)),
                use_xyz=True),
        ],
        # Set abstraction applied to the raw points.
        rawpoints_sa_cfgs=dict(
            type='StackedSAModuleMSG',
            in_channels=1,
            radius=(0.4, 0.8),
            sample_nums=(16, 16),
            mlp_channels=((16, 16), (16, 16)),
            use_xyz=True),
        bev_feat_channel=256,
        bev_scale_factor=8),
    backbone=dict(
        type='SECOND',
        in_channels=256,
        layer_nums=[5, 5],
        layer_strides=[1, 2],
        out_channels=[128, 256]),
    neck=dict(
        type='SECONDFPN',
        in_channels=[128, 256],
        upsample_strides=[1, 2],
        # 256 + 256 = 512, which equals `rpn_head.in_channels` below.
        out_channels=[256, 256]),
    rpn_head=dict(
        type='PartA2RPNHead',
        num_classes=3,
        in_channels=512,
        feat_channels=512,
        use_direction_classifier=True,
        dir_offset=0.78539,
        anchor_generator=dict(
            type='Anchor3DRangeGenerator',
            # Three range/size entries.
            # NOTE(review): presumably one per class, in `class_names`
            # order (Pedestrian, Cyclist, Car) - confirm.
            ranges=[[0, -40.0, -0.6, 70.4, 40.0, -0.6],
                    [0, -40.0, -0.6, 70.4, 40.0, -0.6],
                    [0, -40.0, -1.78, 70.4, 40.0, -1.78]],
            sizes=[[0.8, 0.6, 1.73], [1.76, 0.6, 1.73], [3.9, 1.6, 1.56]],
            rotations=[0, 1.57],
            reshape_out=False),
        diff_rad_by_sin=True,
        assigner_per_size=True,
        assign_per_class=True,
        bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
        loss_cls=dict(
            type='mmdet.FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0),
        loss_bbox=dict(
            type='mmdet.SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),
        loss_dir=dict(
            type='mmdet.CrossEntropyLoss',
            use_sigmoid=False,
            loss_weight=0.2)),
    roi_head=dict(
        type='PVRCNNRoiHead',
        num_classes=3,
        semantic_head=dict(
            type='ForegroundSegmentationHead',
            in_channels=640,
            extra_width=0.1,
            loss_seg=dict(
                type='mmdet.FocalLoss',
                use_sigmoid=True,
                reduction='sum',
                gamma=2.0,
                alpha=0.25,
                activated=True,
                loss_weight=1.0)),
        bbox_roi_extractor=dict(
            type='Batch3DRoIGridExtractor',
            grid_size=6,
            roi_layer=dict(
                type='StackedSAModuleMSG',
                in_channels=128,
                radius=(0.8, 1.6),
                sample_nums=(16, 16),
                mlp_channels=((64, 64), (64, 64)),
                use_xyz=True,
                pool_mod='max'),
        ),
        bbox_head=dict(
            type='PVRCNNBBoxHead',
            in_channels=128,
            # Same grid size as the RoI extractor above.
            grid_size=6,
            num_classes=3,
            class_agnostic=True,
            shared_fc_channels=(256, 256),
            reg_channels=(256, 256),
            cls_channels=(256, 256),
            dropout_ratio=0.3,
            with_corner_loss=True,
            bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
            loss_bbox=dict(
                type='mmdet.SmoothL1Loss',
                beta=1.0 / 9.0,
                reduction='sum',
                loss_weight=1.0),
            loss_cls=dict(
                type='mmdet.CrossEntropyLoss',
                use_sigmoid=True,
                reduction='sum',
                loss_weight=1.0))),
    # Model training and testing settings.
    train_cfg=dict(
        rpn=dict(
            # Three assigners; the first two share thresholds (0.5 / 0.35),
            # the third is stricter (0.6 / 0.45).
            # NOTE(review): presumably one per class in `class_names`
            # order - confirm.
            assigner=[
                dict(
                    type='Max3DIoUAssigner',
                    iou_calculator=dict(type='BboxOverlapsNearest3D'),
                    pos_iou_thr=0.5,
                    neg_iou_thr=0.35,
                    min_pos_iou=0.35,
                    ignore_iof_thr=-1),
                dict(
                    type='Max3DIoUAssigner',
                    iou_calculator=dict(type='BboxOverlapsNearest3D'),
                    pos_iou_thr=0.5,
                    neg_iou_thr=0.35,
                    min_pos_iou=0.35,
                    ignore_iof_thr=-1),
                dict(
                    type='Max3DIoUAssigner',
                    iou_calculator=dict(type='BboxOverlapsNearest3D'),
                    pos_iou_thr=0.6,
                    neg_iou_thr=0.45,
                    min_pos_iou=0.45,
                    ignore_iof_thr=-1),
            ],
            allowed_border=0,
            pos_weight=-1,
            debug=False),
        rpn_proposal=dict(
            nms_pre=9000,
            nms_post=512,
            max_num=512,
            nms_thr=0.8,
            score_thr=0,
            use_rotate_nms=True),
        rcnn=dict(
            # Three identical assigners using full 3D IoU in LiDAR
            # coordinates with a single 0.55 threshold.
            assigner=[
                dict(
                    type='Max3DIoUAssigner',
                    iou_calculator=dict(
                        type='BboxOverlaps3D', coordinate='lidar'),
                    pos_iou_thr=0.55,
                    neg_iou_thr=0.55,
                    min_pos_iou=0.55,
                    ignore_iof_thr=-1),
                dict(
                    type='Max3DIoUAssigner',
                    iou_calculator=dict(
                        type='BboxOverlaps3D', coordinate='lidar'),
                    pos_iou_thr=0.55,
                    neg_iou_thr=0.55,
                    min_pos_iou=0.55,
                    ignore_iof_thr=-1),
                dict(
                    type='Max3DIoUAssigner',
                    iou_calculator=dict(
                        type='BboxOverlaps3D', coordinate='lidar'),
                    pos_iou_thr=0.55,
                    neg_iou_thr=0.55,
                    min_pos_iou=0.55,
                    ignore_iof_thr=-1),
            ],
            sampler=dict(
                type='IoUNegPiecewiseSampler',
                num=128,
                pos_fraction=0.5,
                neg_piece_fractions=[0.8, 0.2],
                neg_iou_piece_thrs=[0.55, 0.1],
                neg_pos_ub=-1,
                add_gt_as_proposals=False,
                return_iou=True),
            cls_pos_thr=0.75,
            cls_neg_thr=0.25)),
    test_cfg=dict(
        rpn=dict(
            nms_pre=1024,
            nms_post=100,
            max_num=100,
            nms_thr=0.7,
            score_thr=0,
            use_rotate_nms=True),
        rcnn=dict(
            use_rotate_nms=True,
            use_raw_score=True,
            nms_thr=0.1,
            score_thr=0.1)))

# Dataloader overrides: only the pipeline and class meta information are
# replaced here. The train dataset override is nested one level deeper
# (dataset -> dataset) than the test one.
train_dataloader = dict(
    batch_size=2,
    num_workers=2,
    dataset=dict(dataset=dict(pipeline=train_pipeline, metainfo=metainfo)))
test_dataloader = dict(dataset=dict(pipeline=test_pipeline, metainfo=metainfo))
# The MMEngine Runner reads the validation loader from `val_dataloader`, so
# the validation override is declared under that key.
# NOTE(review): assumes the base dataset config defines `val_dataloader`
# with a `dataset` entry to merge into - confirm.
val_dataloader = dict(dataset=dict(pipeline=test_pipeline, metainfo=metainfo))
# Kept for backward compatibility with anything that still reads this name.
eval_dataloader = dict(dataset=dict(pipeline=test_pipeline, metainfo=metainfo))

# Base learning rate; overrides the optimizer's value and is the reference
# for the annealing targets below.
lr = 0.001
optim_wrapper = dict(optimizer=dict(lr=lr))
param_scheduler = [
    # Learning rate schedule, in two cosine phases:
    #   epochs 0-15:  anneal towards lr * 10
    #   epochs 15-40: anneal towards lr * 1e-4
    # Both are declared per epoch and converted to per-iteration updates.
    dict(
        type='CosineAnnealingLR',
        T_max=15,
        eta_min=lr * 10,
        begin=0,
        end=15,
        by_epoch=True,
        convert_to_iter_based=True),
    dict(
        type='CosineAnnealingLR',
        T_max=25,
        eta_min=lr * 1e-4,
        begin=15,
        end=40,
        by_epoch=True,
        convert_to_iter_based=True),
    # Momentum schedule, over the same two phases:
    #   epochs 0-15:  anneal towards 0.85 / 0.95
    #   epochs 15-40: anneal towards 1
    dict(
        type='CosineAnnealingMomentum',
        T_max=15,
        eta_min=0.85 / 0.95,
        begin=0,
        end=15,
        by_epoch=True,
        convert_to_iter_based=True),
    dict(
        type='CosineAnnealingMomentum',
        T_max=25,
        eta_min=1,
        begin=15,
        end=40,
        by_epoch=True,
        convert_to_iter_based=True),
]