Spaces:
Runtime error
Runtime error
# Copyright (c) OpenMMLab. All rights reserved. | |
from mmcv.transforms import RandomChoice, RandomChoiceResize | |
from mmcv.transforms.loading import LoadImageFromFile | |
from mmengine.config import read_base | |
from mmengine.model.weight_init import PretrainedInit | |
from mmengine.optim.optimizer.optimizer_wrapper import OptimWrapper | |
from mmengine.optim.scheduler.lr_scheduler import MultiStepLR | |
from mmengine.runner.loops import EpochBasedTrainLoop, TestLoop, ValLoop | |
from torch.nn.modules.batchnorm import BatchNorm2d | |
from torch.nn.modules.normalization import GroupNorm | |
from torch.optim.adamw import AdamW | |
from mmdet.datasets.transforms import (LoadAnnotations, PackDetInputs, | |
RandomCrop, RandomFlip, Resize) | |
from mmdet.models import (DINO, ChannelMapper, DetDataPreprocessor, DINOHead, | |
ResNet) | |
from mmdet.models.losses.focal_loss import FocalLoss | |
from mmdet.models.losses.iou_loss import GIoULoss | |
from mmdet.models.losses.smooth_l1_loss import L1Loss | |
from mmdet.models.task_modules import (BBoxL1Cost, FocalLossCost, | |
HungarianAssigner, IoUCost) | |
# Pull in the shared base configs. The wildcard imports are intentional here:
# they bring the base settings into this config's namespace. Names used
# further down but not defined in this file (`backend_args`,
# `train_dataloader`) presumably come from these base modules.
with read_base():
    from .._base_.datasets.coco_detection import *
    from .._base_.default_runtime import *
# Detector definition: a DINO model built from a ResNet (depth=50) backbone,
# a ChannelMapper neck (num_outs=4), 6-layer encoder and decoder stacks and a
# DINOHead. Trailing comments such as "0.1 for DeformDETR" or
# "0.4 for DN-DETR" note the value the same setting takes for that model.
model = dict(
    type=DINO,
    num_queries=900,  # num_matching_queries
    with_box_refine=True,
    as_two_stage=True,
    data_preprocessor=dict(
        type=DetDataPreprocessor,
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375],
        bgr_to_rgb=True,
        pad_size_divisor=1),
    backbone=dict(
        type=ResNet,
        depth=50,
        num_stages=4,
        # Three backbone stages are exported; their channel widths must
        # match `neck.in_channels` below.
        out_indices=(1, 2, 3),
        frozen_stages=1,
        norm_cfg=dict(type=BatchNorm2d, requires_grad=False),
        norm_eval=True,
        style='pytorch',
        init_cfg=dict(
            type=PretrainedInit, checkpoint='torchvision://resnet50')),
    neck=dict(
        type=ChannelMapper,
        in_channels=[512, 1024, 2048],
        kernel_size=1,
        out_channels=256,
        act_cfg=None,
        norm_cfg=dict(type=GroupNorm, num_groups=32),
        # One more output than inputs; `num_levels=4` in the attention
        # configs below uses the same count.
        num_outs=4),
    encoder=dict(
        num_layers=6,
        layer_cfg=dict(
            self_attn_cfg=dict(embed_dims=256, num_levels=4,
                               dropout=0.0),  # 0.1 for DeformDETR
            ffn_cfg=dict(
                embed_dims=256,
                feedforward_channels=2048,  # 1024 for DeformDETR
                ffn_drop=0.0))),  # 0.1 for DeformDETR
    decoder=dict(
        num_layers=6,
        return_intermediate=True,
        layer_cfg=dict(
            self_attn_cfg=dict(embed_dims=256, num_heads=8,
                               dropout=0.0),  # 0.1 for DeformDETR
            cross_attn_cfg=dict(embed_dims=256, num_levels=4,
                                dropout=0.0),  # 0.1 for DeformDETR
            ffn_cfg=dict(
                embed_dims=256,
                feedforward_channels=2048,  # 1024 for DeformDETR
                ffn_drop=0.0)),  # 0.1 for DeformDETR
        post_norm_cfg=None),
    positional_encoding=dict(
        num_feats=128,
        normalize=True,
        offset=0.0,  # -0.5 for DeformDETR
        temperature=20),  # 10000 for DeformDETR
    bbox_head=dict(
        type=DINOHead,
        num_classes=80,
        sync_cls_avg_factor=True,
        loss_cls=dict(
            type=FocalLoss,
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0),  # 2.0 in DeformDETR
        loss_bbox=dict(type=L1Loss, loss_weight=5.0),
        loss_iou=dict(type=GIoULoss, loss_weight=2.0)),
    dn_cfg=dict(  # TODO: Move to model.train_cfg ?
        label_noise_scale=0.5,
        box_noise_scale=1.0,  # 0.4 for DN-DETR
        group_cfg=dict(dynamic=True, num_groups=None,
                       num_dn_queries=100)),  # TODO: half num_dn_queries
    # training and testing settings
    train_cfg=dict(
        assigner=dict(
            type=HungarianAssigner,
            # The L1 and GIoU cost weights (5.0 / 2.0) equal the
            # corresponding loss weights in `bbox_head`; the focal cost
            # weight (2.0) differs from `loss_cls.loss_weight` (1.0).
            match_costs=[
                dict(type=FocalLossCost, weight=2.0),
                dict(type=BBoxL1Cost, weight=5.0, box_format='xywh'),
                dict(type=IoUCost, iou_mode='giou', weight=2.0)
            ])),
    test_cfg=dict(max_per_img=300))  # 100 for DeformDETR
# train_pipeline, NOTE the img_scale and the Pad's size_divisor is different
# from the default setting in mmdet.
#
# After loading and a random flip, RandomChoice is given two alternative
# transform sequences:
#   1. a single multi-scale resize, or
#   2. a coarse resize, a random crop, then the same multi-scale resize.
train_pipeline = [
    dict(type=LoadImageFromFile, backend_args=backend_args),
    dict(type=LoadAnnotations, with_bbox=True),
    dict(type=RandomFlip, prob=0.5),
    dict(
        type=RandomChoice,
        transforms=[
            [
                dict(
                    type=RandomChoiceResize,
                    resize_type=Resize,
                    scales=[(480, 1333), (512, 1333), (544, 1333), (576, 1333),
                            (608, 1333), (640, 1333), (672, 1333), (704, 1333),
                            (736, 1333), (768, 1333), (800, 1333)],
                    keep_ratio=True)
            ],
            [
                dict(
                    type=RandomChoiceResize,
                    resize_type=Resize,
                    # The ratio of all images in the train dataset is < 7;
                    # this follows the original implementation.
                    scales=[(400, 4200), (500, 4200), (600, 4200)],
                    keep_ratio=True),
                dict(
                    type=RandomCrop,
                    crop_type='absolute_range',
                    crop_size=(384, 600),
                    allow_negative_crop=True),
                dict(
                    type=RandomChoiceResize,
                    resize_type=Resize,
                    scales=[(480, 1333), (512, 1333), (544, 1333), (576, 1333),
                            (608, 1333), (640, 1333), (672, 1333), (704, 1333),
                            (736, 1333), (768, 1333), (800, 1333)],
                    keep_ratio=True)
            ]
        ]),
    dict(type=PackDetInputs)
]
# Adjust the dataset settings of `train_dataloader` (not defined in this
# file; presumably provided by the base dataset config): set
# `filter_empty_gt=False` and use `train_pipeline` as the dataset pipeline.
train_dataloader.update(
    dataset=dict(
        filter_cfg=dict(filter_empty_gt=False), pipeline=train_pipeline))
# optimizer
# AdamW with gradient clipping (max L2 norm 0.1). Parameters matched by the
# 'backbone' key get a 0.1 learning-rate multiplier.
optim_wrapper = dict(
    type=OptimWrapper,
    optimizer=dict(
        type=AdamW,
        lr=0.0001,  # 0.0002 for DeformDETR
        weight_decay=0.0001),
    clip_grad=dict(max_norm=0.1, norm_type=2),
    paramwise_cfg=dict(custom_keys={'backbone': dict(lr_mult=0.1)})
)  # custom_keys contains sampling_offsets and reference_points in DeformDETR  # noqa
# learning policy
# Epoch-based training for `max_epochs` epochs with `val_interval=1`.
max_epochs = 12
train_cfg = dict(
    type=EpochBasedTrainLoop, max_epochs=max_epochs, val_interval=1)
val_cfg = dict(type=ValLoop)
test_cfg = dict(type=TestLoop)

# A single step schedule spanning the whole run: milestone at epoch 11 with
# gamma=0.1. The milestone is hard-coded, so revisit it if `max_epochs`
# changes.
param_scheduler = [
    dict(
        type=MultiStepLR,
        begin=0,
        end=max_epochs,
        by_epoch=True,
        milestones=[11],
        gamma=0.1)
]
# NOTE: `auto_scale_lr` is the setting used for automatic LR scaling;
# users should leave its values unchanged.
# base_batch_size = (8 GPUs) x (2 samples per GPU)
auto_scale_lr = dict(
    base_batch_size=16,
)