Spaces:

KyanChen
/

TTP

Runtime error

App Files Files Community

TTP / mmdet /configs /dino /dino_4scale_r50_8xb2_12e_coco.py

KyanChen

Upload 1861 files

3b96cb1 about 1 year ago

raw

history blame

7.03 kB

	# Copyright (c) OpenMMLab. All rights reserved.
	from mmcv.transforms import RandomChoice, RandomChoiceResize
	from mmcv.transforms.loading import LoadImageFromFile
	from mmengine.config import read_base
	from mmengine.model.weight_init import PretrainedInit
	from mmengine.optim.optimizer.optimizer_wrapper import OptimWrapper
	from mmengine.optim.scheduler.lr_scheduler import MultiStepLR
	from mmengine.runner.loops import EpochBasedTrainLoop, TestLoop, ValLoop
	from torch.nn.modules.batchnorm import BatchNorm2d
	from torch.nn.modules.normalization import GroupNorm
	from torch.optim.adamw import AdamW

	from mmdet.datasets.transforms import (LoadAnnotations, PackDetInputs,
	RandomCrop, RandomFlip, Resize)
	from mmdet.models import (DINO, ChannelMapper, DetDataPreprocessor, DINOHead,
	ResNet)
	from mmdet.models.losses.focal_loss import FocalLoss
	from mmdet.models.losses.iou_loss import GIoULoss
	from mmdet.models.losses.smooth_l1_loss import L1Loss
	from mmdet.models.task_modules import (BBoxL1Cost, FocalLossCost,
	HungarianAssigner, IoUCost)

	with read_base():
	from .._base_.datasets.coco_detection import *
	from .._base_.default_runtime import *

	# DINO detector: ResNet-50 backbone, ChannelMapper neck, and six-layer
	# encoder / decoder stacks. The inline "... for DeformDETR" / "... for DN-DETR"
	# notes give the value that method uses for the same option.
	model = dict(
	    type=DINO,
	    num_queries=900,  # num_matching_queries
	    with_box_refine=True,
	    as_two_stage=True,
	    data_preprocessor=dict(
	        type=DetDataPreprocessor,
	        mean=[123.675, 116.28, 103.53],
	        std=[58.395, 57.12, 57.375],
	        bgr_to_rgb=True,
	        pad_size_divisor=1,
	    ),
	    backbone=dict(
	        type=ResNet,
	        depth=50,
	        num_stages=4,
	        out_indices=(1, 2, 3),
	        frozen_stages=1,
	        norm_cfg=dict(type=BatchNorm2d, requires_grad=False),
	        norm_eval=True,
	        style='pytorch',
	        init_cfg=dict(
	            type=PretrainedInit,
	            checkpoint='torchvision://resnet50',
	        ),
	    ),
	    # Three input channel sizes, one per entry of backbone.out_indices;
	    # num_outs asks for four outputs.
	    neck=dict(
	        type=ChannelMapper,
	        in_channels=[512, 1024, 2048],
	        kernel_size=1,
	        out_channels=256,
	        act_cfg=None,
	        norm_cfg=dict(type=GroupNorm, num_groups=32),
	        num_outs=4,
	    ),
	    encoder=dict(
	        num_layers=6,
	        layer_cfg=dict(
	            self_attn_cfg=dict(
	                embed_dims=256,
	                num_levels=4,
	                dropout=0.0,  # 0.1 for DeformDETR
	            ),
	            ffn_cfg=dict(
	                embed_dims=256,
	                feedforward_channels=2048,  # 1024 for DeformDETR
	                ffn_drop=0.0,  # 0.1 for DeformDETR
	            ),
	        ),
	    ),
	    decoder=dict(
	        num_layers=6,
	        return_intermediate=True,
	        layer_cfg=dict(
	            self_attn_cfg=dict(
	                embed_dims=256,
	                num_heads=8,
	                dropout=0.0,  # 0.1 for DeformDETR
	            ),
	            cross_attn_cfg=dict(
	                embed_dims=256,
	                num_levels=4,
	                dropout=0.0,  # 0.1 for DeformDETR
	            ),
	            ffn_cfg=dict(
	                embed_dims=256,
	                feedforward_channels=2048,  # 1024 for DeformDETR
	                ffn_drop=0.0,  # 0.1 for DeformDETR
	            ),
	        ),
	        post_norm_cfg=None,
	    ),
	    positional_encoding=dict(
	        num_feats=128,
	        normalize=True,
	        offset=0.0,  # -0.5 for DeformDETR
	        temperature=20,  # 10000 for DeformDETR
	    ),
	    bbox_head=dict(
	        type=DINOHead,
	        num_classes=80,
	        sync_cls_avg_factor=True,
	        loss_cls=dict(
	            type=FocalLoss,
	            use_sigmoid=True,
	            gamma=2.0,
	            alpha=0.25,
	            loss_weight=1.0,  # 2.0 in DeformDETR
	        ),
	        loss_bbox=dict(type=L1Loss, loss_weight=5.0),
	        loss_iou=dict(type=GIoULoss, loss_weight=2.0),
	    ),
	    # TODO: Move to model.train_cfg ?
	    dn_cfg=dict(
	        label_noise_scale=0.5,
	        box_noise_scale=1.0,  # 0.4 for DN-DETR
	        # TODO: half num_dn_queries
	        group_cfg=dict(dynamic=True, num_groups=None, num_dn_queries=100),
	    ),
	    # training and testing settings
	    train_cfg=dict(
	        assigner=dict(
	            type=HungarianAssigner,
	            # The cost weights below (2.0 / 5.0 / 2.0) are listed in the
	            # order classification, box L1, GIoU.
	            match_costs=[
	                dict(type=FocalLossCost, weight=2.0),
	                dict(type=BBoxL1Cost, weight=5.0, box_format='xywh'),
	                dict(type=IoUCost, iou_mode='giou', weight=2.0),
	            ],
	        ),
	    ),
	    test_cfg=dict(max_per_img=300),  # 100 for DeformDETR
	)

	# Training pipeline. NOTE: the img_scale and the Pad's size_divisor differ
	# from the default setting in mmdet.
	train_pipeline = [
	    dict(type=LoadImageFromFile, backend_args=backend_args),
	    dict(type=LoadAnnotations, with_bbox=True),
	    dict(type=RandomFlip, prob=0.5),
	    dict(
	        type=RandomChoice,
	        transforms=[
	            # Branch 1: one multi-scale resize.
	            [
	                dict(
	                    type=RandomChoiceResize,
	                    resize_type=Resize,
	                    scales=[
	                        (480, 1333),
	                        (512, 1333),
	                        (544, 1333),
	                        (576, 1333),
	                        (608, 1333),
	                        (640, 1333),
	                        (672, 1333),
	                        (704, 1333),
	                        (736, 1333),
	                        (768, 1333),
	                        (800, 1333),
	                    ],
	                    keep_ratio=True,
	                ),
	            ],
	            # Branch 2: resize, random crop, then the same multi-scale
	            # resize as branch 1.
	            [
	                dict(
	                    type=RandomChoiceResize,
	                    resize_type=Resize,
	                    # The ratio of every image in the train dataset is < 7;
	                    # this follows the original implementation.
	                    scales=[(400, 4200), (500, 4200), (600, 4200)],
	                    keep_ratio=True,
	                ),
	                dict(
	                    type=RandomCrop,
	                    crop_type='absolute_range',
	                    crop_size=(384, 600),
	                    allow_negative_crop=True,
	                ),
	                dict(
	                    type=RandomChoiceResize,
	                    resize_type=Resize,
	                    scales=[
	                        (480, 1333),
	                        (512, 1333),
	                        (544, 1333),
	                        (576, 1333),
	                        (608, 1333),
	                        (640, 1333),
	                        (672, 1333),
	                        (704, 1333),
	                        (736, 1333),
	                        (768, 1333),
	                        (800, 1333),
	                    ],
	                    keep_ratio=True,
	                ),
	            ],
	        ],
	    ),
	    dict(type=PackDetInputs),
	]
	# `train_dataloader` is not defined in this file, so it presumably comes from
	# the base-config star imports; update its dataset to use the pipeline above
	# with filter_empty_gt disabled.
	train_dataloader.update(
	    dataset=dict(
	        filter_cfg=dict(filter_empty_gt=False),
	        pipeline=train_pipeline,
	    )
	)

	# Optimizer: AdamW wrapped in OptimWrapper, with gradient clipping and a
	# 0.1 learning-rate multiplier for parameters matched by the 'backbone' key.
	optim_wrapper = dict(
	    type=OptimWrapper,
	    optimizer=dict(
	        type=AdamW,
	        lr=1e-4,  # 0.0002 for DeformDETR
	        weight_decay=1e-4,
	    ),
	    clip_grad=dict(max_norm=0.1, norm_type=2),
	    # custom_keys contains sampling_offsets and reference_points in DeformDETR # noqa
	    paramwise_cfg=dict(custom_keys={'backbone': dict(lr_mult=0.1)}),
	)

	# Learning policy: 12 epochs, with validation configured at an interval of 1.
	max_epochs = 12
	train_cfg = dict(
	    type=EpochBasedTrainLoop,
	    max_epochs=max_epochs,
	    val_interval=1,
	)

	val_cfg = dict(type=ValLoop)
	test_cfg = dict(type=TestLoop)

	# Epoch-based multi-step schedule spanning the whole run, with a single
	# milestone at epoch 11 and gamma 0.1.
	param_scheduler = [
	    dict(
	        type=MultiStepLR,
	        begin=0,
	        end=max_epochs,
	        by_epoch=True,
	        milestones=[11],
	        gamma=0.1,
	    ),
	]

	# NOTE: `auto_scale_lr` is used for automatic LR scaling;
	# USERS SHOULD NOT CHANGE ITS VALUES.
	# base_batch_size = (8 GPUs) x (2 samples per GPU)
	auto_scale_lr = dict(base_batch_size=16)