_base_ = './yolox_s_fast_8xb8-300e_coco.py'

# ======================== Modified parameters ======================
# Batch size of a single GPU during training
# 8 -> 32
train_batch_size_per_gpu = 32
# Multi-scale training interval (in iterations)
# 10 -> 1
batch_augments_interval = 1

# Last epoch at which to switch to the second-stage training pipeline
# 15 -> 20
num_last_epochs = 20

# Base learning rate for optim_wrapper, corresponding to an 8xb32=256 batch size
base_lr = 0.004
# SGD -> AdamW
optim_wrapper = dict(
    _delete_=True,
    type='OptimWrapper',
    optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05),
    paramwise_cfg=dict(
        norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True))

# EMA momentum
# 0.0001 -> 0.0002
ema_momentum = 0.0002

# ============================== Unmodified in most cases ===================
model = dict(
    data_preprocessor=dict(batch_augments=[
        dict(
            type='YOLOXBatchSyncRandomResize',
            random_size_range=(480, 800),
            size_divisor=32,
            interval=batch_augments_interval)
    ]))

param_scheduler = [
    dict(
        # use the quadratic formula to warm up for 5 epochs;
        # the lr is updated by iteration
        # TODO: fix default scope in get function
        type='mmdet.QuadraticWarmupLR',
        by_epoch=True,
        begin=0,
        end=5,
        convert_to_iter_based=True),
    dict(
        # use cosine lr from epoch 5 to 280 (max_epochs - num_last_epochs)
        type='CosineAnnealingLR',
        eta_min=base_lr * 0.05,
        begin=5,
        T_max=_base_.max_epochs - num_last_epochs,
        end=_base_.max_epochs - num_last_epochs,
        by_epoch=True,
        convert_to_iter_based=True),
    dict(
        # use a fixed lr during the last num_last_epochs epochs
        type='ConstantLR',
        by_epoch=True,
        factor=1,
        begin=_base_.max_epochs - num_last_epochs,
        end=_base_.max_epochs,
    )
]

custom_hooks = [
    dict(
        type='YOLOXModeSwitchHook',
        num_last_epochs=num_last_epochs,
        new_train_pipeline=_base_.train_pipeline_stage2,
        priority=48),
    dict(type='mmdet.SyncNormHook', priority=48),
    dict(
        type='EMAHook',
        ema_type='ExpMomentumEMA',
        momentum=ema_momentum,
        update_buffers=True,
        strict_load=False,
        priority=49)
]

train_dataloader = dict(batch_size=train_batch_size_per_gpu)
train_cfg = dict(dynamic_intervals=[(_base_.max_epochs - num_last_epochs, 1)])
auto_scale_lr = dict(base_batch_size=8 * train_batch_size_per_gpu)
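
# ---------------------------------------------------------------------------
# Note on `auto_scale_lr`: a minimal sketch of MMEngine's linear scaling rule,
# assuming auto scaling is enabled (e.g. via the training script's
# `--auto-scale-lr` flag); the 4-GPU setup below is only an illustrative
# assumption, not part of this config:
#
#   scaled_lr = base_lr * actual_total_batch_size / base_batch_size
#             = 0.004 * (4 GPUs * 32 imgs) / (8 * 32)
#             = 0.002
#
# With the default 8 GPUs x 32 images the ratio is 1, so `base_lr` is unchanged.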