norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    type='EncoderDecoderDiffusion',
    pretrained=
    'work_dirs2/deeplabv3plus_r50-d8_aspp_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_cityscapes20/best_mIoU_iter_72000.pth',
    backbone=dict(
        type='ResNetV1cCustomInitWeights',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        dilations=(1, 1, 2, 4),
        strides=(1, 2, 1, 1),
        norm_cfg=dict(type='SyncBN', requires_grad=True),
        norm_eval=False,
        style='pytorch',
        contract_dilation=True),
    decode_head=dict(
        type='DepthwiseSeparableASPPHeadUnetFCHeadMultiStep',
        pretrained=
        'work_dirs2/deeplabv3plus_r50-d8_aspp_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_cityscapes20/best_mIoU_iter_72000.pth',
        dim=128,
        out_dim=256,
        unet_channels=528,
        dim_mults=[1, 1, 1],
        cat_embedding_dim=16,
        ignore_index=0,
        diffusion_timesteps=100,
        collect_timesteps=[0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 99],
        in_channels=2048,
        in_index=3,
        channels=512,
        dilations=(1, 12, 24, 36),
        c1_in_channels=256,
        c1_channels=48,
        dropout_ratio=0.1,
        num_classes=20,
        norm_cfg=dict(type='SyncBN', requires_grad=True),
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    auxiliary_head=None,
    train_cfg=dict(),
    test_cfg=dict(mode='whole'),
    freeze_parameters=['backbone', 'decode_head'])
dataset_type = 'Cityscapes20Dataset'
data_root = 'data/cityscapes/'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
crop_size = (512, 1024)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotationsCityscapes20'),
    dict(type='Resize', img_scale=(2048, 1024), ratio_range=(0.5, 2.0)),
    dict(type='RandomCrop', crop_size=(512, 1024), cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(
        type='Normalize',
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375],
        to_rgb=True),
    dict(type='Pad', size=(512, 1024), pad_val=0, seg_pad_val=0),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_semantic_seg'])
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(2048, 1024),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(
                type='Normalize',
                mean=[123.675, 116.28, 103.53],
                std=[58.395, 57.12, 57.375],
                to_rgb=True),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img'])
        ])
]
data = dict(
    samples_per_gpu=2,
    workers_per_gpu=2,
    train=dict(
        type='Cityscapes20Dataset',
        data_root='data/cityscapes/',
        img_dir='leftImg8bit/train',
        ann_dir='gtFine/train',
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(type='LoadAnnotationsCityscapes20'),
            dict(
                type='Resize',
                img_scale=(2048, 1024),
                ratio_range=(0.5, 2.0)),
            dict(
                type='RandomCrop', crop_size=(512, 1024),
                cat_max_ratio=0.75),
            dict(type='RandomFlip', prob=0.5),
            dict(type='PhotoMetricDistortion'),
            dict(
                type='Normalize',
                mean=[123.675, 116.28, 103.53],
                std=[58.395, 57.12, 57.375],
                to_rgb=True),
            dict(type='Pad', size=(512, 1024), pad_val=0, seg_pad_val=0),
            dict(type='DefaultFormatBundle'),
            dict(type='Collect', keys=['img', 'gt_semantic_seg'])
        ]),
    val=dict(
        type='Cityscapes20Dataset',
        data_root='data/cityscapes/',
        img_dir='leftImg8bit/val',
        ann_dir='gtFine/val',
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(
                type='MultiScaleFlipAug',
                img_scale=(2048, 1024),
                flip=False,
                transforms=[
                    dict(type='Resize', keep_ratio=True),
                    dict(type='RandomFlip'),
                    dict(
                        type='Normalize',
                        mean=[123.675, 116.28, 103.53],
                        std=[58.395, 57.12, 57.375],
                        to_rgb=True),
                    dict(type='ImageToTensor', keys=['img']),
                    dict(type='Collect', keys=['img'])
                ])
        ]),
    test=dict(
        type='Cityscapes20Dataset',
        data_root='data/cityscapes/',
        img_dir='leftImg8bit/val',
        ann_dir='gtFine/val',
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(
                type='MultiScaleFlipAug',
                img_scale=(2048, 1024),
                flip=False,
                transforms=[
                    dict(type='Resize', keep_ratio=True),
                    dict(type='RandomFlip'),
                    dict(
                        type='Normalize',
                        mean=[123.675, 116.28, 103.53],
                        std=[58.395, 57.12, 57.375],
                        to_rgb=True),
                    dict(type='ImageToTensor', keys=['img']),
                    dict(type='Collect', keys=['img'])
                ])
        ]))
log_config = dict(
    interval=50, hooks=[dict(type='TextLoggerHook', by_epoch=False)])
dist_params = dict(backend='nccl')
log_level = 'INFO'
load_from = None
resume_from = None
workflow = [('train', 1)]
cudnn_benchmark = True
optimizer = dict(
    type='AdamW', lr=0.00015, betas=[0.9, 0.96], weight_decay=0.045)
optimizer_config = dict()
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=1000,
    warmup_ratio=1e-06,
    step=20000,
    gamma=0.5,
    min_lr=1e-06,
    by_epoch=False)
runner = dict(type='IterBasedRunner', max_iters=160000)
checkpoint_config = dict(by_epoch=False, interval=16000, max_keep_ckpts=1)
evaluation = dict(
    interval=16000, metric='mIoU', pre_eval=True, save_best='mIoU')
checkpoint = 'work_dirs2/deeplabv3plus_r50-d8_aspp_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_cityscapes20/best_mIoU_iter_72000.pth'
custom_hooks = [
    dict(
        type='ConstantMomentumEMAHook',
        momentum=0.01,
        interval=25,
        eval_interval=16000,
        auto_resume=True,
        priority=49)
]
work_dir = './work_dirs2/deeplabv3plus_r50-d8_aspp_head_unet_fc_multi_step_ade_pretrained_freeze_embed_160k_cityscapes20_finetune'
gpu_ids = range(0, 8)
auto_resume = True
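
# ---------------------------------------------------------------------------
# Usage sketch (not part of the dumped config): a minimal, hedged example of
# loading this config and instantiating the model with the MMSegmentation 0.x /
# mmcv 1.x APIs (`Config.fromfile`, `build_segmentor`). It assumes the custom
# modules referenced above (EncoderDecoderDiffusion, ResNetV1cCustomInitWeights,
# DepthwiseSeparableASPPHeadUnetFCHeadMultiStep, Cityscapes20Dataset,
# ConstantMomentumEMAHook) have been imported and registered by the repository
# this config comes from; otherwise build_segmentor will not find them.
if __name__ == '__main__':
    from mmcv import Config
    from mmseg.models import build_segmentor

    # Parse this file itself as an mmcv config.
    cfg = Config.fromfile(__file__)

    # Build the segmentor the same way MMSegmentation 0.x's tools/train.py does.
    model = build_segmentor(
        cfg.model,
        train_cfg=cfg.get('train_cfg'),
        test_cfg=cfg.get('test_cfg'))
    print(model)

# For actual training on the 8 GPUs implied by `gpu_ids = range(0, 8)`, the
# standard MMSegmentation launcher would be (assuming the usual tools/ layout):
#   bash tools/dist_train.sh <path/to/this/config>.py 8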