aaronb commited on
Commit
6fa5e3d
1 Parent(s): 5fcbf51

Upload folder using huggingface_hub

Browse files
Files changed (37) hide show
  1. deeplabv3plus_r101_multistep/20230304_140016.log +0 -0
  2. deeplabv3plus_r101_multistep/20230304_140016.log.json +0 -0
  3. deeplabv3plus_r101_multistep/best_mIoU_iter_48000.pth +3 -0
  4. deeplabv3plus_r101_multistep/deeplabv3plus_r101-d8_aspp_head_unet_fc_multi_step_ade_pretrained_freeze_embed_160k_ade20k151_finetune.py +195 -0
  5. deeplabv3plus_r101_multistep/iter_160000.pth +3 -0
  6. deeplabv3plus_r101_multistep/latest.pth +3 -0
  7. deeplabv3plus_r101_singlestep/20230303_203803.log +1100 -0
  8. deeplabv3plus_r101_singlestep/20230303_203803.log.json +3 -0
  9. deeplabv3plus_r101_singlestep/20230303_203941.log +0 -0
  10. deeplabv3plus_r101_singlestep/20230303_203941.log.json +0 -0
  11. deeplabv3plus_r101_singlestep/best_mIoU_iter_40000.pth +3 -0
  12. deeplabv3plus_r101_singlestep/deeplabv3plus_r101-d8_aspp_head_unet_fc_small_single_step_ade_pretrained_freeze_embed_80k_ade20k151.py +184 -0
  13. deeplabv3plus_r101_singlestep/iter_80000.pth +3 -0
  14. deeplabv3plus_r101_singlestep/latest.pth +3 -0
  15. deeplabv3plus_r50_multistep/20230303_205044.log +0 -0
  16. deeplabv3plus_r50_multistep/20230303_205044.log.json +0 -0
  17. deeplabv3plus_r50_multistep/best_mIoU_iter_48000.pth +3 -0
  18. deeplabv3plus_r50_multistep/deeplabv3plus_r50-d8_aspp_head_unet_fc_multi_step_ade_pretrained_freeze_embed_160k_ade20k151_finetune.py +195 -0
  19. deeplabv3plus_r50_multistep/iter_160000.pth +3 -0
  20. deeplabv3plus_r50_multistep/latest.pth +3 -0
  21. deeplabv3plus_r50_singlestep/20230303_152127.log +0 -0
  22. deeplabv3plus_r50_singlestep/20230303_152127.log.json +0 -0
  23. deeplabv3plus_r50_singlestep/best_mIoU_iter_64000.pth +3 -0
  24. deeplabv3plus_r50_singlestep/deeplabv3plus_r50-d8_aspp_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151.py +184 -0
  25. deeplabv3plus_r50_singlestep/iter_80000.pth +3 -0
  26. deeplabv3plus_r50_singlestep/latest.pth +3 -0
  27. segformer_b2_multistep/20230302_115140.log +0 -0
  28. segformer_b2_multistep/20230302_115140.log.json +0 -0
  29. segformer_b2_multistep/best_mIoU_iter_144000.pth +3 -0
  30. segformer_b2_multistep/eval_single_scale_20230303_091319.json +19 -0
  31. segformer_b2_multistep/iter_304000.pth +3 -0
  32. segformer_b2_multistep/latest.pth +3 -0
  33. segformer_b2_multistep/segformer_mit_b2_segformer_head_unet_fc_small_multi_step_ade_pretrained_freeze_embed_160k_ade20k151_finetune_ema_t100.py +195 -0
  34. segformer_b2_singlestep/20230303_135933.log +1137 -0
  35. segformer_b2_singlestep/20230303_135933.log.json +1 -0
  36. segformer_b2_singlestep/iter_80000.pth +3 -0
  37. segformer_b2_singlestep/segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151.py +184 -0
deeplabv3plus_r101_multistep/20230304_140016.log ADDED
The diff for this file is too large to render. See raw diff
 
deeplabv3plus_r101_multistep/20230304_140016.log.json ADDED
The diff for this file is too large to render. See raw diff
 
deeplabv3plus_r101_multistep/best_mIoU_iter_48000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5e5393e98d1c209e19d4a67d05d4b36715040d293dde46b5441967899c2663aa
3
+ size 690868455
deeplabv3plus_r101_multistep/deeplabv3plus_r101-d8_aspp_head_unet_fc_multi_step_ade_pretrained_freeze_embed_160k_ade20k151_finetune.py ADDED
@@ -0,0 +1,195 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ norm_cfg = dict(type='SyncBN', requires_grad=True)
2
+ model = dict(
3
+ type='EncoderDecoderDiffusion',
4
+ pretrained=
5
+ 'work_dirs2/deeplabv3plus_r101-d8_aspp_head_unet_fc_small_single_step_ade_pretrained_freeze_embed_80k_ade20k151/latest.pth',
6
+ backbone=dict(
7
+ type='ResNetV1cCustomInitWeights',
8
+ depth=101,
9
+ num_stages=4,
10
+ out_indices=(0, 1, 2, 3),
11
+ dilations=(1, 1, 2, 4),
12
+ strides=(1, 2, 1, 1),
13
+ norm_cfg=dict(type='SyncBN', requires_grad=True),
14
+ norm_eval=False,
15
+ style='pytorch',
16
+ contract_dilation=True),
17
+ decode_head=dict(
18
+ type='DepthwiseSeparableASPPHeadUnetFCHeadMultiStep',
19
+ pretrained=
20
+ 'work_dirs2/deeplabv3plus_r101-d8_aspp_head_unet_fc_small_single_step_ade_pretrained_freeze_embed_80k_ade20k151/latest.pth',
21
+ dim=128,
22
+ out_dim=256,
23
+ unet_channels=528,
24
+ dim_mults=[1, 1, 1],
25
+ cat_embedding_dim=16,
26
+ ignore_index=0,
27
+ diffusion_timesteps=100,
28
+ collect_timesteps=[0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 99],
29
+ in_channels=2048,
30
+ in_index=3,
31
+ channels=512,
32
+ dilations=(1, 12, 24, 36),
33
+ c1_in_channels=256,
34
+ c1_channels=48,
35
+ dropout_ratio=0.1,
36
+ num_classes=151,
37
+ norm_cfg=dict(type='SyncBN', requires_grad=True),
38
+ align_corners=False,
39
+ loss_decode=dict(
40
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
41
+ auxiliary_head=None,
42
+ train_cfg=dict(),
43
+ test_cfg=dict(mode='whole'),
44
+ freeze_parameters=['backbone', 'decode_head'])
45
+ dataset_type = 'ADE20K151Dataset'
46
+ data_root = 'data/ade/ADEChallengeData2016'
47
+ img_norm_cfg = dict(
48
+ mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
49
+ crop_size = (512, 512)
50
+ train_pipeline = [
51
+ dict(type='LoadImageFromFile'),
52
+ dict(type='LoadAnnotations', reduce_zero_label=False),
53
+ dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),
54
+ dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75),
55
+ dict(type='RandomFlip', prob=0.5),
56
+ dict(type='PhotoMetricDistortion'),
57
+ dict(
58
+ type='Normalize',
59
+ mean=[123.675, 116.28, 103.53],
60
+ std=[58.395, 57.12, 57.375],
61
+ to_rgb=True),
62
+ dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=0),
63
+ dict(type='DefaultFormatBundle'),
64
+ dict(type='Collect', keys=['img', 'gt_semantic_seg'])
65
+ ]
66
+ test_pipeline = [
67
+ dict(type='LoadImageFromFile'),
68
+ dict(
69
+ type='MultiScaleFlipAug',
70
+ img_scale=(2048, 512),
71
+ flip=False,
72
+ transforms=[
73
+ dict(type='Resize', keep_ratio=True),
74
+ dict(type='RandomFlip'),
75
+ dict(
76
+ type='Normalize',
77
+ mean=[123.675, 116.28, 103.53],
78
+ std=[58.395, 57.12, 57.375],
79
+ to_rgb=True),
80
+ dict(type='Pad', size_divisor=16, pad_val=0, seg_pad_val=0),
81
+ dict(type='ImageToTensor', keys=['img']),
82
+ dict(type='Collect', keys=['img'])
83
+ ])
84
+ ]
85
+ data = dict(
86
+ samples_per_gpu=4,
87
+ workers_per_gpu=4,
88
+ train=dict(
89
+ type='ADE20K151Dataset',
90
+ data_root='data/ade/ADEChallengeData2016',
91
+ img_dir='images/training',
92
+ ann_dir='annotations/training',
93
+ pipeline=[
94
+ dict(type='LoadImageFromFile'),
95
+ dict(type='LoadAnnotations', reduce_zero_label=False),
96
+ dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),
97
+ dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75),
98
+ dict(type='RandomFlip', prob=0.5),
99
+ dict(type='PhotoMetricDistortion'),
100
+ dict(
101
+ type='Normalize',
102
+ mean=[123.675, 116.28, 103.53],
103
+ std=[58.395, 57.12, 57.375],
104
+ to_rgb=True),
105
+ dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=0),
106
+ dict(type='DefaultFormatBundle'),
107
+ dict(type='Collect', keys=['img', 'gt_semantic_seg'])
108
+ ]),
109
+ val=dict(
110
+ type='ADE20K151Dataset',
111
+ data_root='data/ade/ADEChallengeData2016',
112
+ img_dir='images/validation',
113
+ ann_dir='annotations/validation',
114
+ pipeline=[
115
+ dict(type='LoadImageFromFile'),
116
+ dict(
117
+ type='MultiScaleFlipAug',
118
+ img_scale=(2048, 512),
119
+ flip=False,
120
+ transforms=[
121
+ dict(type='Resize', keep_ratio=True),
122
+ dict(type='RandomFlip'),
123
+ dict(
124
+ type='Normalize',
125
+ mean=[123.675, 116.28, 103.53],
126
+ std=[58.395, 57.12, 57.375],
127
+ to_rgb=True),
128
+ dict(
129
+ type='Pad', size_divisor=16, pad_val=0, seg_pad_val=0),
130
+ dict(type='ImageToTensor', keys=['img']),
131
+ dict(type='Collect', keys=['img'])
132
+ ])
133
+ ]),
134
+ test=dict(
135
+ type='ADE20K151Dataset',
136
+ data_root='data/ade/ADEChallengeData2016',
137
+ img_dir='images/validation',
138
+ ann_dir='annotations/validation',
139
+ pipeline=[
140
+ dict(type='LoadImageFromFile'),
141
+ dict(
142
+ type='MultiScaleFlipAug',
143
+ img_scale=(2048, 512),
144
+ flip=False,
145
+ transforms=[
146
+ dict(type='Resize', keep_ratio=True),
147
+ dict(type='RandomFlip'),
148
+ dict(
149
+ type='Normalize',
150
+ mean=[123.675, 116.28, 103.53],
151
+ std=[58.395, 57.12, 57.375],
152
+ to_rgb=True),
153
+ dict(
154
+ type='Pad', size_divisor=16, pad_val=0, seg_pad_val=0),
155
+ dict(type='ImageToTensor', keys=['img']),
156
+ dict(type='Collect', keys=['img'])
157
+ ])
158
+ ]))
159
+ log_config = dict(
160
+ interval=50, hooks=[dict(type='TextLoggerHook', by_epoch=False)])
161
+ dist_params = dict(backend='nccl')
162
+ log_level = 'INFO'
163
+ load_from = None
164
+ resume_from = None
165
+ workflow = [('train', 1)]
166
+ cudnn_benchmark = True
167
+ optimizer = dict(
168
+ type='AdamW', lr=0.00015, betas=[0.9, 0.96], weight_decay=0.045)
169
+ optimizer_config = dict()
170
+ lr_config = dict(
171
+ policy='step',
172
+ warmup='linear',
173
+ warmup_iters=1000,
174
+ warmup_ratio=1e-06,
175
+ step=20000,
176
+ gamma=0.5,
177
+ min_lr=1e-06,
178
+ by_epoch=False)
179
+ runner = dict(type='IterBasedRunner', max_iters=160000)
180
+ checkpoint_config = dict(by_epoch=False, interval=16000, max_keep_ckpts=1)
181
+ evaluation = dict(
182
+ interval=16000, metric='mIoU', pre_eval=True, save_best='mIoU')
183
+ checkpoint = 'work_dirs2/deeplabv3plus_r101-d8_aspp_head_unet_fc_small_single_step_ade_pretrained_freeze_embed_80k_ade20k151/latest.pth'
184
+ custom_hooks = [
185
+ dict(
186
+ type='ConstantMomentumEMAHook',
187
+ momentum=0.01,
188
+ interval=25,
189
+ eval_interval=16000,
190
+ auto_resume=True,
191
+ priority=49)
192
+ ]
193
+ work_dir = './work_dirs2/deeplabv3plus_r101-d8_aspp_head_unet_fc_multi_step_ade_pretrained_freeze_embed_160k_ade20k151_finetune'
194
+ gpu_ids = range(0, 8)
195
+ auto_resume = True
deeplabv3plus_r101_multistep/iter_160000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a350e8dadbf9837bff8006fe636be260d1f824d9c45222b26abf968d72ec1ea
3
+ size 690868455
deeplabv3plus_r101_multistep/latest.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a350e8dadbf9837bff8006fe636be260d1f824d9c45222b26abf968d72ec1ea
3
+ size 690868455
deeplabv3plus_r101_singlestep/20230303_203803.log ADDED
@@ -0,0 +1,1100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2023-03-03 20:38:03,065 - mmseg - INFO - Multi-processing start method is `None`
2
+ 2023-03-03 20:38:03,078 - mmseg - INFO - OpenCV num_threads is `128
3
+ 2023-03-03 20:38:03,078 - mmseg - INFO - OMP num threads is 1
4
+ 2023-03-03 20:38:03,131 - mmseg - INFO - Environment info:
5
+ ------------------------------------------------------------
6
+ sys.platform: linux
7
+ Python: 3.7.16 (default, Jan 17 2023, 22:20:44) [GCC 11.2.0]
8
+ CUDA available: True
9
+ GPU 0,1,2,3,4,5,6,7: NVIDIA A100-SXM4-80GB
10
+ CUDA_HOME: /mnt/petrelfs/laizeqiang/miniconda3/envs/torch
11
+ NVCC: Cuda compilation tools, release 11.6, V11.6.124
12
+ GCC: gcc (GCC) 4.8.5 20150623 (Red Hat 4.8.5-44)
13
+ PyTorch: 1.13.1
14
+ PyTorch compiling details: PyTorch built with:
15
+ - GCC 9.3
16
+ - C++ Version: 201402
17
+ - Intel(R) oneAPI Math Kernel Library Version 2021.4-Product Build 20210904 for Intel(R) 64 architecture applications
18
+ - Intel(R) MKL-DNN v2.6.0 (Git Hash 52b5f107dd9cf10910aaa19cb47f3abf9b349815)
19
+ - OpenMP 201511 (a.k.a. OpenMP 4.5)
20
+ - LAPACK is enabled (usually provided by MKL)
21
+ - NNPACK is enabled
22
+ - CPU capability usage: AVX2
23
+ - CUDA Runtime 11.6
24
+ - NVCC architecture flags: -gencode;arch=compute_37,code=sm_37;-gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_61,code=sm_61;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86;-gencode;arch=compute_37,code=compute_37
25
+ - CuDNN 8.3.2 (built against CUDA 11.5)
26
+ - Magma 2.6.1
27
+ - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CUDA_VERSION=11.6, CUDNN_VERSION=8.3.2, CXX_COMPILER=/opt/rh/devtoolset-9/root/usr/bin/c++, CXX_FLAGS= -fabi-version=11 -Wno-deprecated -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -fopenmp -DNDEBUG -DUSE_KINETO -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -DEDGE_PROFILER_USE_KINETO -O2 -fPIC -Wno-narrowing -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wunused-local-typedefs -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-error=deprecated-declarations -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=redundant-decls -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, PERF_WITH_AVX512=1, TORCH_VERSION=1.13.1, USE_CUDA=ON, USE_CUDNN=ON, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=ON, USE_NNPACK=ON, USE_OPENMP=ON, USE_ROCM=OFF,
28
+
29
+ TorchVision: 0.14.1
30
+ OpenCV: 4.7.0
31
+ MMCV: 1.7.1
32
+ MMCV Compiler: GCC 9.3
33
+ MMCV CUDA Compiler: 11.6
34
+ MMSegmentation: 0.30.0+c844fc6
35
+ ------------------------------------------------------------
36
+
37
+ 2023-03-03 20:38:03,131 - mmseg - INFO - Distributed training: True
38
+ 2023-03-03 20:38:03,834 - mmseg - INFO - Config:
39
+ norm_cfg = dict(type='SyncBN', requires_grad=True)
40
+ model = dict(
41
+ type='EncoderDecoderFreeze',
42
+ pretrained=
43
+ 'pretrained/deeplabv3plus_r101-d8_512x512_160k_ade20k_20200615_123232-38ed86bb.pth',
44
+ backbone=dict(
45
+ type='ResNetV1cCustomInitWeights',
46
+ depth=101,
47
+ num_stages=4,
48
+ out_indices=(0, 1, 2, 3),
49
+ dilations=(1, 1, 2, 4),
50
+ strides=(1, 2, 1, 1),
51
+ norm_cfg=dict(type='SyncBN', requires_grad=True),
52
+ norm_eval=False,
53
+ style='pytorch',
54
+ contract_dilation=True),
55
+ decode_head=dict(
56
+ type='DepthwiseSeparableASPPHeadUnetFCHeadSingleStep',
57
+ pretrained=
58
+ 'pretrained/deeplabv3plus_r101-d8_512x512_160k_ade20k_20200615_123232-38ed86bb.pth',
59
+ dim=256,
60
+ out_dim=256,
61
+ unet_channels=528,
62
+ dim_mults=[1, 1, 1],
63
+ cat_embedding_dim=16,
64
+ ignore_index=0,
65
+ in_channels=2048,
66
+ in_index=3,
67
+ channels=512,
68
+ dilations=(1, 12, 24, 36),
69
+ c1_in_channels=256,
70
+ c1_channels=48,
71
+ dropout_ratio=0.1,
72
+ num_classes=151,
73
+ norm_cfg=dict(type='SyncBN', requires_grad=True),
74
+ align_corners=False,
75
+ loss_decode=dict(
76
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
77
+ auxiliary_head=None,
78
+ train_cfg=dict(),
79
+ test_cfg=dict(mode='whole'),
80
+ freeze_parameters=['backbone', 'decode_head'])
81
+ dataset_type = 'ADE20K151Dataset'
82
+ data_root = 'data/ade/ADEChallengeData2016'
83
+ img_norm_cfg = dict(
84
+ mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
85
+ crop_size = (512, 512)
86
+ train_pipeline = [
87
+ dict(type='LoadImageFromFile'),
88
+ dict(type='LoadAnnotations', reduce_zero_label=False),
89
+ dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),
90
+ dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75),
91
+ dict(type='RandomFlip', prob=0.5),
92
+ dict(type='PhotoMetricDistortion'),
93
+ dict(
94
+ type='Normalize',
95
+ mean=[123.675, 116.28, 103.53],
96
+ std=[58.395, 57.12, 57.375],
97
+ to_rgb=True),
98
+ dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=0),
99
+ dict(type='DefaultFormatBundle'),
100
+ dict(type='Collect', keys=['img', 'gt_semantic_seg'])
101
+ ]
102
+ test_pipeline = [
103
+ dict(type='LoadImageFromFile'),
104
+ dict(
105
+ type='MultiScaleFlipAug',
106
+ img_scale=(2048, 512),
107
+ flip=False,
108
+ transforms=[
109
+ dict(type='Resize', keep_ratio=True),
110
+ dict(type='RandomFlip'),
111
+ dict(
112
+ type='Normalize',
113
+ mean=[123.675, 116.28, 103.53],
114
+ std=[58.395, 57.12, 57.375],
115
+ to_rgb=True),
116
+ dict(type='Pad', size_divisor=16, pad_val=0, seg_pad_val=0),
117
+ dict(type='ImageToTensor', keys=['img']),
118
+ dict(type='Collect', keys=['img'])
119
+ ])
120
+ ]
121
+ data = dict(
122
+ samples_per_gpu=4,
123
+ workers_per_gpu=4,
124
+ train=dict(
125
+ type='ADE20K151Dataset',
126
+ data_root='data/ade/ADEChallengeData2016',
127
+ img_dir='images/training',
128
+ ann_dir='annotations/training',
129
+ pipeline=[
130
+ dict(type='LoadImageFromFile'),
131
+ dict(type='LoadAnnotations', reduce_zero_label=False),
132
+ dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),
133
+ dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75),
134
+ dict(type='RandomFlip', prob=0.5),
135
+ dict(type='PhotoMetricDistortion'),
136
+ dict(
137
+ type='Normalize',
138
+ mean=[123.675, 116.28, 103.53],
139
+ std=[58.395, 57.12, 57.375],
140
+ to_rgb=True),
141
+ dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=0),
142
+ dict(type='DefaultFormatBundle'),
143
+ dict(type='Collect', keys=['img', 'gt_semantic_seg'])
144
+ ]),
145
+ val=dict(
146
+ type='ADE20K151Dataset',
147
+ data_root='data/ade/ADEChallengeData2016',
148
+ img_dir='images/validation',
149
+ ann_dir='annotations/validation',
150
+ pipeline=[
151
+ dict(type='LoadImageFromFile'),
152
+ dict(
153
+ type='MultiScaleFlipAug',
154
+ img_scale=(2048, 512),
155
+ flip=False,
156
+ transforms=[
157
+ dict(type='Resize', keep_ratio=True),
158
+ dict(type='RandomFlip'),
159
+ dict(
160
+ type='Normalize',
161
+ mean=[123.675, 116.28, 103.53],
162
+ std=[58.395, 57.12, 57.375],
163
+ to_rgb=True),
164
+ dict(
165
+ type='Pad', size_divisor=16, pad_val=0, seg_pad_val=0),
166
+ dict(type='ImageToTensor', keys=['img']),
167
+ dict(type='Collect', keys=['img'])
168
+ ])
169
+ ]),
170
+ test=dict(
171
+ type='ADE20K151Dataset',
172
+ data_root='data/ade/ADEChallengeData2016',
173
+ img_dir='images/validation',
174
+ ann_dir='annotations/validation',
175
+ pipeline=[
176
+ dict(type='LoadImageFromFile'),
177
+ dict(
178
+ type='MultiScaleFlipAug',
179
+ img_scale=(2048, 512),
180
+ flip=False,
181
+ transforms=[
182
+ dict(type='Resize', keep_ratio=True),
183
+ dict(type='RandomFlip'),
184
+ dict(
185
+ type='Normalize',
186
+ mean=[123.675, 116.28, 103.53],
187
+ std=[58.395, 57.12, 57.375],
188
+ to_rgb=True),
189
+ dict(
190
+ type='Pad', size_divisor=16, pad_val=0, seg_pad_val=0),
191
+ dict(type='ImageToTensor', keys=['img']),
192
+ dict(type='Collect', keys=['img'])
193
+ ])
194
+ ]))
195
+ log_config = dict(
196
+ interval=50, hooks=[dict(type='TextLoggerHook', by_epoch=False)])
197
+ dist_params = dict(backend='nccl')
198
+ log_level = 'INFO'
199
+ load_from = None
200
+ resume_from = None
201
+ workflow = [('train', 1)]
202
+ cudnn_benchmark = True
203
+ optimizer = dict(
204
+ type='AdamW', lr=0.00015, betas=[0.9, 0.96], weight_decay=0.045)
205
+ optimizer_config = dict()
206
+ lr_config = dict(
207
+ policy='step',
208
+ warmup='linear',
209
+ warmup_iters=1000,
210
+ warmup_ratio=1e-06,
211
+ step=10000,
212
+ gamma=0.5,
213
+ min_lr=1e-06,
214
+ by_epoch=False)
215
+ runner = dict(type='IterBasedRunner', max_iters=80000)
216
+ checkpoint_config = dict(by_epoch=False, interval=8000, max_keep_ckpts=1)
217
+ evaluation = dict(
218
+ interval=8000, metric='mIoU', pre_eval=True, save_best='mIoU')
219
+ checkpoint = 'pretrained/deeplabv3plus_r101-d8_512x512_160k_ade20k_20200615_123232-38ed86bb.pth'
220
+ work_dir = './work_dirs2/deeplabv3plus_r101-d8_aspp_head_unet_fc_small_single_step_ade_pretrained_freeze_embed_80k_ade20k151'
221
+ gpu_ids = range(0, 8)
222
+ auto_resume = True
223
+
224
+ 2023-03-03 20:38:08,218 - mmseg - INFO - Set random seed to 1819371145, deterministic: False
225
+ 2023-03-03 20:38:09,698 - mmseg - INFO - Parameters in backbone freezed!
226
+ 2023-03-03 20:38:09,699 - mmseg - INFO - Trainable parameters in DepthwiseSeparableASPPHeadUnetFCHeadSingleStep: ['unet.init_conv.weight', 'unet.init_conv.bias', 'unet.time_mlp.1.weight', 'unet.time_mlp.1.bias', 'unet.time_mlp.3.weight', 'unet.time_mlp.3.bias', 'unet.downs.0.0.mlp.1.weight', 'unet.downs.0.0.mlp.1.bias', 'unet.downs.0.0.block1.proj.weight', 'unet.downs.0.0.block1.proj.bias', 'unet.downs.0.0.block1.norm.weight', 'unet.downs.0.0.block1.norm.bias', 'unet.downs.0.0.block2.proj.weight', 'unet.downs.0.0.block2.proj.bias', 'unet.downs.0.0.block2.norm.weight', 'unet.downs.0.0.block2.norm.bias', 'unet.downs.0.1.mlp.1.weight', 'unet.downs.0.1.mlp.1.bias', 'unet.downs.0.1.block1.proj.weight', 'unet.downs.0.1.block1.proj.bias', 'unet.downs.0.1.block1.norm.weight', 'unet.downs.0.1.block1.norm.bias', 'unet.downs.0.1.block2.proj.weight', 'unet.downs.0.1.block2.proj.bias', 'unet.downs.0.1.block2.norm.weight', 'unet.downs.0.1.block2.norm.bias', 'unet.downs.0.2.fn.fn.to_qkv.weight', 'unet.downs.0.2.fn.fn.to_out.0.weight', 'unet.downs.0.2.fn.fn.to_out.0.bias', 'unet.downs.0.2.fn.fn.to_out.1.g', 'unet.downs.0.2.fn.norm.g', 'unet.downs.0.3.weight', 'unet.downs.0.3.bias', 'unet.downs.1.0.mlp.1.weight', 'unet.downs.1.0.mlp.1.bias', 'unet.downs.1.0.block1.proj.weight', 'unet.downs.1.0.block1.proj.bias', 'unet.downs.1.0.block1.norm.weight', 'unet.downs.1.0.block1.norm.bias', 'unet.downs.1.0.block2.proj.weight', 'unet.downs.1.0.block2.proj.bias', 'unet.downs.1.0.block2.norm.weight', 'unet.downs.1.0.block2.norm.bias', 'unet.downs.1.1.mlp.1.weight', 'unet.downs.1.1.mlp.1.bias', 'unet.downs.1.1.block1.proj.weight', 'unet.downs.1.1.block1.proj.bias', 'unet.downs.1.1.block1.norm.weight', 'unet.downs.1.1.block1.norm.bias', 'unet.downs.1.1.block2.proj.weight', 'unet.downs.1.1.block2.proj.bias', 'unet.downs.1.1.block2.norm.weight', 'unet.downs.1.1.block2.norm.bias', 'unet.downs.1.2.fn.fn.to_qkv.weight', 'unet.downs.1.2.fn.fn.to_out.0.weight', 'unet.downs.1.2.fn.fn.to_out.0.bias', 'unet.downs.1.2.fn.fn.to_out.1.g', 'unet.downs.1.2.fn.norm.g', 'unet.downs.1.3.weight', 'unet.downs.1.3.bias', 'unet.downs.2.0.mlp.1.weight', 'unet.downs.2.0.mlp.1.bias', 'unet.downs.2.0.block1.proj.weight', 'unet.downs.2.0.block1.proj.bias', 'unet.downs.2.0.block1.norm.weight', 'unet.downs.2.0.block1.norm.bias', 'unet.downs.2.0.block2.proj.weight', 'unet.downs.2.0.block2.proj.bias', 'unet.downs.2.0.block2.norm.weight', 'unet.downs.2.0.block2.norm.bias', 'unet.downs.2.1.mlp.1.weight', 'unet.downs.2.1.mlp.1.bias', 'unet.downs.2.1.block1.proj.weight', 'unet.downs.2.1.block1.proj.bias', 'unet.downs.2.1.block1.norm.weight', 'unet.downs.2.1.block1.norm.bias', 'unet.downs.2.1.block2.proj.weight', 'unet.downs.2.1.block2.proj.bias', 'unet.downs.2.1.block2.norm.weight', 'unet.downs.2.1.block2.norm.bias', 'unet.downs.2.2.fn.fn.to_qkv.weight', 'unet.downs.2.2.fn.fn.to_out.0.weight', 'unet.downs.2.2.fn.fn.to_out.0.bias', 'unet.downs.2.2.fn.fn.to_out.1.g', 'unet.downs.2.2.fn.norm.g', 'unet.downs.2.3.weight', 'unet.downs.2.3.bias', 'unet.ups.0.0.mlp.1.weight', 'unet.ups.0.0.mlp.1.bias', 'unet.ups.0.0.block1.proj.weight', 'unet.ups.0.0.block1.proj.bias', 'unet.ups.0.0.block1.norm.weight', 'unet.ups.0.0.block1.norm.bias', 'unet.ups.0.0.block2.proj.weight', 'unet.ups.0.0.block2.proj.bias', 'unet.ups.0.0.block2.norm.weight', 'unet.ups.0.0.block2.norm.bias', 'unet.ups.0.0.res_conv.weight', 'unet.ups.0.0.res_conv.bias', 'unet.ups.0.1.mlp.1.weight', 'unet.ups.0.1.mlp.1.bias', 'unet.ups.0.1.block1.proj.weight', 'unet.ups.0.1.block1.proj.bias', 'unet.ups.0.1.block1.norm.weight', 'unet.ups.0.1.block1.norm.bias', 'unet.ups.0.1.block2.proj.weight', 'unet.ups.0.1.block2.proj.bias', 'unet.ups.0.1.block2.norm.weight', 'unet.ups.0.1.block2.norm.bias', 'unet.ups.0.1.res_conv.weight', 'unet.ups.0.1.res_conv.bias', 'unet.ups.0.2.fn.fn.to_qkv.weight', 'unet.ups.0.2.fn.fn.to_out.0.weight', 'unet.ups.0.2.fn.fn.to_out.0.bias', 'unet.ups.0.2.fn.fn.to_out.1.g', 'unet.ups.0.2.fn.norm.g', 'unet.ups.0.3.1.weight', 'unet.ups.0.3.1.bias', 'unet.ups.1.0.mlp.1.weight', 'unet.ups.1.0.mlp.1.bias', 'unet.ups.1.0.block1.proj.weight', 'unet.ups.1.0.block1.proj.bias', 'unet.ups.1.0.block1.norm.weight', 'unet.ups.1.0.block1.norm.bias', 'unet.ups.1.0.block2.proj.weight', 'unet.ups.1.0.block2.proj.bias', 'unet.ups.1.0.block2.norm.weight', 'unet.ups.1.0.block2.norm.bias', 'unet.ups.1.0.res_conv.weight', 'unet.ups.1.0.res_conv.bias', 'unet.ups.1.1.mlp.1.weight', 'unet.ups.1.1.mlp.1.bias', 'unet.ups.1.1.block1.proj.weight', 'unet.ups.1.1.block1.proj.bias', 'unet.ups.1.1.block1.norm.weight', 'unet.ups.1.1.block1.norm.bias', 'unet.ups.1.1.block2.proj.weight', 'unet.ups.1.1.block2.proj.bias', 'unet.ups.1.1.block2.norm.weight', 'unet.ups.1.1.block2.norm.bias', 'unet.ups.1.1.res_conv.weight', 'unet.ups.1.1.res_conv.bias', 'unet.ups.1.2.fn.fn.to_qkv.weight', 'unet.ups.1.2.fn.fn.to_out.0.weight', 'unet.ups.1.2.fn.fn.to_out.0.bias', 'unet.ups.1.2.fn.fn.to_out.1.g', 'unet.ups.1.2.fn.norm.g', 'unet.ups.1.3.1.weight', 'unet.ups.1.3.1.bias', 'unet.ups.2.0.mlp.1.weight', 'unet.ups.2.0.mlp.1.bias', 'unet.ups.2.0.block1.proj.weight', 'unet.ups.2.0.block1.proj.bias', 'unet.ups.2.0.block1.norm.weight', 'unet.ups.2.0.block1.norm.bias', 'unet.ups.2.0.block2.proj.weight', 'unet.ups.2.0.block2.proj.bias', 'unet.ups.2.0.block2.norm.weight', 'unet.ups.2.0.block2.norm.bias', 'unet.ups.2.0.res_conv.weight', 'unet.ups.2.0.res_conv.bias', 'unet.ups.2.1.mlp.1.weight', 'unet.ups.2.1.mlp.1.bias', 'unet.ups.2.1.block1.proj.weight', 'unet.ups.2.1.block1.proj.bias', 'unet.ups.2.1.block1.norm.weight', 'unet.ups.2.1.block1.norm.bias', 'unet.ups.2.1.block2.proj.weight', 'unet.ups.2.1.block2.proj.bias', 'unet.ups.2.1.block2.norm.weight', 'unet.ups.2.1.block2.norm.bias', 'unet.ups.2.1.res_conv.weight', 'unet.ups.2.1.res_conv.bias', 'unet.ups.2.2.fn.fn.to_qkv.weight', 'unet.ups.2.2.fn.fn.to_out.0.weight', 'unet.ups.2.2.fn.fn.to_out.0.bias', 'unet.ups.2.2.fn.fn.to_out.1.g', 'unet.ups.2.2.fn.norm.g', 'unet.ups.2.3.weight', 'unet.ups.2.3.bias', 'unet.mid_block1.mlp.1.weight', 'unet.mid_block1.mlp.1.bias', 'unet.mid_block1.block1.proj.weight', 'unet.mid_block1.block1.proj.bias', 'unet.mid_block1.block1.norm.weight', 'unet.mid_block1.block1.norm.bias', 'unet.mid_block1.block2.proj.weight', 'unet.mid_block1.block2.proj.bias', 'unet.mid_block1.block2.norm.weight', 'unet.mid_block1.block2.norm.bias', 'unet.mid_attn.fn.fn.to_qkv.weight', 'unet.mid_attn.fn.fn.to_out.weight', 'unet.mid_attn.fn.fn.to_out.bias', 'unet.mid_attn.fn.norm.g', 'unet.mid_block2.mlp.1.weight', 'unet.mid_block2.mlp.1.bias', 'unet.mid_block2.block1.proj.weight', 'unet.mid_block2.block1.proj.bias', 'unet.mid_block2.block1.norm.weight', 'unet.mid_block2.block1.norm.bias', 'unet.mid_block2.block2.proj.weight', 'unet.mid_block2.block2.proj.bias', 'unet.mid_block2.block2.norm.weight', 'unet.mid_block2.block2.norm.bias', 'unet.final_res_block.mlp.1.weight', 'unet.final_res_block.mlp.1.bias', 'unet.final_res_block.block1.proj.weight', 'unet.final_res_block.block1.proj.bias', 'unet.final_res_block.block1.norm.weight', 'unet.final_res_block.block1.norm.bias', 'unet.final_res_block.block2.proj.weight', 'unet.final_res_block.block2.proj.bias', 'unet.final_res_block.block2.norm.weight', 'unet.final_res_block.block2.norm.bias', 'unet.final_res_block.res_conv.weight', 'unet.final_res_block.res_conv.bias', 'unet.final_conv.weight', 'unet.final_conv.bias', 'conv_seg_new.weight', 'conv_seg_new.bias']
227
+ 2023-03-03 20:38:09,699 - mmseg - INFO - Parameters in decode_head freezed!
228
+ 2023-03-03 20:38:09,741 - mmseg - INFO - load checkpoint from local path: pretrained/deeplabv3plus_r101-d8_512x512_160k_ade20k_20200615_123232-38ed86bb.pth
229
+ 2023-03-03 20:38:10,251 - mmseg - WARNING - The model and loaded state dict do not match exactly
230
+
231
+ unexpected key in source state_dict: decode_head.conv_seg.weight, decode_head.conv_seg.bias, decode_head.image_pool.1.conv.weight, decode_head.image_pool.1.bn.weight, decode_head.image_pool.1.bn.bias, decode_head.image_pool.1.bn.running_mean, decode_head.image_pool.1.bn.running_var, decode_head.image_pool.1.bn.num_batches_tracked, decode_head.aspp_modules.0.conv.weight, decode_head.aspp_modules.0.bn.weight, decode_head.aspp_modules.0.bn.bias, decode_head.aspp_modules.0.bn.running_mean, decode_head.aspp_modules.0.bn.running_var, decode_head.aspp_modules.0.bn.num_batches_tracked, decode_head.aspp_modules.1.depthwise_conv.conv.weight, decode_head.aspp_modules.1.depthwise_conv.bn.weight, decode_head.aspp_modules.1.depthwise_conv.bn.bias, decode_head.aspp_modules.1.depthwise_conv.bn.running_mean, decode_head.aspp_modules.1.depthwise_conv.bn.running_var, decode_head.aspp_modules.1.depthwise_conv.bn.num_batches_tracked, decode_head.aspp_modules.1.pointwise_conv.conv.weight, decode_head.aspp_modules.1.pointwise_conv.bn.weight, decode_head.aspp_modules.1.pointwise_conv.bn.bias, decode_head.aspp_modules.1.pointwise_conv.bn.running_mean, decode_head.aspp_modules.1.pointwise_conv.bn.running_var, decode_head.aspp_modules.1.pointwise_conv.bn.num_batches_tracked, decode_head.aspp_modules.2.depthwise_conv.conv.weight, decode_head.aspp_modules.2.depthwise_conv.bn.weight, decode_head.aspp_modules.2.depthwise_conv.bn.bias, decode_head.aspp_modules.2.depthwise_conv.bn.running_mean, decode_head.aspp_modules.2.depthwise_conv.bn.running_var, decode_head.aspp_modules.2.depthwise_conv.bn.num_batches_tracked, decode_head.aspp_modules.2.pointwise_conv.conv.weight, decode_head.aspp_modules.2.pointwise_conv.bn.weight, decode_head.aspp_modules.2.pointwise_conv.bn.bias, decode_head.aspp_modules.2.pointwise_conv.bn.running_mean, decode_head.aspp_modules.2.pointwise_conv.bn.running_var, decode_head.aspp_modules.2.pointwise_conv.bn.num_batches_tracked, decode_head.aspp_modules.3.depthwise_conv.conv.weight, decode_head.aspp_modules.3.depthwise_conv.bn.weight, decode_head.aspp_modules.3.depthwise_conv.bn.bias, decode_head.aspp_modules.3.depthwise_conv.bn.running_mean, decode_head.aspp_modules.3.depthwise_conv.bn.running_var, decode_head.aspp_modules.3.depthwise_conv.bn.num_batches_tracked, decode_head.aspp_modules.3.pointwise_conv.conv.weight, decode_head.aspp_modules.3.pointwise_conv.bn.weight, decode_head.aspp_modules.3.pointwise_conv.bn.bias, decode_head.aspp_modules.3.pointwise_conv.bn.running_mean, decode_head.aspp_modules.3.pointwise_conv.bn.running_var, decode_head.aspp_modules.3.pointwise_conv.bn.num_batches_tracked, decode_head.bottleneck.conv.weight, decode_head.bottleneck.bn.weight, decode_head.bottleneck.bn.bias, decode_head.bottleneck.bn.running_mean, decode_head.bottleneck.bn.running_var, decode_head.bottleneck.bn.num_batches_tracked, decode_head.c1_bottleneck.conv.weight, decode_head.c1_bottleneck.bn.weight, decode_head.c1_bottleneck.bn.bias, decode_head.c1_bottleneck.bn.running_mean, decode_head.c1_bottleneck.bn.running_var, decode_head.c1_bottleneck.bn.num_batches_tracked, decode_head.sep_bottleneck.0.depthwise_conv.conv.weight, decode_head.sep_bottleneck.0.depthwise_conv.bn.weight, decode_head.sep_bottleneck.0.depthwise_conv.bn.bias, decode_head.sep_bottleneck.0.depthwise_conv.bn.running_mean, decode_head.sep_bottleneck.0.depthwise_conv.bn.running_var, decode_head.sep_bottleneck.0.depthwise_conv.bn.num_batches_tracked, decode_head.sep_bottleneck.0.pointwise_conv.conv.weight, decode_head.sep_bottleneck.0.pointwise_conv.bn.weight, decode_head.sep_bottleneck.0.pointwise_conv.bn.bias, decode_head.sep_bottleneck.0.pointwise_conv.bn.running_mean, decode_head.sep_bottleneck.0.pointwise_conv.bn.running_var, decode_head.sep_bottleneck.0.pointwise_conv.bn.num_batches_tracked, decode_head.sep_bottleneck.1.depthwise_conv.conv.weight, decode_head.sep_bottleneck.1.depthwise_conv.bn.weight, decode_head.sep_bottleneck.1.depthwise_conv.bn.bias, decode_head.sep_bottleneck.1.depthwise_conv.bn.running_mean, decode_head.sep_bottleneck.1.depthwise_conv.bn.running_var, decode_head.sep_bottleneck.1.depthwise_conv.bn.num_batches_tracked, decode_head.sep_bottleneck.1.pointwise_conv.conv.weight, decode_head.sep_bottleneck.1.pointwise_conv.bn.weight, decode_head.sep_bottleneck.1.pointwise_conv.bn.bias, decode_head.sep_bottleneck.1.pointwise_conv.bn.running_mean, decode_head.sep_bottleneck.1.pointwise_conv.bn.running_var, decode_head.sep_bottleneck.1.pointwise_conv.bn.num_batches_tracked, auxiliary_head.conv_seg.weight, auxiliary_head.conv_seg.bias, auxiliary_head.convs.0.conv.weight, auxiliary_head.convs.0.bn.weight, auxiliary_head.convs.0.bn.bias, auxiliary_head.convs.0.bn.running_mean, auxiliary_head.convs.0.bn.running_var, auxiliary_head.convs.0.bn.num_batches_tracked
232
+
233
+ 2023-03-03 20:38:10,285 - mmseg - INFO - load checkpoint from local path: pretrained/deeplabv3plus_r101-d8_512x512_160k_ade20k_20200615_123232-38ed86bb.pth
234
+ 2023-03-03 20:38:10,813 - mmseg - WARNING - The model and loaded state dict do not match exactly
235
+
236
+ unexpected key in source state_dict: backbone.stem.0.weight, backbone.stem.1.weight, backbone.stem.1.bias, backbone.stem.1.running_mean, backbone.stem.1.running_var, backbone.stem.1.num_batches_tracked, backbone.stem.3.weight, backbone.stem.4.weight, backbone.stem.4.bias, backbone.stem.4.running_mean, backbone.stem.4.running_var, backbone.stem.4.num_batches_tracked, backbone.stem.6.weight, backbone.stem.7.weight, backbone.stem.7.bias, backbone.stem.7.running_mean, backbone.stem.7.running_var, backbone.stem.7.num_batches_tracked, backbone.layer1.0.conv1.weight, backbone.layer1.0.bn1.weight, backbone.layer1.0.bn1.bias, backbone.layer1.0.bn1.running_mean, backbone.layer1.0.bn1.running_var, backbone.layer1.0.bn1.num_batches_tracked, backbone.layer1.0.conv2.weight, backbone.layer1.0.bn2.weight, backbone.layer1.0.bn2.bias, backbone.layer1.0.bn2.running_mean, backbone.layer1.0.bn2.running_var, backbone.layer1.0.bn2.num_batches_tracked, backbone.layer1.0.conv3.weight, backbone.layer1.0.bn3.weight, backbone.layer1.0.bn3.bias, backbone.layer1.0.bn3.running_mean, backbone.layer1.0.bn3.running_var, backbone.layer1.0.bn3.num_batches_tracked, backbone.layer1.0.downsample.0.weight, backbone.layer1.0.downsample.1.weight, backbone.layer1.0.downsample.1.bias, backbone.layer1.0.downsample.1.running_mean, backbone.layer1.0.downsample.1.running_var, backbone.layer1.0.downsample.1.num_batches_tracked, backbone.layer1.1.conv1.weight, backbone.layer1.1.bn1.weight, backbone.layer1.1.bn1.bias, backbone.layer1.1.bn1.running_mean, backbone.layer1.1.bn1.running_var, backbone.layer1.1.bn1.num_batches_tracked, backbone.layer1.1.conv2.weight, backbone.layer1.1.bn2.weight, backbone.layer1.1.bn2.bias, backbone.layer1.1.bn2.running_mean, backbone.layer1.1.bn2.running_var, backbone.layer1.1.bn2.num_batches_tracked, backbone.layer1.1.conv3.weight, backbone.layer1.1.bn3.weight, backbone.layer1.1.bn3.bias, backbone.layer1.1.bn3.running_mean, backbone.layer1.1.bn3.running_var, backbone.layer1.1.bn3.num_batches_tracked, backbone.layer1.2.conv1.weight, backbone.layer1.2.bn1.weight, backbone.layer1.2.bn1.bias, backbone.layer1.2.bn1.running_mean, backbone.layer1.2.bn1.running_var, backbone.layer1.2.bn1.num_batches_tracked, backbone.layer1.2.conv2.weight, backbone.layer1.2.bn2.weight, backbone.layer1.2.bn2.bias, backbone.layer1.2.bn2.running_mean, backbone.layer1.2.bn2.running_var, backbone.layer1.2.bn2.num_batches_tracked, backbone.layer1.2.conv3.weight, backbone.layer1.2.bn3.weight, backbone.layer1.2.bn3.bias, backbone.layer1.2.bn3.running_mean, backbone.layer1.2.bn3.running_var, backbone.layer1.2.bn3.num_batches_tracked, backbone.layer2.0.conv1.weight, backbone.layer2.0.bn1.weight, backbone.layer2.0.bn1.bias, backbone.layer2.0.bn1.running_mean, backbone.layer2.0.bn1.running_var, backbone.layer2.0.bn1.num_batches_tracked, backbone.layer2.0.conv2.weight, backbone.layer2.0.bn2.weight, backbone.layer2.0.bn2.bias, backbone.layer2.0.bn2.running_mean, backbone.layer2.0.bn2.running_var, backbone.layer2.0.bn2.num_batches_tracked, backbone.layer2.0.conv3.weight, backbone.layer2.0.bn3.weight, backbone.layer2.0.bn3.bias, backbone.layer2.0.bn3.running_mean, backbone.layer2.0.bn3.running_var, backbone.layer2.0.bn3.num_batches_tracked, backbone.layer2.0.downsample.0.weight, backbone.layer2.0.downsample.1.weight, backbone.layer2.0.downsample.1.bias, backbone.layer2.0.downsample.1.running_mean, backbone.layer2.0.downsample.1.running_var, backbone.layer2.0.downsample.1.num_batches_tracked, backbone.layer2.1.conv1.weight, backbone.layer2.1.bn1.weight, backbone.layer2.1.bn1.bias, backbone.layer2.1.bn1.running_mean, backbone.layer2.1.bn1.running_var, backbone.layer2.1.bn1.num_batches_tracked, backbone.layer2.1.conv2.weight, backbone.layer2.1.bn2.weight, backbone.layer2.1.bn2.bias, backbone.layer2.1.bn2.running_mean, backbone.layer2.1.bn2.running_var, backbone.layer2.1.bn2.num_batches_tracked, backbone.layer2.1.conv3.weight, backbone.layer2.1.bn3.weight, backbone.layer2.1.bn3.bias, backbone.layer2.1.bn3.running_mean, backbone.layer2.1.bn3.running_var, backbone.layer2.1.bn3.num_batches_tracked, backbone.layer2.2.conv1.weight, backbone.layer2.2.bn1.weight, backbone.layer2.2.bn1.bias, backbone.layer2.2.bn1.running_mean, backbone.layer2.2.bn1.running_var, backbone.layer2.2.bn1.num_batches_tracked, backbone.layer2.2.conv2.weight, backbone.layer2.2.bn2.weight, backbone.layer2.2.bn2.bias, backbone.layer2.2.bn2.running_mean, backbone.layer2.2.bn2.running_var, backbone.layer2.2.bn2.num_batches_tracked, backbone.layer2.2.conv3.weight, backbone.layer2.2.bn3.weight, backbone.layer2.2.bn3.bias, backbone.layer2.2.bn3.running_mean, backbone.layer2.2.bn3.running_var, backbone.layer2.2.bn3.num_batches_tracked, backbone.layer2.3.conv1.weight, backbone.layer2.3.bn1.weight, backbone.layer2.3.bn1.bias, backbone.layer2.3.bn1.running_mean, backbone.layer2.3.bn1.running_var, backbone.layer2.3.bn1.num_batches_tracked, backbone.layer2.3.conv2.weight, backbone.layer2.3.bn2.weight, backbone.layer2.3.bn2.bias, backbone.layer2.3.bn2.running_mean, backbone.layer2.3.bn2.running_var, backbone.layer2.3.bn2.num_batches_tracked, backbone.layer2.3.conv3.weight, backbone.layer2.3.bn3.weight, backbone.layer2.3.bn3.bias, backbone.layer2.3.bn3.running_mean, backbone.layer2.3.bn3.running_var, backbone.layer2.3.bn3.num_batches_tracked, backbone.layer3.0.conv1.weight, backbone.layer3.0.bn1.weight, backbone.layer3.0.bn1.bias, backbone.layer3.0.bn1.running_mean, backbone.layer3.0.bn1.running_var, backbone.layer3.0.bn1.num_batches_tracked, backbone.layer3.0.conv2.weight, backbone.layer3.0.bn2.weight, backbone.layer3.0.bn2.bias, backbone.layer3.0.bn2.running_mean, backbone.layer3.0.bn2.running_var, backbone.layer3.0.bn2.num_batches_tracked, backbone.layer3.0.conv3.weight, backbone.layer3.0.bn3.weight, backbone.layer3.0.bn3.bias, backbone.layer3.0.bn3.running_mean, backbone.layer3.0.bn3.running_var, backbone.layer3.0.bn3.num_batches_tracked, backbone.layer3.0.downsample.0.weight, backbone.layer3.0.downsample.1.weight, backbone.layer3.0.downsample.1.bias, backbone.layer3.0.downsample.1.running_mean, backbone.layer3.0.downsample.1.running_var, backbone.layer3.0.downsample.1.num_batches_tracked, backbone.layer3.1.conv1.weight, backbone.layer3.1.bn1.weight, backbone.layer3.1.bn1.bias, backbone.layer3.1.bn1.running_mean, backbone.layer3.1.bn1.running_var, backbone.layer3.1.bn1.num_batches_tracked, backbone.layer3.1.conv2.weight, backbone.layer3.1.bn2.weight, backbone.layer3.1.bn2.bias, backbone.layer3.1.bn2.running_mean, backbone.layer3.1.bn2.running_var, backbone.layer3.1.bn2.num_batches_tracked, backbone.layer3.1.conv3.weight, backbone.layer3.1.bn3.weight, backbone.layer3.1.bn3.bias, backbone.layer3.1.bn3.running_mean, backbone.layer3.1.bn3.running_var, backbone.layer3.1.bn3.num_batches_tracked, backbone.layer3.2.conv1.weight, backbone.layer3.2.bn1.weight, backbone.layer3.2.bn1.bias, backbone.layer3.2.bn1.running_mean, backbone.layer3.2.bn1.running_var, backbone.layer3.2.bn1.num_batches_tracked, backbone.layer3.2.conv2.weight, backbone.layer3.2.bn2.weight, backbone.layer3.2.bn2.bias, backbone.layer3.2.bn2.running_mean, backbone.layer3.2.bn2.running_var, backbone.layer3.2.bn2.num_batches_tracked, backbone.layer3.2.conv3.weight, backbone.layer3.2.bn3.weight, backbone.layer3.2.bn3.bias, backbone.layer3.2.bn3.running_mean, backbone.layer3.2.bn3.running_var, backbone.layer3.2.bn3.num_batches_tracked, backbone.layer3.3.conv1.weight, backbone.layer3.3.bn1.weight, backbone.layer3.3.bn1.bias, backbone.layer3.3.bn1.running_mean, backbone.layer3.3.bn1.running_var, backbone.layer3.3.bn1.num_batches_tracked, backbone.layer3.3.conv2.weight, backbone.layer3.3.bn2.weight, backbone.layer3.3.bn2.bias, backbone.layer3.3.bn2.running_mean, backbone.layer3.3.bn2.running_var, backbone.layer3.3.bn2.num_batches_tracked, backbone.layer3.3.conv3.weight, backbone.layer3.3.bn3.weight, backbone.layer3.3.bn3.bias, backbone.layer3.3.bn3.running_mean, backbone.layer3.3.bn3.running_var, backbone.layer3.3.bn3.num_batches_tracked, backbone.layer3.4.conv1.weight, backbone.layer3.4.bn1.weight, backbone.layer3.4.bn1.bias, backbone.layer3.4.bn1.running_mean, backbone.layer3.4.bn1.running_var, backbone.layer3.4.bn1.num_batches_tracked, backbone.layer3.4.conv2.weight, backbone.layer3.4.bn2.weight, backbone.layer3.4.bn2.bias, backbone.layer3.4.bn2.running_mean, backbone.layer3.4.bn2.running_var, backbone.layer3.4.bn2.num_batches_tracked, backbone.layer3.4.conv3.weight, backbone.layer3.4.bn3.weight, backbone.layer3.4.bn3.bias, backbone.layer3.4.bn3.running_mean, backbone.layer3.4.bn3.running_var, backbone.layer3.4.bn3.num_batches_tracked, backbone.layer3.5.conv1.weight, backbone.layer3.5.bn1.weight, backbone.layer3.5.bn1.bias, backbone.layer3.5.bn1.running_mean, backbone.layer3.5.bn1.running_var, backbone.layer3.5.bn1.num_batches_tracked, backbone.layer3.5.conv2.weight, backbone.layer3.5.bn2.weight, backbone.layer3.5.bn2.bias, backbone.layer3.5.bn2.running_mean, backbone.layer3.5.bn2.running_var, backbone.layer3.5.bn2.num_batches_tracked, backbone.layer3.5.conv3.weight, backbone.layer3.5.bn3.weight, backbone.layer3.5.bn3.bias, backbone.layer3.5.bn3.running_mean, backbone.layer3.5.bn3.running_var, backbone.layer3.5.bn3.num_batches_tracked, backbone.layer3.6.conv1.weight, backbone.layer3.6.bn1.weight, backbone.layer3.6.bn1.bias, backbone.layer3.6.bn1.running_mean, backbone.layer3.6.bn1.running_var, backbone.layer3.6.bn1.num_batches_tracked, backbone.layer3.6.conv2.weight, backbone.layer3.6.bn2.weight, backbone.layer3.6.bn2.bias, backbone.layer3.6.bn2.running_mean, backbone.layer3.6.bn2.running_var, backbone.layer3.6.bn2.num_batches_tracked, backbone.layer3.6.conv3.weight, backbone.layer3.6.bn3.weight, backbone.layer3.6.bn3.bias, backbone.layer3.6.bn3.running_mean, backbone.layer3.6.bn3.running_var, backbone.layer3.6.bn3.num_batches_tracked, backbone.layer3.7.conv1.weight, backbone.layer3.7.bn1.weight, backbone.layer3.7.bn1.bias, backbone.layer3.7.bn1.running_mean, backbone.layer3.7.bn1.running_var, backbone.layer3.7.bn1.num_batches_tracked, backbone.layer3.7.conv2.weight, backbone.layer3.7.bn2.weight, backbone.layer3.7.bn2.bias, backbone.layer3.7.bn2.running_mean, backbone.layer3.7.bn2.running_var, backbone.layer3.7.bn2.num_batches_tracked, backbone.layer3.7.conv3.weight, backbone.layer3.7.bn3.weight, backbone.layer3.7.bn3.bias, backbone.layer3.7.bn3.running_mean, backbone.layer3.7.bn3.running_var, backbone.layer3.7.bn3.num_batches_tracked, backbone.layer3.8.conv1.weight, backbone.layer3.8.bn1.weight, backbone.layer3.8.bn1.bias, backbone.layer3.8.bn1.running_mean, backbone.layer3.8.bn1.running_var, backbone.layer3.8.bn1.num_batches_tracked, backbone.layer3.8.conv2.weight, backbone.layer3.8.bn2.weight, backbone.layer3.8.bn2.bias, backbone.layer3.8.bn2.running_mean, backbone.layer3.8.bn2.running_var, backbone.layer3.8.bn2.num_batches_tracked, backbone.layer3.8.conv3.weight, backbone.layer3.8.bn3.weight, backbone.layer3.8.bn3.bias, backbone.layer3.8.bn3.running_mean, backbone.layer3.8.bn3.running_var, backbone.layer3.8.bn3.num_batches_tracked, backbone.layer3.9.conv1.weight, backbone.layer3.9.bn1.weight, backbone.layer3.9.bn1.bias, backbone.layer3.9.bn1.running_mean, backbone.layer3.9.bn1.running_var, backbone.layer3.9.bn1.num_batches_tracked, backbone.layer3.9.conv2.weight, backbone.layer3.9.bn2.weight, backbone.layer3.9.bn2.bias, backbone.layer3.9.bn2.running_mean, backbone.layer3.9.bn2.running_var, backbone.layer3.9.bn2.num_batches_tracked, backbone.layer3.9.conv3.weight, backbone.layer3.9.bn3.weight, backbone.layer3.9.bn3.bias, backbone.layer3.9.bn3.running_mean, backbone.layer3.9.bn3.running_var, backbone.layer3.9.bn3.num_batches_tracked, backbone.layer3.10.conv1.weight, backbone.layer3.10.bn1.weight, backbone.layer3.10.bn1.bias, backbone.layer3.10.bn1.running_mean, backbone.layer3.10.bn1.running_var, backbone.layer3.10.bn1.num_batches_tracked, backbone.layer3.10.conv2.weight, backbone.layer3.10.bn2.weight, backbone.layer3.10.bn2.bias, backbone.layer3.10.bn2.running_mean, backbone.layer3.10.bn2.running_var, backbone.layer3.10.bn2.num_batches_tracked, backbone.layer3.10.conv3.weight, backbone.layer3.10.bn3.weight, backbone.layer3.10.bn3.bias, backbone.layer3.10.bn3.running_mean, backbone.layer3.10.bn3.running_var, backbone.layer3.10.bn3.num_batches_tracked, backbone.layer3.11.conv1.weight, backbone.layer3.11.bn1.weight, backbone.layer3.11.bn1.bias, backbone.layer3.11.bn1.running_mean, backbone.layer3.11.bn1.running_var, backbone.layer3.11.bn1.num_batches_tracked, backbone.layer3.11.conv2.weight, backbone.layer3.11.bn2.weight, backbone.layer3.11.bn2.bias, backbone.layer3.11.bn2.running_mean, backbone.layer3.11.bn2.running_var, backbone.layer3.11.bn2.num_batches_tracked, backbone.layer3.11.conv3.weight, backbone.layer3.11.bn3.weight, backbone.layer3.11.bn3.bias, backbone.layer3.11.bn3.running_mean, backbone.layer3.11.bn3.running_var, backbone.layer3.11.bn3.num_batches_tracked, backbone.layer3.12.conv1.weight, backbone.layer3.12.bn1.weight, backbone.layer3.12.bn1.bias, backbone.layer3.12.bn1.running_mean, backbone.layer3.12.bn1.running_var, backbone.layer3.12.bn1.num_batches_tracked, backbone.layer3.12.conv2.weight, backbone.layer3.12.bn2.weight, backbone.layer3.12.bn2.bias, backbone.layer3.12.bn2.running_mean, backbone.layer3.12.bn2.running_var, backbone.layer3.12.bn2.num_batches_tracked, backbone.layer3.12.conv3.weight, backbone.layer3.12.bn3.weight, backbone.layer3.12.bn3.bias, backbone.layer3.12.bn3.running_mean, backbone.layer3.12.bn3.running_var, backbone.layer3.12.bn3.num_batches_tracked, backbone.layer3.13.conv1.weight, backbone.layer3.13.bn1.weight, backbone.layer3.13.bn1.bias, backbone.layer3.13.bn1.running_mean, backbone.layer3.13.bn1.running_var, backbone.layer3.13.bn1.num_batches_tracked, backbone.layer3.13.conv2.weight, backbone.layer3.13.bn2.weight, backbone.layer3.13.bn2.bias, backbone.layer3.13.bn2.running_mean, backbone.layer3.13.bn2.running_var, backbone.layer3.13.bn2.num_batches_tracked, backbone.layer3.13.conv3.weight, backbone.layer3.13.bn3.weight, backbone.layer3.13.bn3.bias, backbone.layer3.13.bn3.running_mean, backbone.layer3.13.bn3.running_var, backbone.layer3.13.bn3.num_batches_tracked, backbone.layer3.14.conv1.weight, backbone.layer3.14.bn1.weight, backbone.layer3.14.bn1.bias, backbone.layer3.14.bn1.running_mean, backbone.layer3.14.bn1.running_var, backbone.layer3.14.bn1.num_batches_tracked, backbone.layer3.14.conv2.weight, backbone.layer3.14.bn2.weight, backbone.layer3.14.bn2.bias, backbone.layer3.14.bn2.running_mean, backbone.layer3.14.bn2.running_var, backbone.layer3.14.bn2.num_batches_tracked, backbone.layer3.14.conv3.weight, backbone.layer3.14.bn3.weight, backbone.layer3.14.bn3.bias, backbone.layer3.14.bn3.running_mean, backbone.layer3.14.bn3.running_var, backbone.layer3.14.bn3.num_batches_tracked, backbone.layer3.15.conv1.weight, backbone.layer3.15.bn1.weight, backbone.layer3.15.bn1.bias, backbone.layer3.15.bn1.running_mean, backbone.layer3.15.bn1.running_var, backbone.layer3.15.bn1.num_batches_tracked, backbone.layer3.15.conv2.weight, backbone.layer3.15.bn2.weight, backbone.layer3.15.bn2.bias, backbone.layer3.15.bn2.running_mean, backbone.layer3.15.bn2.running_var, backbone.layer3.15.bn2.num_batches_tracked, backbone.layer3.15.conv3.weight, backbone.layer3.15.bn3.weight, backbone.layer3.15.bn3.bias, backbone.layer3.15.bn3.running_mean, backbone.layer3.15.bn3.running_var, backbone.layer3.15.bn3.num_batches_tracked, backbone.layer3.16.conv1.weight, backbone.layer3.16.bn1.weight, backbone.layer3.16.bn1.bias, backbone.layer3.16.bn1.running_mean, backbone.layer3.16.bn1.running_var, backbone.layer3.16.bn1.num_batches_tracked, backbone.layer3.16.conv2.weight, backbone.layer3.16.bn2.weight, backbone.layer3.16.bn2.bias, backbone.layer3.16.bn2.running_mean, backbone.layer3.16.bn2.running_var, backbone.layer3.16.bn2.num_batches_tracked, backbone.layer3.16.conv3.weight, backbone.layer3.16.bn3.weight, backbone.layer3.16.bn3.bias, backbone.layer3.16.bn3.running_mean, backbone.layer3.16.bn3.running_var, backbone.layer3.16.bn3.num_batches_tracked, backbone.layer3.17.conv1.weight, backbone.layer3.17.bn1.weight, backbone.layer3.17.bn1.bias, backbone.layer3.17.bn1.running_mean, backbone.layer3.17.bn1.running_var, backbone.layer3.17.bn1.num_batches_tracked, backbone.layer3.17.conv2.weight, backbone.layer3.17.bn2.weight, backbone.layer3.17.bn2.bias, backbone.layer3.17.bn2.running_mean, backbone.layer3.17.bn2.running_var, backbone.layer3.17.bn2.num_batches_tracked, backbone.layer3.17.conv3.weight, backbone.layer3.17.bn3.weight, backbone.layer3.17.bn3.bias, backbone.layer3.17.bn3.running_mean, backbone.layer3.17.bn3.running_var, backbone.layer3.17.bn3.num_batches_tracked, backbone.layer3.18.conv1.weight, backbone.layer3.18.bn1.weight, backbone.layer3.18.bn1.bias, backbone.layer3.18.bn1.running_mean, backbone.layer3.18.bn1.running_var, backbone.layer3.18.bn1.num_batches_tracked, backbone.layer3.18.conv2.weight, backbone.layer3.18.bn2.weight, backbone.layer3.18.bn2.bias, backbone.layer3.18.bn2.running_mean, backbone.layer3.18.bn2.running_var, backbone.layer3.18.bn2.num_batches_tracked, backbone.layer3.18.conv3.weight, backbone.layer3.18.bn3.weight, backbone.layer3.18.bn3.bias, backbone.layer3.18.bn3.running_mean, backbone.layer3.18.bn3.running_var, backbone.layer3.18.bn3.num_batches_tracked, backbone.layer3.19.conv1.weight, backbone.layer3.19.bn1.weight, backbone.layer3.19.bn1.bias, backbone.layer3.19.bn1.running_mean, backbone.layer3.19.bn1.running_var, backbone.layer3.19.bn1.num_batches_tracked, backbone.layer3.19.conv2.weight, backbone.layer3.19.bn2.weight, backbone.layer3.19.bn2.bias, backbone.layer3.19.bn2.running_mean, backbone.layer3.19.bn2.running_var, backbone.layer3.19.bn2.num_batches_tracked, backbone.layer3.19.conv3.weight, backbone.layer3.19.bn3.weight, backbone.layer3.19.bn3.bias, backbone.layer3.19.bn3.running_mean, backbone.layer3.19.bn3.running_var, backbone.layer3.19.bn3.num_batches_tracked, backbone.layer3.20.conv1.weight, backbone.layer3.20.bn1.weight, backbone.layer3.20.bn1.bias, backbone.layer3.20.bn1.running_mean, backbone.layer3.20.bn1.running_var, backbone.layer3.20.bn1.num_batches_tracked, backbone.layer3.20.conv2.weight, backbone.layer3.20.bn2.weight, backbone.layer3.20.bn2.bias, backbone.layer3.20.bn2.running_mean, backbone.layer3.20.bn2.running_var, backbone.layer3.20.bn2.num_batches_tracked, backbone.layer3.20.conv3.weight, backbone.layer3.20.bn3.weight, backbone.layer3.20.bn3.bias, backbone.layer3.20.bn3.running_mean, backbone.layer3.20.bn3.running_var, backbone.layer3.20.bn3.num_batches_tracked, backbone.layer3.21.conv1.weight, backbone.layer3.21.bn1.weight, backbone.layer3.21.bn1.bias, backbone.layer3.21.bn1.running_mean, backbone.layer3.21.bn1.running_var, backbone.layer3.21.bn1.num_batches_tracked, backbone.layer3.21.conv2.weight, backbone.layer3.21.bn2.weight, backbone.layer3.21.bn2.bias, backbone.layer3.21.bn2.running_mean, backbone.layer3.21.bn2.running_var, backbone.layer3.21.bn2.num_batches_tracked, backbone.layer3.21.conv3.weight, backbone.layer3.21.bn3.weight, backbone.layer3.21.bn3.bias, backbone.layer3.21.bn3.running_mean, backbone.layer3.21.bn3.running_var, backbone.layer3.21.bn3.num_batches_tracked, backbone.layer3.22.conv1.weight, backbone.layer3.22.bn1.weight, backbone.layer3.22.bn1.bias, backbone.layer3.22.bn1.running_mean, backbone.layer3.22.bn1.running_var, backbone.layer3.22.bn1.num_batches_tracked, backbone.layer3.22.conv2.weight, backbone.layer3.22.bn2.weight, backbone.layer3.22.bn2.bias, backbone.layer3.22.bn2.running_mean, backbone.layer3.22.bn2.running_var, backbone.layer3.22.bn2.num_batches_tracked, backbone.layer3.22.conv3.weight, backbone.layer3.22.bn3.weight, backbone.layer3.22.bn3.bias, backbone.layer3.22.bn3.running_mean, backbone.layer3.22.bn3.running_var, backbone.layer3.22.bn3.num_batches_tracked, backbone.layer4.0.conv1.weight, backbone.layer4.0.bn1.weight, backbone.layer4.0.bn1.bias, backbone.layer4.0.bn1.running_mean, backbone.layer4.0.bn1.running_var, backbone.layer4.0.bn1.num_batches_tracked, backbone.layer4.0.conv2.weight, backbone.layer4.0.bn2.weight, backbone.layer4.0.bn2.bias, backbone.layer4.0.bn2.running_mean, backbone.layer4.0.bn2.running_var, backbone.layer4.0.bn2.num_batches_tracked, backbone.layer4.0.conv3.weight, backbone.layer4.0.bn3.weight, backbone.layer4.0.bn3.bias, backbone.layer4.0.bn3.running_mean, backbone.layer4.0.bn3.running_var, backbone.layer4.0.bn3.num_batches_tracked, backbone.layer4.0.downsample.0.weight, backbone.layer4.0.downsample.1.weight, backbone.layer4.0.downsample.1.bias, backbone.layer4.0.downsample.1.running_mean, backbone.layer4.0.downsample.1.running_var, backbone.layer4.0.downsample.1.num_batches_tracked, backbone.layer4.1.conv1.weight, backbone.layer4.1.bn1.weight, backbone.layer4.1.bn1.bias, backbone.layer4.1.bn1.running_mean, backbone.layer4.1.bn1.running_var, backbone.layer4.1.bn1.num_batches_tracked, backbone.layer4.1.conv2.weight, backbone.layer4.1.bn2.weight, backbone.layer4.1.bn2.bias, backbone.layer4.1.bn2.running_mean, backbone.layer4.1.bn2.running_var, backbone.layer4.1.bn2.num_batches_tracked, backbone.layer4.1.conv3.weight, backbone.layer4.1.bn3.weight, backbone.layer4.1.bn3.bias, backbone.layer4.1.bn3.running_mean, backbone.layer4.1.bn3.running_var, backbone.layer4.1.bn3.num_batches_tracked, backbone.layer4.2.conv1.weight, backbone.layer4.2.bn1.weight, backbone.layer4.2.bn1.bias, backbone.layer4.2.bn1.running_mean, backbone.layer4.2.bn1.running_var, backbone.layer4.2.bn1.num_batches_tracked, backbone.layer4.2.conv2.weight, backbone.layer4.2.bn2.weight, backbone.layer4.2.bn2.bias, backbone.layer4.2.bn2.running_mean, backbone.layer4.2.bn2.running_var, backbone.layer4.2.bn2.num_batches_tracked, backbone.layer4.2.conv3.weight, backbone.layer4.2.bn3.weight, backbone.layer4.2.bn3.bias, backbone.layer4.2.bn3.running_mean, backbone.layer4.2.bn3.running_var, backbone.layer4.2.bn3.num_batches_tracked, auxiliary_head.conv_seg.weight, auxiliary_head.conv_seg.bias, auxiliary_head.convs.0.conv.weight, auxiliary_head.convs.0.bn.weight, auxiliary_head.convs.0.bn.bias, auxiliary_head.convs.0.bn.running_mean, auxiliary_head.convs.0.bn.running_var, auxiliary_head.convs.0.bn.num_batches_tracked
237
+
238
+ missing keys in source state_dict: unet.init_conv.weight, unet.init_conv.bias, unet.time_mlp.1.weight, unet.time_mlp.1.bias, unet.time_mlp.3.weight, unet.time_mlp.3.bias, unet.downs.0.0.mlp.1.weight, unet.downs.0.0.mlp.1.bias, unet.downs.0.0.block1.proj.weight, unet.downs.0.0.block1.proj.bias, unet.downs.0.0.block1.norm.weight, unet.downs.0.0.block1.norm.bias, unet.downs.0.0.block2.proj.weight, unet.downs.0.0.block2.proj.bias, unet.downs.0.0.block2.norm.weight, unet.downs.0.0.block2.norm.bias, unet.downs.0.1.mlp.1.weight, unet.downs.0.1.mlp.1.bias, unet.downs.0.1.block1.proj.weight, unet.downs.0.1.block1.proj.bias, unet.downs.0.1.block1.norm.weight, unet.downs.0.1.block1.norm.bias, unet.downs.0.1.block2.proj.weight, unet.downs.0.1.block2.proj.bias, unet.downs.0.1.block2.norm.weight, unet.downs.0.1.block2.norm.bias, unet.downs.0.2.fn.fn.to_qkv.weight, unet.downs.0.2.fn.fn.to_out.0.weight, unet.downs.0.2.fn.fn.to_out.0.bias, unet.downs.0.2.fn.fn.to_out.1.g, unet.downs.0.2.fn.norm.g, unet.downs.0.3.weight, unet.downs.0.3.bias, unet.downs.1.0.mlp.1.weight, unet.downs.1.0.mlp.1.bias, unet.downs.1.0.block1.proj.weight, unet.downs.1.0.block1.proj.bias, unet.downs.1.0.block1.norm.weight, unet.downs.1.0.block1.norm.bias, unet.downs.1.0.block2.proj.weight, unet.downs.1.0.block2.proj.bias, unet.downs.1.0.block2.norm.weight, unet.downs.1.0.block2.norm.bias, unet.downs.1.1.mlp.1.weight, unet.downs.1.1.mlp.1.bias, unet.downs.1.1.block1.proj.weight, unet.downs.1.1.block1.proj.bias, unet.downs.1.1.block1.norm.weight, unet.downs.1.1.block1.norm.bias, unet.downs.1.1.block2.proj.weight, unet.downs.1.1.block2.proj.bias, unet.downs.1.1.block2.norm.weight, unet.downs.1.1.block2.norm.bias, unet.downs.1.2.fn.fn.to_qkv.weight, unet.downs.1.2.fn.fn.to_out.0.weight, unet.downs.1.2.fn.fn.to_out.0.bias, unet.downs.1.2.fn.fn.to_out.1.g, unet.downs.1.2.fn.norm.g, unet.downs.1.3.weight, unet.downs.1.3.bias, unet.downs.2.0.mlp.1.weight, unet.downs.2.0.mlp.1.bias, unet.downs.2.0.block1.proj.weight, unet.downs.2.0.block1.proj.bias, unet.downs.2.0.block1.norm.weight, unet.downs.2.0.block1.norm.bias, unet.downs.2.0.block2.proj.weight, unet.downs.2.0.block2.proj.bias, unet.downs.2.0.block2.norm.weight, unet.downs.2.0.block2.norm.bias, unet.downs.2.1.mlp.1.weight, unet.downs.2.1.mlp.1.bias, unet.downs.2.1.block1.proj.weight, unet.downs.2.1.block1.proj.bias, unet.downs.2.1.block1.norm.weight, unet.downs.2.1.block1.norm.bias, unet.downs.2.1.block2.proj.weight, unet.downs.2.1.block2.proj.bias, unet.downs.2.1.block2.norm.weight, unet.downs.2.1.block2.norm.bias, unet.downs.2.2.fn.fn.to_qkv.weight, unet.downs.2.2.fn.fn.to_out.0.weight, unet.downs.2.2.fn.fn.to_out.0.bias, unet.downs.2.2.fn.fn.to_out.1.g, unet.downs.2.2.fn.norm.g, unet.downs.2.3.weight, unet.downs.2.3.bias, unet.ups.0.0.mlp.1.weight, unet.ups.0.0.mlp.1.bias, unet.ups.0.0.block1.proj.weight, unet.ups.0.0.block1.proj.bias, unet.ups.0.0.block1.norm.weight, unet.ups.0.0.block1.norm.bias, unet.ups.0.0.block2.proj.weight, unet.ups.0.0.block2.proj.bias, unet.ups.0.0.block2.norm.weight, unet.ups.0.0.block2.norm.bias, unet.ups.0.0.res_conv.weight, unet.ups.0.0.res_conv.bias, unet.ups.0.1.mlp.1.weight, unet.ups.0.1.mlp.1.bias, unet.ups.0.1.block1.proj.weight, unet.ups.0.1.block1.proj.bias, unet.ups.0.1.block1.norm.weight, unet.ups.0.1.block1.norm.bias, unet.ups.0.1.block2.proj.weight, unet.ups.0.1.block2.proj.bias, unet.ups.0.1.block2.norm.weight, unet.ups.0.1.block2.norm.bias, unet.ups.0.1.res_conv.weight, unet.ups.0.1.res_conv.bias, unet.ups.0.2.fn.fn.to_qkv.weight, unet.ups.0.2.fn.fn.to_out.0.weight, unet.ups.0.2.fn.fn.to_out.0.bias, unet.ups.0.2.fn.fn.to_out.1.g, unet.ups.0.2.fn.norm.g, unet.ups.0.3.1.weight, unet.ups.0.3.1.bias, unet.ups.1.0.mlp.1.weight, unet.ups.1.0.mlp.1.bias, unet.ups.1.0.block1.proj.weight, unet.ups.1.0.block1.proj.bias, unet.ups.1.0.block1.norm.weight, unet.ups.1.0.block1.norm.bias, unet.ups.1.0.block2.proj.weight, unet.ups.1.0.block2.proj.bias, unet.ups.1.0.block2.norm.weight, unet.ups.1.0.block2.norm.bias, unet.ups.1.0.res_conv.weight, unet.ups.1.0.res_conv.bias, unet.ups.1.1.mlp.1.weight, unet.ups.1.1.mlp.1.bias, unet.ups.1.1.block1.proj.weight, unet.ups.1.1.block1.proj.bias, unet.ups.1.1.block1.norm.weight, unet.ups.1.1.block1.norm.bias, unet.ups.1.1.block2.proj.weight, unet.ups.1.1.block2.proj.bias, unet.ups.1.1.block2.norm.weight, unet.ups.1.1.block2.norm.bias, unet.ups.1.1.res_conv.weight, unet.ups.1.1.res_conv.bias, unet.ups.1.2.fn.fn.to_qkv.weight, unet.ups.1.2.fn.fn.to_out.0.weight, unet.ups.1.2.fn.fn.to_out.0.bias, unet.ups.1.2.fn.fn.to_out.1.g, unet.ups.1.2.fn.norm.g, unet.ups.1.3.1.weight, unet.ups.1.3.1.bias, unet.ups.2.0.mlp.1.weight, unet.ups.2.0.mlp.1.bias, unet.ups.2.0.block1.proj.weight, unet.ups.2.0.block1.proj.bias, unet.ups.2.0.block1.norm.weight, unet.ups.2.0.block1.norm.bias, unet.ups.2.0.block2.proj.weight, unet.ups.2.0.block2.proj.bias, unet.ups.2.0.block2.norm.weight, unet.ups.2.0.block2.norm.bias, unet.ups.2.0.res_conv.weight, unet.ups.2.0.res_conv.bias, unet.ups.2.1.mlp.1.weight, unet.ups.2.1.mlp.1.bias, unet.ups.2.1.block1.proj.weight, unet.ups.2.1.block1.proj.bias, unet.ups.2.1.block1.norm.weight, unet.ups.2.1.block1.norm.bias, unet.ups.2.1.block2.proj.weight, unet.ups.2.1.block2.proj.bias, unet.ups.2.1.block2.norm.weight, unet.ups.2.1.block2.norm.bias, unet.ups.2.1.res_conv.weight, unet.ups.2.1.res_conv.bias, unet.ups.2.2.fn.fn.to_qkv.weight, unet.ups.2.2.fn.fn.to_out.0.weight, unet.ups.2.2.fn.fn.to_out.0.bias, unet.ups.2.2.fn.fn.to_out.1.g, unet.ups.2.2.fn.norm.g, unet.ups.2.3.weight, unet.ups.2.3.bias, unet.mid_block1.mlp.1.weight, unet.mid_block1.mlp.1.bias, unet.mid_block1.block1.proj.weight, unet.mid_block1.block1.proj.bias, unet.mid_block1.block1.norm.weight, unet.mid_block1.block1.norm.bias, unet.mid_block1.block2.proj.weight, unet.mid_block1.block2.proj.bias, unet.mid_block1.block2.norm.weight, unet.mid_block1.block2.norm.bias, unet.mid_attn.fn.fn.to_qkv.weight, unet.mid_attn.fn.fn.to_out.weight, unet.mid_attn.fn.fn.to_out.bias, unet.mid_attn.fn.norm.g, unet.mid_block2.mlp.1.weight, unet.mid_block2.mlp.1.bias, unet.mid_block2.block1.proj.weight, unet.mid_block2.block1.proj.bias, unet.mid_block2.block1.norm.weight, unet.mid_block2.block1.norm.bias, unet.mid_block2.block2.proj.weight, unet.mid_block2.block2.proj.bias, unet.mid_block2.block2.norm.weight, unet.mid_block2.block2.norm.bias, unet.final_res_block.mlp.1.weight, unet.final_res_block.mlp.1.bias, unet.final_res_block.block1.proj.weight, unet.final_res_block.block1.proj.bias, unet.final_res_block.block1.norm.weight, unet.final_res_block.block1.norm.bias, unet.final_res_block.block2.proj.weight, unet.final_res_block.block2.proj.bias, unet.final_res_block.block2.norm.weight, unet.final_res_block.block2.norm.bias, unet.final_res_block.res_conv.weight, unet.final_res_block.res_conv.bias, unet.final_conv.weight, unet.final_conv.bias, conv_seg_new.weight, conv_seg_new.bias, embed.weight
239
+
240
+ 2023-03-03 20:38:10,885 - mmseg - INFO - EncoderDecoderFreeze(
241
+ (backbone): ResNetV1cCustomInitWeights(
242
+ (stem): Sequential(
243
+ (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
244
+ (1): SyncBatchNorm(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
245
+ (2): ReLU(inplace=True)
246
+ (3): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
247
+ (4): SyncBatchNorm(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
248
+ (5): ReLU(inplace=True)
249
+ (6): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
250
+ (7): SyncBatchNorm(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
251
+ (8): ReLU(inplace=True)
252
+ )
253
+ (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
254
+ (layer1): ResLayer(
255
+ (0): Bottleneck(
256
+ (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
257
+ (bn1): SyncBatchNorm(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
258
+ (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
259
+ (bn2): SyncBatchNorm(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
260
+ (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
261
+ (bn3): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
262
+ (relu): ReLU(inplace=True)
263
+ (downsample): Sequential(
264
+ (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
265
+ (1): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
266
+ )
267
+ )
268
+ (1): Bottleneck(
269
+ (conv1): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
270
+ (bn1): SyncBatchNorm(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
271
+ (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
272
+ (bn2): SyncBatchNorm(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
273
+ (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
274
+ (bn3): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
275
+ (relu): ReLU(inplace=True)
276
+ )
277
+ (2): Bottleneck(
278
+ (conv1): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
279
+ (bn1): SyncBatchNorm(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
280
+ (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
281
+ (bn2): SyncBatchNorm(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
282
+ (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
283
+ (bn3): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
284
+ (relu): ReLU(inplace=True)
285
+ )
286
+ )
287
+ (layer2): ResLayer(
288
+ (0): Bottleneck(
289
+ (conv1): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
290
+ (bn1): SyncBatchNorm(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
291
+ (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
292
+ (bn2): SyncBatchNorm(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
293
+ (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
294
+ (bn3): SyncBatchNorm(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
295
+ (relu): ReLU(inplace=True)
296
+ (downsample): Sequential(
297
+ (0): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False)
298
+ (1): SyncBatchNorm(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
299
+ )
300
+ )
301
+ (1): Bottleneck(
302
+ (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
303
+ (bn1): SyncBatchNorm(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
304
+ (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
305
+ (bn2): SyncBatchNorm(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
306
+ (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
307
+ (bn3): SyncBatchNorm(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
308
+ (relu): ReLU(inplace=True)
309
+ )
310
+ (2): Bottleneck(
311
+ (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
312
+ (bn1): SyncBatchNorm(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
313
+ (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
314
+ (bn2): SyncBatchNorm(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
315
+ (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
316
+ (bn3): SyncBatchNorm(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
317
+ (relu): ReLU(inplace=True)
318
+ )
319
+ (3): Bottleneck(
320
+ (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
321
+ (bn1): SyncBatchNorm(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
322
+ (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
323
+ (bn2): SyncBatchNorm(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
324
+ (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
325
+ (bn3): SyncBatchNorm(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
326
+ (relu): ReLU(inplace=True)
327
+ )
328
+ )
329
+ (layer3): ResLayer(
330
+ (0): Bottleneck(
331
+ (conv1): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
332
+ (bn1): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
333
+ (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
334
+ (bn2): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
335
+ (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
336
+ (bn3): SyncBatchNorm(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
337
+ (relu): ReLU(inplace=True)
338
+ (downsample): Sequential(
339
+ (0): Conv2d(512, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
340
+ (1): SyncBatchNorm(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
341
+ )
342
+ )
343
+ (1): Bottleneck(
344
+ (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
345
+ (bn1): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
346
+ (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2), dilation=(2, 2), bias=False)
347
+ (bn2): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
348
+ (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
349
+ (bn3): SyncBatchNorm(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
350
+ (relu): ReLU(inplace=True)
351
+ )
352
+ (2): Bottleneck(
353
+ (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
354
+ (bn1): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
355
+ (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2), dilation=(2, 2), bias=False)
356
+ (bn2): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
357
+ (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
358
+ (bn3): SyncBatchNorm(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
359
+ (relu): ReLU(inplace=True)
360
+ )
361
+ (3): Bottleneck(
362
+ (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
363
+ (bn1): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
364
+ (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2), dilation=(2, 2), bias=False)
365
+ (bn2): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
366
+ (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
367
+ (bn3): SyncBatchNorm(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
368
+ (relu): ReLU(inplace=True)
369
+ )
370
+ (4): Bottleneck(
371
+ (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
372
+ (bn1): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
373
+ (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2), dilation=(2, 2), bias=False)
374
+ (bn2): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
375
+ (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
376
+ (bn3): SyncBatchNorm(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
377
+ (relu): ReLU(inplace=True)
378
+ )
379
+ (5): Bottleneck(
380
+ (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
381
+ (bn1): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
382
+ (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2), dilation=(2, 2), bias=False)
383
+ (bn2): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
384
+ (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
385
+ (bn3): SyncBatchNorm(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
386
+ (relu): ReLU(inplace=True)
387
+ )
388
+ (6): Bottleneck(
389
+ (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
390
+ (bn1): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
391
+ (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2), dilation=(2, 2), bias=False)
392
+ (bn2): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
393
+ (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
394
+ (bn3): SyncBatchNorm(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
395
+ (relu): ReLU(inplace=True)
396
+ )
397
+ (7): Bottleneck(
398
+ (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
399
+ (bn1): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
400
+ (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2), dilation=(2, 2), bias=False)
401
+ (bn2): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
402
+ (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
403
+ (bn3): SyncBatchNorm(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
404
+ (relu): ReLU(inplace=True)
405
+ )
406
+ (8): Bottleneck(
407
+ (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
408
+ (bn1): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
409
+ (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2), dilation=(2, 2), bias=False)
410
+ (bn2): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
411
+ (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
412
+ (bn3): SyncBatchNorm(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
413
+ (relu): ReLU(inplace=True)
414
+ )
415
+ (9): Bottleneck(
416
+ (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
417
+ (bn1): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
418
+ (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2), dilation=(2, 2), bias=False)
419
+ (bn2): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
420
+ (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
421
+ (bn3): SyncBatchNorm(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
422
+ (relu): ReLU(inplace=True)
423
+ )
424
+ (10): Bottleneck(
425
+ (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
426
+ (bn1): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
427
+ (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2), dilation=(2, 2), bias=False)
428
+ (bn2): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
429
+ (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
430
+ (bn3): SyncBatchNorm(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
431
+ (relu): ReLU(inplace=True)
432
+ )
433
+ (11): Bottleneck(
434
+ (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
435
+ (bn1): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
436
+ (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2), dilation=(2, 2), bias=False)
437
+ (bn2): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
438
+ (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
439
+ (bn3): SyncBatchNorm(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
440
+ (relu): ReLU(inplace=True)
441
+ )
442
+ (12): Bottleneck(
443
+ (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
444
+ (bn1): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
445
+ (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2), dilation=(2, 2), bias=False)
446
+ (bn2): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
447
+ (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
448
+ (bn3): SyncBatchNorm(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
449
+ (relu): ReLU(inplace=True)
450
+ )
451
+ (13): Bottleneck(
452
+ (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
453
+ (bn1): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
454
+ (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2), dilation=(2, 2), bias=False)
455
+ (bn2): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
456
+ (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
457
+ (bn3): SyncBatchNorm(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
458
+ (relu): ReLU(inplace=True)
459
+ )
460
+ (14): Bottleneck(
461
+ (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
462
+ (bn1): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
463
+ (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2), dilation=(2, 2), bias=False)
464
+ (bn2): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
465
+ (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
466
+ (bn3): SyncBatchNorm(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
467
+ (relu): ReLU(inplace=True)
468
+ )
469
+ (15): Bottleneck(
470
+ (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
471
+ (bn1): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
472
+ (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2), dilation=(2, 2), bias=False)
473
+ (bn2): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
474
+ (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
475
+ (bn3): SyncBatchNorm(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
476
+ (relu): ReLU(inplace=True)
477
+ )
478
+ (16): Bottleneck(
479
+ (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
480
+ (bn1): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
481
+ (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2), dilation=(2, 2), bias=False)
482
+ (bn2): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
483
+ (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
484
+ (bn3): SyncBatchNorm(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
485
+ (relu): ReLU(inplace=True)
486
+ )
487
+ (17): Bottleneck(
488
+ (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
489
+ (bn1): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
490
+ (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2), dilation=(2, 2), bias=False)
491
+ (bn2): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
492
+ (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
493
+ (bn3): SyncBatchNorm(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
494
+ (relu): ReLU(inplace=True)
495
+ )
496
+ (18): Bottleneck(
497
+ (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
498
+ (bn1): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
499
+ (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2), dilation=(2, 2), bias=False)
500
+ (bn2): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
501
+ (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
502
+ (bn3): SyncBatchNorm(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
503
+ (relu): ReLU(inplace=True)
504
+ )
505
+ (19): Bottleneck(
506
+ (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
507
+ (bn1): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
508
+ (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2), dilation=(2, 2), bias=False)
509
+ (bn2): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
510
+ (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
511
+ (bn3): SyncBatchNorm(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
512
+ (relu): ReLU(inplace=True)
513
+ )
514
+ (20): Bottleneck(
515
+ (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
516
+ (bn1): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
517
+ (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2), dilation=(2, 2), bias=False)
518
+ (bn2): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
519
+ (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
520
+ (bn3): SyncBatchNorm(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
521
+ (relu): ReLU(inplace=True)
522
+ )
523
+ (21): Bottleneck(
524
+ (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
525
+ (bn1): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
526
+ (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2), dilation=(2, 2), bias=False)
527
+ (bn2): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
528
+ (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
529
+ (bn3): SyncBatchNorm(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
530
+ (relu): ReLU(inplace=True)
531
+ )
532
+ (22): Bottleneck(
533
+ (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
534
+ (bn1): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
535
+ (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2), dilation=(2, 2), bias=False)
536
+ (bn2): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
537
+ (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
538
+ (bn3): SyncBatchNorm(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
539
+ (relu): ReLU(inplace=True)
540
+ )
541
+ )
542
+ (layer4): ResLayer(
543
+ (0): Bottleneck(
544
+ (conv1): Conv2d(1024, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
545
+ (bn1): SyncBatchNorm(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
546
+ (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2), dilation=(2, 2), bias=False)
547
+ (bn2): SyncBatchNorm(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
548
+ (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)
549
+ (bn3): SyncBatchNorm(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
550
+ (relu): ReLU(inplace=True)
551
+ (downsample): Sequential(
552
+ (0): Conv2d(1024, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)
553
+ (1): SyncBatchNorm(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
554
+ )
555
+ )
556
+ (1): Bottleneck(
557
+ (conv1): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
558
+ (bn1): SyncBatchNorm(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
559
+ (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(4, 4), dilation=(4, 4), bias=False)
560
+ (bn2): SyncBatchNorm(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
561
+ (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)
562
+ (bn3): SyncBatchNorm(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
563
+ (relu): ReLU(inplace=True)
564
+ )
565
+ (2): Bottleneck(
566
+ (conv1): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
567
+ (bn1): SyncBatchNorm(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
568
+ (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(4, 4), dilation=(4, 4), bias=False)
569
+ (bn2): SyncBatchNorm(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
570
+ (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)
571
+ (bn3): SyncBatchNorm(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
572
+ (relu): ReLU(inplace=True)
573
+ )
574
+ )
575
+ )
576
+ init_cfg={'type': 'Pretrained', 'checkpoint': 'pretrained/deeplabv3plus_r101-d8_512x512_160k_ade20k_20200615_123232-38ed86bb.pth'}
577
+ (decode_head): DepthwiseSeparableASPPHeadUnetFCHeadSingleStep(
578
+ input_transform=None, ignore_index=0, align_corners=False
579
+ (loss_decode): CrossEntropyLoss(avg_non_ignore=False)
580
+ (conv_seg): None
581
+ (dropout): Dropout2d(p=0.1, inplace=False)
582
+ (image_pool): Sequential(
583
+ (0): AdaptiveAvgPool2d(output_size=1)
584
+ (1): ConvModule(
585
+ (conv): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
586
+ (bn): SyncBatchNorm(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
587
+ (activate): ReLU(inplace=True)
588
+ )
589
+ )
590
+ (aspp_modules): DepthwiseSeparableASPPModule(
591
+ (0): ConvModule(
592
+ (conv): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
593
+ (bn): SyncBatchNorm(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
594
+ (activate): ReLU(inplace=True)
595
+ )
596
+ (1): DepthwiseSeparableConvModule(
597
+ (depthwise_conv): ConvModule(
598
+ (conv): Conv2d(2048, 2048, kernel_size=(3, 3), stride=(1, 1), padding=(12, 12), dilation=(12, 12), groups=2048, bias=False)
599
+ (bn): SyncBatchNorm(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
600
+ (activate): ReLU(inplace=True)
601
+ )
602
+ (pointwise_conv): ConvModule(
603
+ (conv): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
604
+ (bn): SyncBatchNorm(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
605
+ (activate): ReLU(inplace=True)
606
+ )
607
+ )
608
+ (2): DepthwiseSeparableConvModule(
609
+ (depthwise_conv): ConvModule(
610
+ (conv): Conv2d(2048, 2048, kernel_size=(3, 3), stride=(1, 1), padding=(24, 24), dilation=(24, 24), groups=2048, bias=False)
611
+ (bn): SyncBatchNorm(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
612
+ (activate): ReLU(inplace=True)
613
+ )
614
+ (pointwise_conv): ConvModule(
615
+ (conv): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
616
+ (bn): SyncBatchNorm(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
617
+ (activate): ReLU(inplace=True)
618
+ )
619
+ )
620
+ (3): DepthwiseSeparableConvModule(
621
+ (depthwise_conv): ConvModule(
622
+ (conv): Conv2d(2048, 2048, kernel_size=(3, 3), stride=(1, 1), padding=(36, 36), dilation=(36, 36), groups=2048, bias=False)
623
+ (bn): SyncBatchNorm(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
624
+ (activate): ReLU(inplace=True)
625
+ )
626
+ (pointwise_conv): ConvModule(
627
+ (conv): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
628
+ (bn): SyncBatchNorm(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
629
+ (activate): ReLU(inplace=True)
630
+ )
631
+ )
632
+ )
633
+ (bottleneck): ConvModule(
634
+ (conv): Conv2d(2560, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
635
+ (bn): SyncBatchNorm(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
636
+ (activate): ReLU(inplace=True)
637
+ )
638
+ (c1_bottleneck): ConvModule(
639
+ (conv): Conv2d(256, 48, kernel_size=(1, 1), stride=(1, 1), bias=False)
640
+ (bn): SyncBatchNorm(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
641
+ (activate): ReLU(inplace=True)
642
+ )
643
+ (sep_bottleneck): Sequential(
644
+ (0): DepthwiseSeparableConvModule(
645
+ (depthwise_conv): ConvModule(
646
+ (conv): Conv2d(560, 560, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=560, bias=False)
647
+ (bn): SyncBatchNorm(560, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
648
+ (activate): ReLU(inplace=True)
649
+ )
650
+ (pointwise_conv): ConvModule(
651
+ (conv): Conv2d(560, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
652
+ (bn): SyncBatchNorm(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
653
+ (activate): ReLU(inplace=True)
654
+ )
655
+ )
656
+ (1): DepthwiseSeparableConvModule(
657
+ (depthwise_conv): ConvModule(
658
+ (conv): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=512, bias=False)
659
+ (bn): SyncBatchNorm(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
660
+ (activate): ReLU(inplace=True)
661
+ )
662
+ (pointwise_conv): ConvModule(
663
+ (conv): Conv2d(512, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
664
+ (bn): SyncBatchNorm(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
665
+ (activate): ReLU(inplace=True)
666
+ )
667
+ )
668
+ )
669
+ (unet): Unet(
670
+ (init_conv): Conv2d(528, 256, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3))
671
+ (time_mlp): Sequential(
672
+ (0): SinusoidalPosEmb()
673
+ (1): Linear(in_features=256, out_features=1024, bias=True)
674
+ (2): GELU(approximate='none')
675
+ (3): Linear(in_features=1024, out_features=1024, bias=True)
676
+ )
677
+ (downs): ModuleList(
678
+ (0): ModuleList(
679
+ (0): ResnetBlock(
680
+ (mlp): Sequential(
681
+ (0): SiLU()
682
+ (1): Linear(in_features=1024, out_features=512, bias=True)
683
+ )
684
+ (block1): Block(
685
+ (proj): WeightStandardizedConv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
686
+ (norm): GroupNorm(8, 256, eps=1e-05, affine=True)
687
+ (act): SiLU()
688
+ )
689
+ (block2): Block(
690
+ (proj): WeightStandardizedConv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
691
+ (norm): GroupNorm(8, 256, eps=1e-05, affine=True)
692
+ (act): SiLU()
693
+ )
694
+ (res_conv): Identity()
695
+ )
696
+ (1): ResnetBlock(
697
+ (mlp): Sequential(
698
+ (0): SiLU()
699
+ (1): Linear(in_features=1024, out_features=512, bias=True)
700
+ )
701
+ (block1): Block(
702
+ (proj): WeightStandardizedConv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
703
+ (norm): GroupNorm(8, 256, eps=1e-05, affine=True)
704
+ (act): SiLU()
705
+ )
706
+ (block2): Block(
707
+ (proj): WeightStandardizedConv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
708
+ (norm): GroupNorm(8, 256, eps=1e-05, affine=True)
709
+ (act): SiLU()
710
+ )
711
+ (res_conv): Identity()
712
+ )
713
+ (2): Residual(
714
+ (fn): PreNorm(
715
+ (fn): LinearAttention(
716
+ (to_qkv): Conv2d(256, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
717
+ (to_out): Sequential(
718
+ (0): Conv2d(128, 256, kernel_size=(1, 1), stride=(1, 1))
719
+ (1): LayerNorm()
720
+ )
721
+ )
722
+ (norm): LayerNorm()
723
+ )
724
+ )
725
+ (3): Conv2d(256, 256, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
726
+ )
727
+ (1): ModuleList(
728
+ (0): ResnetBlock(
729
+ (mlp): Sequential(
730
+ (0): SiLU()
731
+ (1): Linear(in_features=1024, out_features=512, bias=True)
732
+ )
733
+ (block1): Block(
734
+ (proj): WeightStandardizedConv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
735
+ (norm): GroupNorm(8, 256, eps=1e-05, affine=True)
736
+ (act): SiLU()
737
+ )
738
+ (block2): Block(
739
+ (proj): WeightStandardizedConv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
740
+ (norm): GroupNorm(8, 256, eps=1e-05, affine=True)
741
+ (act): SiLU()
742
+ )
743
+ (res_conv): Identity()
744
+ )
745
+ (1): ResnetBlock(
746
+ (mlp): Sequential(
747
+ (0): SiLU()
748
+ (1): Linear(in_features=1024, out_features=512, bias=True)
749
+ )
750
+ (block1): Block(
751
+ (proj): WeightStandardizedConv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
752
+ (norm): GroupNorm(8, 256, eps=1e-05, affine=True)
753
+ (act): SiLU()
754
+ )
755
+ (block2): Block(
756
+ (proj): WeightStandardizedConv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
757
+ (norm): GroupNorm(8, 256, eps=1e-05, affine=True)
758
+ (act): SiLU()
759
+ )
760
+ (res_conv): Identity()
761
+ )
762
+ (2): Residual(
763
+ (fn): PreNorm(
764
+ (fn): LinearAttention(
765
+ (to_qkv): Conv2d(256, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
766
+ (to_out): Sequential(
767
+ (0): Conv2d(128, 256, kernel_size=(1, 1), stride=(1, 1))
768
+ (1): LayerNorm()
769
+ )
770
+ )
771
+ (norm): LayerNorm()
772
+ )
773
+ )
774
+ (3): Conv2d(256, 256, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
775
+ )
776
+ (2): ModuleList(
777
+ (0): ResnetBlock(
778
+ (mlp): Sequential(
779
+ (0): SiLU()
780
+ (1): Linear(in_features=1024, out_features=512, bias=True)
781
+ )
782
+ (block1): Block(
783
+ (proj): WeightStandardizedConv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
784
+ (norm): GroupNorm(8, 256, eps=1e-05, affine=True)
785
+ (act): SiLU()
786
+ )
787
+ (block2): Block(
788
+ (proj): WeightStandardizedConv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
789
+ (norm): GroupNorm(8, 256, eps=1e-05, affine=True)
790
+ (act): SiLU()
791
+ )
792
+ (res_conv): Identity()
793
+ )
794
+ (1): ResnetBlock(
795
+ (mlp): Sequential(
796
+ (0): SiLU()
797
+ (1): Linear(in_features=1024, out_features=512, bias=True)
798
+ )
799
+ (block1): Block(
800
+ (proj): WeightStandardizedConv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
801
+ (norm): GroupNorm(8, 256, eps=1e-05, affine=True)
802
+ (act): SiLU()
803
+ )
804
+ (block2): Block(
805
+ (proj): WeightStandardizedConv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
806
+ (norm): GroupNorm(8, 256, eps=1e-05, affine=True)
807
+ (act): SiLU()
808
+ )
809
+ (res_conv): Identity()
810
+ )
811
+ (2): Residual(
812
+ (fn): PreNorm(
813
+ (fn): LinearAttention(
814
+ (to_qkv): Conv2d(256, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
815
+ (to_out): Sequential(
816
+ (0): Conv2d(128, 256, kernel_size=(1, 1), stride=(1, 1))
817
+ (1): LayerNorm()
818
+ )
819
+ )
820
+ (norm): LayerNorm()
821
+ )
822
+ )
823
+ (3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
824
+ )
825
+ )
826
+ (ups): ModuleList(
827
+ (0): ModuleList(
828
+ (0): ResnetBlock(
829
+ (mlp): Sequential(
830
+ (0): SiLU()
831
+ (1): Linear(in_features=1024, out_features=512, bias=True)
832
+ )
833
+ (block1): Block(
834
+ (proj): WeightStandardizedConv2d(512, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
835
+ (norm): GroupNorm(8, 256, eps=1e-05, affine=True)
836
+ (act): SiLU()
837
+ )
838
+ (block2): Block(
839
+ (proj): WeightStandardizedConv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
840
+ (norm): GroupNorm(8, 256, eps=1e-05, affine=True)
841
+ (act): SiLU()
842
+ )
843
+ (res_conv): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
844
+ )
845
+ (1): ResnetBlock(
846
+ (mlp): Sequential(
847
+ (0): SiLU()
848
+ (1): Linear(in_features=1024, out_features=512, bias=True)
849
+ )
850
+ (block1): Block(
851
+ (proj): WeightStandardizedConv2d(512, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
852
+ (norm): GroupNorm(8, 256, eps=1e-05, affine=True)
853
+ (act): SiLU()
854
+ )
855
+ (block2): Block(
856
+ (proj): WeightStandardizedConv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
857
+ (norm): GroupNorm(8, 256, eps=1e-05, affine=True)
858
+ (act): SiLU()
859
+ )
860
+ (res_conv): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
861
+ )
862
+ (2): Residual(
863
+ (fn): PreNorm(
864
+ (fn): LinearAttention(
865
+ (to_qkv): Conv2d(256, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
866
+ (to_out): Sequential(
867
+ (0): Conv2d(128, 256, kernel_size=(1, 1), stride=(1, 1))
868
+ (1): LayerNorm()
869
+ )
870
+ )
871
+ (norm): LayerNorm()
872
+ )
873
+ )
874
+ (3): Sequential(
875
+ (0): Upsample(scale_factor=2.0, mode=nearest)
876
+ (1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
877
+ )
878
+ )
879
+ (1): ModuleList(
880
+ (0): ResnetBlock(
881
+ (mlp): Sequential(
882
+ (0): SiLU()
883
+ (1): Linear(in_features=1024, out_features=512, bias=True)
884
+ )
885
+ (block1): Block(
886
+ (proj): WeightStandardizedConv2d(512, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
887
+ (norm): GroupNorm(8, 256, eps=1e-05, affine=True)
888
+ (act): SiLU()
889
+ )
890
+ (block2): Block(
891
+ (proj): WeightStandardizedConv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
892
+ (norm): GroupNorm(8, 256, eps=1e-05, affine=True)
893
+ (act): SiLU()
894
+ )
895
+ (res_conv): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
896
+ )
897
+ (1): ResnetBlock(
898
+ (mlp): Sequential(
899
+ (0): SiLU()
900
+ (1): Linear(in_features=1024, out_features=512, bias=True)
901
+ )
902
+ (block1): Block(
903
+ (proj): WeightStandardizedConv2d(512, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
904
+ (norm): GroupNorm(8, 256, eps=1e-05, affine=True)
905
+ (act): SiLU()
906
+ )
907
+ (block2): Block(
908
+ (proj): WeightStandardizedConv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
909
+ (norm): GroupNorm(8, 256, eps=1e-05, affine=True)
910
+ (act): SiLU()
911
+ )
912
+ (res_conv): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
913
+ )
914
+ (2): Residual(
915
+ (fn): PreNorm(
916
+ (fn): LinearAttention(
917
+ (to_qkv): Conv2d(256, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
918
+ (to_out): Sequential(
919
+ (0): Conv2d(128, 256, kernel_size=(1, 1), stride=(1, 1))
920
+ (1): LayerNorm()
921
+ )
922
+ )
923
+ (norm): LayerNorm()
924
+ )
925
+ )
926
+ (3): Sequential(
927
+ (0): Upsample(scale_factor=2.0, mode=nearest)
928
+ (1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
929
+ )
930
+ )
931
+ (2): ModuleList(
932
+ (0): ResnetBlock(
933
+ (mlp): Sequential(
934
+ (0): SiLU()
935
+ (1): Linear(in_features=1024, out_features=512, bias=True)
936
+ )
937
+ (block1): Block(
938
+ (proj): WeightStandardizedConv2d(512, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
939
+ (norm): GroupNorm(8, 256, eps=1e-05, affine=True)
940
+ (act): SiLU()
941
+ )
942
+ (block2): Block(
943
+ (proj): WeightStandardizedConv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
944
+ (norm): GroupNorm(8, 256, eps=1e-05, affine=True)
945
+ (act): SiLU()
946
+ )
947
+ (res_conv): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
948
+ )
949
+ (1): ResnetBlock(
950
+ (mlp): Sequential(
951
+ (0): SiLU()
952
+ (1): Linear(in_features=1024, out_features=512, bias=True)
953
+ )
954
+ (block1): Block(
955
+ (proj): WeightStandardizedConv2d(512, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
956
+ (norm): GroupNorm(8, 256, eps=1e-05, affine=True)
957
+ (act): SiLU()
958
+ )
959
+ (block2): Block(
960
+ (proj): WeightStandardizedConv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
961
+ (norm): GroupNorm(8, 256, eps=1e-05, affine=True)
962
+ (act): SiLU()
963
+ )
964
+ (res_conv): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
965
+ )
966
+ (2): Residual(
967
+ (fn): PreNorm(
968
+ (fn): LinearAttention(
969
+ (to_qkv): Conv2d(256, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
970
+ (to_out): Sequential(
971
+ (0): Conv2d(128, 256, kernel_size=(1, 1), stride=(1, 1))
972
+ (1): LayerNorm()
973
+ )
974
+ )
975
+ (norm): LayerNorm()
976
+ )
977
+ )
978
+ (3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
979
+ )
980
+ )
981
+ (mid_block1): ResnetBlock(
982
+ (mlp): Sequential(
983
+ (0): SiLU()
984
+ (1): Linear(in_features=1024, out_features=512, bias=True)
985
+ )
986
+ (block1): Block(
987
+ (proj): WeightStandardizedConv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
988
+ (norm): GroupNorm(8, 256, eps=1e-05, affine=True)
989
+ (act): SiLU()
990
+ )
991
+ (block2): Block(
992
+ (proj): WeightStandardizedConv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
993
+ (norm): GroupNorm(8, 256, eps=1e-05, affine=True)
994
+ (act): SiLU()
995
+ )
996
+ (res_conv): Identity()
997
+ )
998
+ (mid_attn): Residual(
999
+ (fn): PreNorm(
1000
+ (fn): Attention(
1001
+ (to_qkv): Conv2d(256, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
1002
+ (to_out): Conv2d(128, 256, kernel_size=(1, 1), stride=(1, 1))
1003
+ )
1004
+ (norm): LayerNorm()
1005
+ )
1006
+ )
1007
+ (mid_block2): ResnetBlock(
1008
+ (mlp): Sequential(
1009
+ (0): SiLU()
1010
+ (1): Linear(in_features=1024, out_features=512, bias=True)
1011
+ )
1012
+ (block1): Block(
1013
+ (proj): WeightStandardizedConv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
1014
+ (norm): GroupNorm(8, 256, eps=1e-05, affine=True)
1015
+ (act): SiLU()
1016
+ )
1017
+ (block2): Block(
1018
+ (proj): WeightStandardizedConv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
1019
+ (norm): GroupNorm(8, 256, eps=1e-05, affine=True)
1020
+ (act): SiLU()
1021
+ )
1022
+ (res_conv): Identity()
1023
+ )
1024
+ (final_res_block): ResnetBlock(
1025
+ (mlp): Sequential(
1026
+ (0): SiLU()
1027
+ (1): Linear(in_features=1024, out_features=512, bias=True)
1028
+ )
1029
+ (block1): Block(
1030
+ (proj): WeightStandardizedConv2d(512, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
1031
+ (norm): GroupNorm(8, 256, eps=1e-05, affine=True)
1032
+ (act): SiLU()
1033
+ )
1034
+ (block2): Block(
1035
+ (proj): WeightStandardizedConv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
1036
+ (norm): GroupNorm(8, 256, eps=1e-05, affine=True)
1037
+ (act): SiLU()
1038
+ )
1039
+ (res_conv): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
1040
+ )
1041
+ (final_conv): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
1042
+ )
1043
+ (conv_seg_new): Conv2d(256, 151, kernel_size=(1, 1), stride=(1, 1))
1044
+ (embed): Embedding(151, 16)
1045
+ )
1046
+ init_cfg={'type': 'Pretrained', 'checkpoint': 'pretrained/deeplabv3plus_r101-d8_512x512_160k_ade20k_20200615_123232-38ed86bb.pth'}
1047
+ )
1048
+ 2023-03-03 20:38:11,641 - mmseg - INFO - Loaded 20210 images
1049
+ 2023-03-03 20:38:12,746 - mmseg - INFO - Loaded 2000 images
1050
+ 2023-03-03 20:38:12,750 - mmseg - INFO - Start running, host: laizeqiang@SH-IDC1-10-140-37-139, work_dir: /mnt/petrelfs/laizeqiang/mmseg-baseline/work_dirs2/deeplabv3plus_r101-d8_aspp_head_unet_fc_small_single_step_ade_pretrained_freeze_embed_80k_ade20k151
1051
+ 2023-03-03 20:38:12,750 - mmseg - INFO - Hooks will be executed in the following order:
1052
+ before_run:
1053
+ (VERY_HIGH ) StepLrUpdaterHook
1054
+ (NORMAL ) CheckpointHook
1055
+ (LOW ) DistEvalHookMultiSteps
1056
+ (VERY_LOW ) TextLoggerHook
1057
+ --------------------
1058
+ before_train_epoch:
1059
+ (VERY_HIGH ) StepLrUpdaterHook
1060
+ (LOW ) IterTimerHook
1061
+ (LOW ) DistEvalHookMultiSteps
1062
+ (VERY_LOW ) TextLoggerHook
1063
+ --------------------
1064
+ before_train_iter:
1065
+ (VERY_HIGH ) StepLrUpdaterHook
1066
+ (LOW ) IterTimerHook
1067
+ (LOW ) DistEvalHookMultiSteps
1068
+ --------------------
1069
+ after_train_iter:
1070
+ (ABOVE_NORMAL) OptimizerHook
1071
+ (NORMAL ) CheckpointHook
1072
+ (LOW ) IterTimerHook
1073
+ (LOW ) DistEvalHookMultiSteps
1074
+ (VERY_LOW ) TextLoggerHook
1075
+ --------------------
1076
+ after_train_epoch:
1077
+ (NORMAL ) CheckpointHook
1078
+ (LOW ) DistEvalHookMultiSteps
1079
+ (VERY_LOW ) TextLoggerHook
1080
+ --------------------
1081
+ before_val_epoch:
1082
+ (LOW ) IterTimerHook
1083
+ (VERY_LOW ) TextLoggerHook
1084
+ --------------------
1085
+ before_val_iter:
1086
+ (LOW ) IterTimerHook
1087
+ --------------------
1088
+ after_val_iter:
1089
+ (LOW ) IterTimerHook
1090
+ --------------------
1091
+ after_val_epoch:
1092
+ (VERY_LOW ) TextLoggerHook
1093
+ --------------------
1094
+ after_run:
1095
+ (VERY_LOW ) TextLoggerHook
1096
+ --------------------
1097
+ 2023-03-03 20:38:12,750 - mmseg - INFO - workflow: [('train', 1)], max: 80000 iters
1098
+ 2023-03-03 20:38:12,751 - mmseg - INFO - Checkpoints will be saved to /mnt/petrelfs/laizeqiang/mmseg-baseline/work_dirs2/deeplabv3plus_r101-d8_aspp_head_unet_fc_small_single_step_ade_pretrained_freeze_embed_80k_ade20k151 by HardDiskBackend.
1099
+ 2023-03-03 20:39:04,527 - mmseg - INFO - Iter [50/80000] lr: 7.350e-06, eta: 12:26:58, time: 0.561, data_time: 0.016, memory: 39544, decode.loss_ce: 3.5336, decode.acc_seg: 28.1587, loss: 3.5336
1100
+ 2023-03-03 20:39:19,367 - mmseg - INFO - Iter [100/80000] lr: 1.485e-05, eta: 9:30:52, time: 0.297, data_time: 0.007, memory: 39544, decode.loss_ce: 2.0701, decode.acc_seg: 58.4895, loss: 2.0701
deeplabv3plus_r101_singlestep/20230303_203803.log.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {"env_info": "sys.platform: linux\nPython: 3.7.16 (default, Jan 17 2023, 22:20:44) [GCC 11.2.0]\nCUDA available: True\nGPU 0,1,2,3,4,5,6,7: NVIDIA A100-SXM4-80GB\nCUDA_HOME: /mnt/petrelfs/laizeqiang/miniconda3/envs/torch\nNVCC: Cuda compilation tools, release 11.6, V11.6.124\nGCC: gcc (GCC) 4.8.5 20150623 (Red Hat 4.8.5-44)\nPyTorch: 1.13.1\nPyTorch compiling details: PyTorch built with:\n - GCC 9.3\n - C++ Version: 201402\n - Intel(R) oneAPI Math Kernel Library Version 2021.4-Product Build 20210904 for Intel(R) 64 architecture applications\n - Intel(R) MKL-DNN v2.6.0 (Git Hash 52b5f107dd9cf10910aaa19cb47f3abf9b349815)\n - OpenMP 201511 (a.k.a. OpenMP 4.5)\n - LAPACK is enabled (usually provided by MKL)\n - NNPACK is enabled\n - CPU capability usage: AVX2\n - CUDA Runtime 11.6\n - NVCC architecture flags: -gencode;arch=compute_37,code=sm_37;-gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_61,code=sm_61;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86;-gencode;arch=compute_37,code=compute_37\n - CuDNN 8.3.2 (built against CUDA 11.5)\n - Magma 2.6.1\n - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CUDA_VERSION=11.6, CUDNN_VERSION=8.3.2, CXX_COMPILER=/opt/rh/devtoolset-9/root/usr/bin/c++, CXX_FLAGS= -fabi-version=11 -Wno-deprecated -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -fopenmp -DNDEBUG -DUSE_KINETO -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -DEDGE_PROFILER_USE_KINETO -O2 -fPIC -Wno-narrowing -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wunused-local-typedefs -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-error=deprecated-declarations -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=redundant-decls -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, PERF_WITH_AVX512=1, TORCH_VERSION=1.13.1, USE_CUDA=ON, USE_CUDNN=ON, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=ON, USE_NNPACK=ON, USE_OPENMP=ON, USE_ROCM=OFF, \n\nTorchVision: 0.14.1\nOpenCV: 4.7.0\nMMCV: 1.7.1\nMMCV Compiler: GCC 9.3\nMMCV CUDA Compiler: 11.6\nMMSegmentation: 0.30.0+c844fc6", "seed": 1819371145, "exp_name": "deeplabv3plus_r101-d8_aspp_head_unet_fc_small_single_step_ade_pretrained_freeze_embed_80k_ade20k151.py", "mmseg_version": "0.30.0+c844fc6", "config": "norm_cfg = dict(type='SyncBN', requires_grad=True)\nmodel = dict(\n type='EncoderDecoderFreeze',\n pretrained=\n 'pretrained/deeplabv3plus_r101-d8_512x512_160k_ade20k_20200615_123232-38ed86bb.pth',\n backbone=dict(\n type='ResNetV1cCustomInitWeights',\n depth=101,\n num_stages=4,\n out_indices=(0, 1, 2, 3),\n dilations=(1, 1, 2, 4),\n strides=(1, 2, 1, 1),\n norm_cfg=dict(type='SyncBN', requires_grad=True),\n norm_eval=False,\n style='pytorch',\n contract_dilation=True,\n pretrained=\n 'pretrained/deeplabv3plus_r101-d8_512x512_160k_ade20k_20200615_123232-38ed86bb.pth'\n ),\n decode_head=dict(\n type='DepthwiseSeparableASPPHeadUnetFCHeadSingleStep',\n pretrained=\n 'pretrained/deeplabv3plus_r101-d8_512x512_160k_ade20k_20200615_123232-38ed86bb.pth',\n dim=256,\n out_dim=256,\n unet_channels=528,\n dim_mults=[1, 1, 1],\n cat_embedding_dim=16,\n ignore_index=0,\n in_channels=2048,\n in_index=3,\n channels=512,\n dilations=(1, 12, 24, 36),\n c1_in_channels=256,\n c1_channels=48,\n dropout_ratio=0.1,\n num_classes=151,\n norm_cfg=dict(type='SyncBN', requires_grad=True),\n align_corners=False,\n loss_decode=dict(\n type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),\n auxiliary_head=None,\n train_cfg=dict(),\n test_cfg=dict(mode='whole'),\n freeze_parameters=['backbone', 'decode_head'])\ndataset_type = 'ADE20K151Dataset'\ndata_root = 'data/ade/ADEChallengeData2016'\nimg_norm_cfg = dict(\n mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ncrop_size = (512, 512)\ntrain_pipeline = [\n dict(type='LoadImageFromFile'),\n dict(type='LoadAnnotations', reduce_zero_label=False),\n dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),\n dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75),\n dict(type='RandomFlip', prob=0.5),\n dict(type='PhotoMetricDistortion'),\n dict(\n type='Normalize',\n mean=[123.675, 116.28, 103.53],\n std=[58.395, 57.12, 57.375],\n to_rgb=True),\n dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=0),\n dict(type='DefaultFormatBundle'),\n dict(type='Collect', keys=['img', 'gt_semantic_seg'])\n]\ntest_pipeline = [\n dict(type='LoadImageFromFile'),\n dict(\n type='MultiScaleFlipAug',\n img_scale=(2048, 512),\n flip=False,\n transforms=[\n dict(type='Resize', keep_ratio=True),\n dict(type='RandomFlip'),\n dict(\n type='Normalize',\n mean=[123.675, 116.28, 103.53],\n std=[58.395, 57.12, 57.375],\n to_rgb=True),\n dict(type='Pad', size_divisor=16, pad_val=0, seg_pad_val=0),\n dict(type='ImageToTensor', keys=['img']),\n dict(type='Collect', keys=['img'])\n ])\n]\ndata = dict(\n samples_per_gpu=4,\n workers_per_gpu=4,\n train=dict(\n type='ADE20K151Dataset',\n data_root='data/ade/ADEChallengeData2016',\n img_dir='images/training',\n ann_dir='annotations/training',\n pipeline=[\n dict(type='LoadImageFromFile'),\n dict(type='LoadAnnotations', reduce_zero_label=False),\n dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),\n dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75),\n dict(type='RandomFlip', prob=0.5),\n dict(type='PhotoMetricDistortion'),\n dict(\n type='Normalize',\n mean=[123.675, 116.28, 103.53],\n std=[58.395, 57.12, 57.375],\n to_rgb=True),\n dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=0),\n dict(type='DefaultFormatBundle'),\n dict(type='Collect', keys=['img', 'gt_semantic_seg'])\n ]),\n val=dict(\n type='ADE20K151Dataset',\n data_root='data/ade/ADEChallengeData2016',\n img_dir='images/validation',\n ann_dir='annotations/validation',\n pipeline=[\n dict(type='LoadImageFromFile'),\n dict(\n type='MultiScaleFlipAug',\n img_scale=(2048, 512),\n flip=False,\n transforms=[\n dict(type='Resize', keep_ratio=True),\n dict(type='RandomFlip'),\n dict(\n type='Normalize',\n mean=[123.675, 116.28, 103.53],\n std=[58.395, 57.12, 57.375],\n to_rgb=True),\n dict(\n type='Pad', size_divisor=16, pad_val=0, seg_pad_val=0),\n dict(type='ImageToTensor', keys=['img']),\n dict(type='Collect', keys=['img'])\n ])\n ]),\n test=dict(\n type='ADE20K151Dataset',\n data_root='data/ade/ADEChallengeData2016',\n img_dir='images/validation',\n ann_dir='annotations/validation',\n pipeline=[\n dict(type='LoadImageFromFile'),\n dict(\n type='MultiScaleFlipAug',\n img_scale=(2048, 512),\n flip=False,\n transforms=[\n dict(type='Resize', keep_ratio=True),\n dict(type='RandomFlip'),\n dict(\n type='Normalize',\n mean=[123.675, 116.28, 103.53],\n std=[58.395, 57.12, 57.375],\n to_rgb=True),\n dict(\n type='Pad', size_divisor=16, pad_val=0, seg_pad_val=0),\n dict(type='ImageToTensor', keys=['img']),\n dict(type='Collect', keys=['img'])\n ])\n ]))\nlog_config = dict(\n interval=50, hooks=[dict(type='TextLoggerHook', by_epoch=False)])\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nload_from = None\nresume_from = None\nworkflow = [('train', 1)]\ncudnn_benchmark = True\noptimizer = dict(\n type='AdamW', lr=0.00015, betas=[0.9, 0.96], weight_decay=0.045)\noptimizer_config = dict()\nlr_config = dict(\n policy='step',\n warmup='linear',\n warmup_iters=1000,\n warmup_ratio=1e-06,\n step=10000,\n gamma=0.5,\n min_lr=1e-06,\n by_epoch=False)\nrunner = dict(type='IterBasedRunner', max_iters=80000)\ncheckpoint_config = dict(by_epoch=False, interval=8000, max_keep_ckpts=1)\nevaluation = dict(\n interval=8000, metric='mIoU', pre_eval=True, save_best='mIoU')\ncheckpoint = 'pretrained/deeplabv3plus_r101-d8_512x512_160k_ade20k_20200615_123232-38ed86bb.pth'\nwork_dir = './work_dirs2/deeplabv3plus_r101-d8_aspp_head_unet_fc_small_single_step_ade_pretrained_freeze_embed_80k_ade20k151'\ngpu_ids = range(0, 8)\nauto_resume = True\ndevice = 'cuda'\nseed = 1819371145\n", "CLASSES": ["background", "wall", "building", "sky", "floor", "tree", "ceiling", "road", "bed ", "windowpane", "grass", "cabinet", "sidewalk", "person", "earth", "door", "table", "mountain", "plant", "curtain", "chair", "car", "water", "painting", "sofa", "shelf", "house", "sea", "mirror", "rug", "field", "armchair", "seat", "fence", "desk", "rock", "wardrobe", "lamp", "bathtub", "railing", "cushion", "base", "box", "column", "signboard", "chest of drawers", "counter", "sand", "sink", "skyscraper", "fireplace", "refrigerator", "grandstand", "path", "stairs", "runway", "case", "pool table", "pillow", "screen door", "stairway", "river", "bridge", "bookcase", "blind", "coffee table", "toilet", "flower", "book", "hill", "bench", "countertop", "stove", "palm", "kitchen island", "computer", "swivel chair", "boat", "bar", "arcade machine", "hovel", "bus", "towel", "light", "truck", "tower", "chandelier", "awning", "streetlight", "booth", "television receiver", "airplane", "dirt track", "apparel", "pole", "land", "bannister", "escalator", "ottoman", "bottle", "buffet", "poster", "stage", "van", "ship", "fountain", "conveyer belt", "canopy", "washer", "plaything", "swimming pool", "stool", "barrel", "basket", "waterfall", "tent", "bag", "minibike", "cradle", "oven", "ball", "food", "step", "tank", "trade name", "microwave", "pot", "animal", "bicycle", "lake", "dishwasher", "screen", "blanket", "sculpture", "hood", "sconce", "vase", "traffic light", "tray", "ashcan", "fan", "pier", "crt screen", "plate", "monitor", "bulletin board", "shower", "radiator", "glass", "clock", "flag"], "PALETTE": [[0, 0, 0], [120, 120, 120], [180, 120, 120], [6, 230, 230], [80, 50, 50], [4, 200, 3], [120, 120, 80], [140, 140, 140], [204, 5, 255], [230, 230, 230], [4, 250, 7], [224, 5, 255], [235, 255, 7], [150, 5, 61], [120, 120, 70], [8, 255, 51], [255, 6, 82], [143, 255, 140], [204, 255, 4], [255, 51, 7], [204, 70, 3], [0, 102, 200], [61, 230, 250], [255, 6, 51], [11, 102, 255], [255, 7, 71], [255, 9, 224], [9, 7, 230], [220, 220, 220], [255, 9, 92], [112, 9, 255], [8, 255, 214], [7, 255, 224], [255, 184, 6], [10, 255, 71], [255, 41, 10], [7, 255, 255], [224, 255, 8], [102, 8, 255], [255, 61, 6], [255, 194, 7], [255, 122, 8], [0, 255, 20], [255, 8, 41], [255, 5, 153], [6, 51, 255], [235, 12, 255], [160, 150, 20], [0, 163, 255], [140, 140, 140], [250, 10, 15], [20, 255, 0], [31, 255, 0], [255, 31, 0], [255, 224, 0], [153, 255, 0], [0, 0, 255], [255, 71, 0], [0, 235, 255], [0, 173, 255], [31, 0, 255], [11, 200, 200], [255, 82, 0], [0, 255, 245], [0, 61, 255], [0, 255, 112], [0, 255, 133], [255, 0, 0], [255, 163, 0], [255, 102, 0], [194, 255, 0], [0, 143, 255], [51, 255, 0], [0, 82, 255], [0, 255, 41], [0, 255, 173], [10, 0, 255], [173, 255, 0], [0, 255, 153], [255, 92, 0], [255, 0, 255], [255, 0, 245], [255, 0, 102], [255, 173, 0], [255, 0, 20], [255, 184, 184], [0, 31, 255], [0, 255, 61], [0, 71, 255], [255, 0, 204], [0, 255, 194], [0, 255, 82], [0, 10, 255], [0, 112, 255], [51, 0, 255], [0, 194, 255], [0, 122, 255], [0, 255, 163], [255, 153, 0], [0, 255, 10], [255, 112, 0], [143, 255, 0], [82, 0, 255], [163, 255, 0], [255, 235, 0], [8, 184, 170], [133, 0, 255], [0, 255, 92], [184, 0, 255], [255, 0, 31], [0, 184, 255], [0, 214, 255], [255, 0, 112], [92, 255, 0], [0, 224, 255], [112, 224, 255], [70, 184, 160], [163, 0, 255], [153, 0, 255], [71, 255, 0], [255, 0, 163], [255, 204, 0], [255, 0, 143], [0, 255, 235], [133, 255, 0], [255, 0, 235], [245, 0, 255], [255, 0, 122], [255, 245, 0], [10, 190, 212], [214, 255, 0], [0, 204, 255], [20, 0, 255], [255, 255, 0], [0, 153, 255], [0, 41, 255], [0, 255, 204], [41, 0, 255], [41, 255, 0], [173, 0, 255], [0, 245, 255], [71, 0, 255], [122, 0, 255], [0, 255, 184], [0, 92, 255], [184, 255, 0], [0, 133, 255], [255, 214, 0], [25, 194, 194], [102, 255, 0], [92, 0, 255]], "hook_msgs": {}}
2
+ {"mode": "train", "epoch": 1, "iter": 50, "lr": 1e-05, "memory": 39544, "data_time": 0.01566, "decode.loss_ce": 3.5336, "decode.acc_seg": 28.15875, "loss": 3.5336, "time": 0.56058}
3
+ {"mode": "train", "epoch": 1, "iter": 100, "lr": 1e-05, "memory": 39544, "data_time": 0.00705, "decode.loss_ce": 2.07009, "decode.acc_seg": 58.48949, "loss": 2.07009, "time": 0.2968}
deeplabv3plus_r101_singlestep/20230303_203941.log ADDED
The diff for this file is too large to render. See raw diff
 
deeplabv3plus_r101_singlestep/20230303_203941.log.json ADDED
The diff for this file is too large to render. See raw diff
 
deeplabv3plus_r101_singlestep/best_mIoU_iter_40000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b06c7b04aa5f60270c6170a931700d6af2a09d30ac2c1c36382bf73eed3424e0
3
+ size 770615576
deeplabv3plus_r101_singlestep/deeplabv3plus_r101-d8_aspp_head_unet_fc_small_single_step_ade_pretrained_freeze_embed_80k_ade20k151.py ADDED
@@ -0,0 +1,184 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ norm_cfg = dict(type='SyncBN', requires_grad=True)
2
+ model = dict(
3
+ type='EncoderDecoderFreeze',
4
+ pretrained=
5
+ 'pretrained/deeplabv3plus_r101-d8_512x512_160k_ade20k_20200615_123232-38ed86bb.pth',
6
+ backbone=dict(
7
+ type='ResNetV1cCustomInitWeights',
8
+ depth=101,
9
+ num_stages=4,
10
+ out_indices=(0, 1, 2, 3),
11
+ dilations=(1, 1, 2, 4),
12
+ strides=(1, 2, 1, 1),
13
+ norm_cfg=dict(type='SyncBN', requires_grad=True),
14
+ norm_eval=False,
15
+ style='pytorch',
16
+ contract_dilation=True),
17
+ decode_head=dict(
18
+ type='DepthwiseSeparableASPPHeadUnetFCHeadSingleStep',
19
+ pretrained=
20
+ 'pretrained/deeplabv3plus_r101-d8_512x512_160k_ade20k_20200615_123232-38ed86bb.pth',
21
+ dim=256,
22
+ out_dim=256,
23
+ unet_channels=528,
24
+ dim_mults=[1, 1, 1],
25
+ cat_embedding_dim=16,
26
+ ignore_index=0,
27
+ in_channels=2048,
28
+ in_index=3,
29
+ channels=512,
30
+ dilations=(1, 12, 24, 36),
31
+ c1_in_channels=256,
32
+ c1_channels=48,
33
+ dropout_ratio=0.1,
34
+ num_classes=151,
35
+ norm_cfg=dict(type='SyncBN', requires_grad=True),
36
+ align_corners=False,
37
+ loss_decode=dict(
38
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
39
+ auxiliary_head=None,
40
+ train_cfg=dict(),
41
+ test_cfg=dict(mode='whole'),
42
+ freeze_parameters=['backbone', 'decode_head'])
43
+ dataset_type = 'ADE20K151Dataset'
44
+ data_root = 'data/ade/ADEChallengeData2016'
45
+ img_norm_cfg = dict(
46
+ mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
47
+ crop_size = (512, 512)
48
+ train_pipeline = [
49
+ dict(type='LoadImageFromFile'),
50
+ dict(type='LoadAnnotations', reduce_zero_label=False),
51
+ dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),
52
+ dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75),
53
+ dict(type='RandomFlip', prob=0.5),
54
+ dict(type='PhotoMetricDistortion'),
55
+ dict(
56
+ type='Normalize',
57
+ mean=[123.675, 116.28, 103.53],
58
+ std=[58.395, 57.12, 57.375],
59
+ to_rgb=True),
60
+ dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=0),
61
+ dict(type='DefaultFormatBundle'),
62
+ dict(type='Collect', keys=['img', 'gt_semantic_seg'])
63
+ ]
64
+ test_pipeline = [
65
+ dict(type='LoadImageFromFile'),
66
+ dict(
67
+ type='MultiScaleFlipAug',
68
+ img_scale=(2048, 512),
69
+ flip=False,
70
+ transforms=[
71
+ dict(type='Resize', keep_ratio=True),
72
+ dict(type='RandomFlip'),
73
+ dict(
74
+ type='Normalize',
75
+ mean=[123.675, 116.28, 103.53],
76
+ std=[58.395, 57.12, 57.375],
77
+ to_rgb=True),
78
+ dict(type='Pad', size_divisor=16, pad_val=0, seg_pad_val=0),
79
+ dict(type='ImageToTensor', keys=['img']),
80
+ dict(type='Collect', keys=['img'])
81
+ ])
82
+ ]
83
+ data = dict(
84
+ samples_per_gpu=4,
85
+ workers_per_gpu=4,
86
+ train=dict(
87
+ type='ADE20K151Dataset',
88
+ data_root='data/ade/ADEChallengeData2016',
89
+ img_dir='images/training',
90
+ ann_dir='annotations/training',
91
+ pipeline=[
92
+ dict(type='LoadImageFromFile'),
93
+ dict(type='LoadAnnotations', reduce_zero_label=False),
94
+ dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),
95
+ dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75),
96
+ dict(type='RandomFlip', prob=0.5),
97
+ dict(type='PhotoMetricDistortion'),
98
+ dict(
99
+ type='Normalize',
100
+ mean=[123.675, 116.28, 103.53],
101
+ std=[58.395, 57.12, 57.375],
102
+ to_rgb=True),
103
+ dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=0),
104
+ dict(type='DefaultFormatBundle'),
105
+ dict(type='Collect', keys=['img', 'gt_semantic_seg'])
106
+ ]),
107
+ val=dict(
108
+ type='ADE20K151Dataset',
109
+ data_root='data/ade/ADEChallengeData2016',
110
+ img_dir='images/validation',
111
+ ann_dir='annotations/validation',
112
+ pipeline=[
113
+ dict(type='LoadImageFromFile'),
114
+ dict(
115
+ type='MultiScaleFlipAug',
116
+ img_scale=(2048, 512),
117
+ flip=False,
118
+ transforms=[
119
+ dict(type='Resize', keep_ratio=True),
120
+ dict(type='RandomFlip'),
121
+ dict(
122
+ type='Normalize',
123
+ mean=[123.675, 116.28, 103.53],
124
+ std=[58.395, 57.12, 57.375],
125
+ to_rgb=True),
126
+ dict(
127
+ type='Pad', size_divisor=16, pad_val=0, seg_pad_val=0),
128
+ dict(type='ImageToTensor', keys=['img']),
129
+ dict(type='Collect', keys=['img'])
130
+ ])
131
+ ]),
132
+ test=dict(
133
+ type='ADE20K151Dataset',
134
+ data_root='data/ade/ADEChallengeData2016',
135
+ img_dir='images/validation',
136
+ ann_dir='annotations/validation',
137
+ pipeline=[
138
+ dict(type='LoadImageFromFile'),
139
+ dict(
140
+ type='MultiScaleFlipAug',
141
+ img_scale=(2048, 512),
142
+ flip=False,
143
+ transforms=[
144
+ dict(type='Resize', keep_ratio=True),
145
+ dict(type='RandomFlip'),
146
+ dict(
147
+ type='Normalize',
148
+ mean=[123.675, 116.28, 103.53],
149
+ std=[58.395, 57.12, 57.375],
150
+ to_rgb=True),
151
+ dict(
152
+ type='Pad', size_divisor=16, pad_val=0, seg_pad_val=0),
153
+ dict(type='ImageToTensor', keys=['img']),
154
+ dict(type='Collect', keys=['img'])
155
+ ])
156
+ ]))
157
+ log_config = dict(
158
+ interval=50, hooks=[dict(type='TextLoggerHook', by_epoch=False)])
159
+ dist_params = dict(backend='nccl')
160
+ log_level = 'INFO'
161
+ load_from = None
162
+ resume_from = None
163
+ workflow = [('train', 1)]
164
+ cudnn_benchmark = True
165
+ optimizer = dict(
166
+ type='AdamW', lr=0.00015, betas=[0.9, 0.96], weight_decay=0.045)
167
+ optimizer_config = dict()
168
+ lr_config = dict(
169
+ policy='step',
170
+ warmup='linear',
171
+ warmup_iters=1000,
172
+ warmup_ratio=1e-06,
173
+ step=10000,
174
+ gamma=0.5,
175
+ min_lr=1e-06,
176
+ by_epoch=False)
177
+ runner = dict(type='IterBasedRunner', max_iters=80000)
178
+ checkpoint_config = dict(by_epoch=False, interval=8000, max_keep_ckpts=1)
179
+ evaluation = dict(
180
+ interval=8000, metric='mIoU', pre_eval=True, save_best='mIoU')
181
+ checkpoint = 'pretrained/deeplabv3plus_r101-d8_512x512_160k_ade20k_20200615_123232-38ed86bb.pth'
182
+ work_dir = './work_dirs2/deeplabv3plus_r101-d8_aspp_head_unet_fc_small_single_step_ade_pretrained_freeze_embed_80k_ade20k151'
183
+ gpu_ids = range(0, 8)
184
+ auto_resume = True
deeplabv3plus_r101_singlestep/iter_80000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:263bd7c19856023af0cf1e1afc8836102394bd9803b305cc4da44a0618322337
3
+ size 770615576
deeplabv3plus_r101_singlestep/latest.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:263bd7c19856023af0cf1e1afc8836102394bd9803b305cc4da44a0618322337
3
+ size 770615576
deeplabv3plus_r50_multistep/20230303_205044.log ADDED
The diff for this file is too large to render. See raw diff
 
deeplabv3plus_r50_multistep/20230303_205044.log.json ADDED
The diff for this file is too large to render. See raw diff
 
deeplabv3plus_r50_multistep/best_mIoU_iter_48000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:971982e043fc2dc4127fcf7406cd919b47832c17b86b09f57a3d192406e88343
3
+ size 538307211
deeplabv3plus_r50_multistep/deeplabv3plus_r50-d8_aspp_head_unet_fc_multi_step_ade_pretrained_freeze_embed_160k_ade20k151_finetune.py ADDED
@@ -0,0 +1,195 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ norm_cfg = dict(type='SyncBN', requires_grad=True)
2
+ model = dict(
3
+ type='EncoderDecoderDiffusion',
4
+ pretrained=
5
+ 'work_dirs2/deeplabv3plus_r50-d8_aspp_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151/best_mIoU_iter_64000.pth',
6
+ backbone=dict(
7
+ type='ResNetV1cCustomInitWeights',
8
+ depth=50,
9
+ num_stages=4,
10
+ out_indices=(0, 1, 2, 3),
11
+ dilations=(1, 1, 2, 4),
12
+ strides=(1, 2, 1, 1),
13
+ norm_cfg=dict(type='SyncBN', requires_grad=True),
14
+ norm_eval=False,
15
+ style='pytorch',
16
+ contract_dilation=True),
17
+ decode_head=dict(
18
+ type='DepthwiseSeparableASPPHeadUnetFCHeadMultiStep',
19
+ pretrained=
20
+ 'work_dirs2/deeplabv3plus_r50-d8_aspp_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151/best_mIoU_iter_64000.pth',
21
+ dim=128,
22
+ out_dim=256,
23
+ unet_channels=528,
24
+ dim_mults=[1, 1, 1],
25
+ cat_embedding_dim=16,
26
+ ignore_index=0,
27
+ diffusion_timesteps=100,
28
+ collect_timesteps=[0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 99],
29
+ in_channels=2048,
30
+ in_index=3,
31
+ channels=512,
32
+ dilations=(1, 12, 24, 36),
33
+ c1_in_channels=256,
34
+ c1_channels=48,
35
+ dropout_ratio=0.1,
36
+ num_classes=151,
37
+ norm_cfg=dict(type='SyncBN', requires_grad=True),
38
+ align_corners=False,
39
+ loss_decode=dict(
40
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
41
+ auxiliary_head=None,
42
+ train_cfg=dict(),
43
+ test_cfg=dict(mode='whole'),
44
+ freeze_parameters=['backbone', 'decode_head'])
45
+ dataset_type = 'ADE20K151Dataset'
46
+ data_root = 'data/ade/ADEChallengeData2016'
47
+ img_norm_cfg = dict(
48
+ mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
49
+ crop_size = (512, 512)
50
+ train_pipeline = [
51
+ dict(type='LoadImageFromFile'),
52
+ dict(type='LoadAnnotations', reduce_zero_label=False),
53
+ dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),
54
+ dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75),
55
+ dict(type='RandomFlip', prob=0.5),
56
+ dict(type='PhotoMetricDistortion'),
57
+ dict(
58
+ type='Normalize',
59
+ mean=[123.675, 116.28, 103.53],
60
+ std=[58.395, 57.12, 57.375],
61
+ to_rgb=True),
62
+ dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=0),
63
+ dict(type='DefaultFormatBundle'),
64
+ dict(type='Collect', keys=['img', 'gt_semantic_seg'])
65
+ ]
66
+ test_pipeline = [
67
+ dict(type='LoadImageFromFile'),
68
+ dict(
69
+ type='MultiScaleFlipAug',
70
+ img_scale=(2048, 512),
71
+ flip=False,
72
+ transforms=[
73
+ dict(type='Resize', keep_ratio=True),
74
+ dict(type='RandomFlip'),
75
+ dict(
76
+ type='Normalize',
77
+ mean=[123.675, 116.28, 103.53],
78
+ std=[58.395, 57.12, 57.375],
79
+ to_rgb=True),
80
+ dict(type='Pad', size_divisor=16, pad_val=0, seg_pad_val=0),
81
+ dict(type='ImageToTensor', keys=['img']),
82
+ dict(type='Collect', keys=['img'])
83
+ ])
84
+ ]
85
+ data = dict(
86
+ samples_per_gpu=4,
87
+ workers_per_gpu=4,
88
+ train=dict(
89
+ type='ADE20K151Dataset',
90
+ data_root='data/ade/ADEChallengeData2016',
91
+ img_dir='images/training',
92
+ ann_dir='annotations/training',
93
+ pipeline=[
94
+ dict(type='LoadImageFromFile'),
95
+ dict(type='LoadAnnotations', reduce_zero_label=False),
96
+ dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),
97
+ dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75),
98
+ dict(type='RandomFlip', prob=0.5),
99
+ dict(type='PhotoMetricDistortion'),
100
+ dict(
101
+ type='Normalize',
102
+ mean=[123.675, 116.28, 103.53],
103
+ std=[58.395, 57.12, 57.375],
104
+ to_rgb=True),
105
+ dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=0),
106
+ dict(type='DefaultFormatBundle'),
107
+ dict(type='Collect', keys=['img', 'gt_semantic_seg'])
108
+ ]),
109
+ val=dict(
110
+ type='ADE20K151Dataset',
111
+ data_root='data/ade/ADEChallengeData2016',
112
+ img_dir='images/validation',
113
+ ann_dir='annotations/validation',
114
+ pipeline=[
115
+ dict(type='LoadImageFromFile'),
116
+ dict(
117
+ type='MultiScaleFlipAug',
118
+ img_scale=(2048, 512),
119
+ flip=False,
120
+ transforms=[
121
+ dict(type='Resize', keep_ratio=True),
122
+ dict(type='RandomFlip'),
123
+ dict(
124
+ type='Normalize',
125
+ mean=[123.675, 116.28, 103.53],
126
+ std=[58.395, 57.12, 57.375],
127
+ to_rgb=True),
128
+ dict(
129
+ type='Pad', size_divisor=16, pad_val=0, seg_pad_val=0),
130
+ dict(type='ImageToTensor', keys=['img']),
131
+ dict(type='Collect', keys=['img'])
132
+ ])
133
+ ]),
134
+ test=dict(
135
+ type='ADE20K151Dataset',
136
+ data_root='data/ade/ADEChallengeData2016',
137
+ img_dir='images/validation',
138
+ ann_dir='annotations/validation',
139
+ pipeline=[
140
+ dict(type='LoadImageFromFile'),
141
+ dict(
142
+ type='MultiScaleFlipAug',
143
+ img_scale=(2048, 512),
144
+ flip=False,
145
+ transforms=[
146
+ dict(type='Resize', keep_ratio=True),
147
+ dict(type='RandomFlip'),
148
+ dict(
149
+ type='Normalize',
150
+ mean=[123.675, 116.28, 103.53],
151
+ std=[58.395, 57.12, 57.375],
152
+ to_rgb=True),
153
+ dict(
154
+ type='Pad', size_divisor=16, pad_val=0, seg_pad_val=0),
155
+ dict(type='ImageToTensor', keys=['img']),
156
+ dict(type='Collect', keys=['img'])
157
+ ])
158
+ ]))
159
+ log_config = dict(
160
+ interval=50, hooks=[dict(type='TextLoggerHook', by_epoch=False)])
161
+ dist_params = dict(backend='nccl')
162
+ log_level = 'INFO'
163
+ load_from = None
164
+ resume_from = None
165
+ workflow = [('train', 1)]
166
+ cudnn_benchmark = True
167
+ optimizer = dict(
168
+ type='AdamW', lr=0.00015, betas=[0.9, 0.96], weight_decay=0.045)
169
+ optimizer_config = dict()
170
+ lr_config = dict(
171
+ policy='step',
172
+ warmup='linear',
173
+ warmup_iters=1000,
174
+ warmup_ratio=1e-06,
175
+ step=20000,
176
+ gamma=0.5,
177
+ min_lr=1e-06,
178
+ by_epoch=False)
179
+ runner = dict(type='IterBasedRunner', max_iters=160000)
180
+ checkpoint_config = dict(by_epoch=False, interval=16000, max_keep_ckpts=1)
181
+ evaluation = dict(
182
+ interval=16000, metric='mIoU', pre_eval=True, save_best='mIoU')
183
+ checkpoint = 'work_dirs2/deeplabv3plus_r50-d8_aspp_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151/best_mIoU_iter_64000.pth'
184
+ custom_hooks = [
185
+ dict(
186
+ type='ConstantMomentumEMAHook',
187
+ momentum=0.01,
188
+ interval=25,
189
+ eval_interval=16000,
190
+ auto_resume=True,
191
+ priority=49)
192
+ ]
193
+ work_dir = './work_dirs2/deeplabv3plus_r50-d8_aspp_head_unet_fc_multi_step_ade_pretrained_freeze_embed_160k_ade20k151_finetune'
194
+ gpu_ids = range(0, 8)
195
+ auto_resume = True
deeplabv3plus_r50_multistep/iter_160000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c2b872510c01925ff55f43bf74197c8f95cf4d12adb0b7f3f5c1f8dd78d0520
3
+ size 538307275
deeplabv3plus_r50_multistep/latest.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c2b872510c01925ff55f43bf74197c8f95cf4d12adb0b7f3f5c1f8dd78d0520
3
+ size 538307275
deeplabv3plus_r50_singlestep/20230303_152127.log ADDED
The diff for this file is too large to render. See raw diff
 
deeplabv3plus_r50_singlestep/20230303_152127.log.json ADDED
The diff for this file is too large to render. See raw diff
 
deeplabv3plus_r50_singlestep/best_mIoU_iter_64000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1696a6290854ff78fdd815e6dd9ae448de193753f49557f28d1cf68d14ecbb14
3
+ size 321092234
deeplabv3plus_r50_singlestep/deeplabv3plus_r50-d8_aspp_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151.py ADDED
@@ -0,0 +1,184 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ norm_cfg = dict(type='SyncBN', requires_grad=True)
2
+ model = dict(
3
+ type='EncoderDecoderFreeze',
4
+ pretrained=
5
+ 'pretrained/deeplabv3plus_r50-d8_512x512_160k_ade20k_20200615_124504-6135c7e0.pth',
6
+ backbone=dict(
7
+ type='ResNetV1cCustomInitWeights',
8
+ depth=50,
9
+ num_stages=4,
10
+ out_indices=(0, 1, 2, 3),
11
+ dilations=(1, 1, 2, 4),
12
+ strides=(1, 2, 1, 1),
13
+ norm_cfg=dict(type='SyncBN', requires_grad=True),
14
+ norm_eval=False,
15
+ style='pytorch',
16
+ contract_dilation=True),
17
+ decode_head=dict(
18
+ type='DepthwiseSeparableASPPHeadUnetFCHeadSingleStep',
19
+ pretrained=
20
+ 'pretrained/deeplabv3plus_r50-d8_512x512_160k_ade20k_20200615_124504-6135c7e0.pth',
21
+ dim=128,
22
+ out_dim=256,
23
+ unet_channels=528,
24
+ dim_mults=[1, 1, 1],
25
+ cat_embedding_dim=16,
26
+ ignore_index=0,
27
+ in_channels=2048,
28
+ in_index=3,
29
+ channels=512,
30
+ dilations=(1, 12, 24, 36),
31
+ c1_in_channels=256,
32
+ c1_channels=48,
33
+ dropout_ratio=0.1,
34
+ num_classes=151,
35
+ norm_cfg=dict(type='SyncBN', requires_grad=True),
36
+ align_corners=False,
37
+ loss_decode=dict(
38
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
39
+ auxiliary_head=None,
40
+ train_cfg=dict(),
41
+ test_cfg=dict(mode='whole'),
42
+ freeze_parameters=['backbone', 'decode_head'])
43
+ dataset_type = 'ADE20K151Dataset'
44
+ data_root = 'data/ade/ADEChallengeData2016'
45
+ img_norm_cfg = dict(
46
+ mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
47
+ crop_size = (512, 512)
48
+ train_pipeline = [
49
+ dict(type='LoadImageFromFile'),
50
+ dict(type='LoadAnnotations', reduce_zero_label=False),
51
+ dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),
52
+ dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75),
53
+ dict(type='RandomFlip', prob=0.5),
54
+ dict(type='PhotoMetricDistortion'),
55
+ dict(
56
+ type='Normalize',
57
+ mean=[123.675, 116.28, 103.53],
58
+ std=[58.395, 57.12, 57.375],
59
+ to_rgb=True),
60
+ dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=0),
61
+ dict(type='DefaultFormatBundle'),
62
+ dict(type='Collect', keys=['img', 'gt_semantic_seg'])
63
+ ]
64
+ test_pipeline = [
65
+ dict(type='LoadImageFromFile'),
66
+ dict(
67
+ type='MultiScaleFlipAug',
68
+ img_scale=(2048, 512),
69
+ flip=False,
70
+ transforms=[
71
+ dict(type='Resize', keep_ratio=True),
72
+ dict(type='RandomFlip'),
73
+ dict(
74
+ type='Normalize',
75
+ mean=[123.675, 116.28, 103.53],
76
+ std=[58.395, 57.12, 57.375],
77
+ to_rgb=True),
78
+ dict(type='Pad', size_divisor=16, pad_val=0, seg_pad_val=0),
79
+ dict(type='ImageToTensor', keys=['img']),
80
+ dict(type='Collect', keys=['img'])
81
+ ])
82
+ ]
83
+ data = dict(
84
+ samples_per_gpu=4,
85
+ workers_per_gpu=4,
86
+ train=dict(
87
+ type='ADE20K151Dataset',
88
+ data_root='data/ade/ADEChallengeData2016',
89
+ img_dir='images/training',
90
+ ann_dir='annotations/training',
91
+ pipeline=[
92
+ dict(type='LoadImageFromFile'),
93
+ dict(type='LoadAnnotations', reduce_zero_label=False),
94
+ dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),
95
+ dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75),
96
+ dict(type='RandomFlip', prob=0.5),
97
+ dict(type='PhotoMetricDistortion'),
98
+ dict(
99
+ type='Normalize',
100
+ mean=[123.675, 116.28, 103.53],
101
+ std=[58.395, 57.12, 57.375],
102
+ to_rgb=True),
103
+ dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=0),
104
+ dict(type='DefaultFormatBundle'),
105
+ dict(type='Collect', keys=['img', 'gt_semantic_seg'])
106
+ ]),
107
+ val=dict(
108
+ type='ADE20K151Dataset',
109
+ data_root='data/ade/ADEChallengeData2016',
110
+ img_dir='images/validation',
111
+ ann_dir='annotations/validation',
112
+ pipeline=[
113
+ dict(type='LoadImageFromFile'),
114
+ dict(
115
+ type='MultiScaleFlipAug',
116
+ img_scale=(2048, 512),
117
+ flip=False,
118
+ transforms=[
119
+ dict(type='Resize', keep_ratio=True),
120
+ dict(type='RandomFlip'),
121
+ dict(
122
+ type='Normalize',
123
+ mean=[123.675, 116.28, 103.53],
124
+ std=[58.395, 57.12, 57.375],
125
+ to_rgb=True),
126
+ dict(
127
+ type='Pad', size_divisor=16, pad_val=0, seg_pad_val=0),
128
+ dict(type='ImageToTensor', keys=['img']),
129
+ dict(type='Collect', keys=['img'])
130
+ ])
131
+ ]),
132
+ test=dict(
133
+ type='ADE20K151Dataset',
134
+ data_root='data/ade/ADEChallengeData2016',
135
+ img_dir='images/validation',
136
+ ann_dir='annotations/validation',
137
+ pipeline=[
138
+ dict(type='LoadImageFromFile'),
139
+ dict(
140
+ type='MultiScaleFlipAug',
141
+ img_scale=(2048, 512),
142
+ flip=False,
143
+ transforms=[
144
+ dict(type='Resize', keep_ratio=True),
145
+ dict(type='RandomFlip'),
146
+ dict(
147
+ type='Normalize',
148
+ mean=[123.675, 116.28, 103.53],
149
+ std=[58.395, 57.12, 57.375],
150
+ to_rgb=True),
151
+ dict(
152
+ type='Pad', size_divisor=16, pad_val=0, seg_pad_val=0),
153
+ dict(type='ImageToTensor', keys=['img']),
154
+ dict(type='Collect', keys=['img'])
155
+ ])
156
+ ]))
157
+ log_config = dict(
158
+ interval=50, hooks=[dict(type='TextLoggerHook', by_epoch=False)])
159
+ dist_params = dict(backend='nccl')
160
+ log_level = 'INFO'
161
+ load_from = None
162
+ resume_from = None
163
+ workflow = [('train', 1)]
164
+ cudnn_benchmark = True
165
+ optimizer = dict(
166
+ type='AdamW', lr=0.00015, betas=[0.9, 0.96], weight_decay=0.045)
167
+ optimizer_config = dict()
168
+ lr_config = dict(
169
+ policy='step',
170
+ warmup='linear',
171
+ warmup_iters=1000,
172
+ warmup_ratio=1e-06,
173
+ step=10000,
174
+ gamma=0.5,
175
+ min_lr=1e-06,
176
+ by_epoch=False)
177
+ runner = dict(type='IterBasedRunner', max_iters=80000)
178
+ checkpoint_config = dict(by_epoch=False, interval=8000, max_keep_ckpts=1)
179
+ evaluation = dict(
180
+ interval=8000, metric='mIoU', pre_eval=True, save_best='mIoU')
181
+ checkpoint = 'pretrained/deeplabv3plus_r50-d8_512x512_160k_ade20k_20200615_124504-6135c7e0.pth'
182
+ work_dir = './work_dirs2/deeplabv3plus_r50-d8_aspp_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151'
183
+ gpu_ids = range(0, 8)
184
+ auto_resume = True
deeplabv3plus_r50_singlestep/iter_80000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8efa434ff9ad26a122dc69d10ddc102b878093a2411cfc98eb1375245e73bbcf
3
+ size 321092234
deeplabv3plus_r50_singlestep/latest.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8efa434ff9ad26a122dc69d10ddc102b878093a2411cfc98eb1375245e73bbcf
3
+ size 321092234
segformer_b2_multistep/20230302_115140.log ADDED
The diff for this file is too large to render. See raw diff
 
segformer_b2_multistep/20230302_115140.log.json ADDED
The diff for this file is too large to render. See raw diff
 
segformer_b2_multistep/best_mIoU_iter_144000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:179ba567174f208aa40788c943ffdb10c34be5c99739f87735eabef643846388
3
+ size 852011827
segformer_b2_multistep/eval_single_scale_20230303_091319.json ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "config": "configs/exp_test/segformer_mit_b2_segformer_head_unet_fc_small_multi_step_ade_pretrained_freeze_embed_160k_ade20k151_finetune_ema_t100.py",
3
+ "metric": {
4
+ "mIoU": [
5
+ 0.4926,
6
+ 0.4933,
7
+ 0.4939,
8
+ 0.4945,
9
+ 0.4947,
10
+ 0.4953,
11
+ 0.4956,
12
+ 0.496,
13
+ 0.496,
14
+ 0.4961,
15
+ 0.4965
16
+ ],
17
+ "copy_paste": "49.26,49.33,49.39,49.45,49.47,49.53,49.56,49.6,49.6,49.61,49.65"
18
+ }
19
+ }
segformer_b2_multistep/iter_304000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6199086ebcc3365db2cdb308f362fd1f4b9a118bfc5631c39e1f2d1a102b789d
3
+ size 852011827
segformer_b2_multistep/latest.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6199086ebcc3365db2cdb308f362fd1f4b9a118bfc5631c39e1f2d1a102b789d
3
+ size 852011827
segformer_b2_multistep/segformer_mit_b2_segformer_head_unet_fc_small_multi_step_ade_pretrained_freeze_embed_160k_ade20k151_finetune_ema_t100.py ADDED
@@ -0,0 +1,195 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ norm_cfg = dict(type='SyncBN', requires_grad=True)
2
+ checkpoint = 'work_dirs/segformer_mit_b2_segformer_head_unet_fc_small_single_step_ade_pretrained_freeze_embed_80k_ade20k151/iter_80000.pth'
3
+ model = dict(
4
+ type='EncoderDecoderDiffusion',
5
+ freeze_parameters=['backbone', 'decode_head'],
6
+ pretrained=
7
+ 'work_dirs/segformer_mit_b2_segformer_head_unet_fc_small_single_step_ade_pretrained_freeze_embed_80k_ade20k151/iter_80000.pth',
8
+ backbone=dict(
9
+ type='MixVisionTransformerCustomInitWeights',
10
+ in_channels=3,
11
+ embed_dims=64,
12
+ num_stages=4,
13
+ num_layers=[3, 4, 6, 3],
14
+ num_heads=[1, 2, 5, 8],
15
+ patch_sizes=[7, 3, 3, 3],
16
+ sr_ratios=[8, 4, 2, 1],
17
+ out_indices=(0, 1, 2, 3),
18
+ mlp_ratio=4,
19
+ qkv_bias=True,
20
+ drop_rate=0.0,
21
+ attn_drop_rate=0.0,
22
+ drop_path_rate=0.1),
23
+ decode_head=dict(
24
+ type='SegformerHeadUnetFCHeadMultiStep',
25
+ pretrained=
26
+ 'work_dirs/segformer_mit_b2_segformer_head_unet_fc_small_single_step_ade_pretrained_freeze_embed_80k_ade20k151/iter_80000.pth',
27
+ dim=256,
28
+ out_dim=256,
29
+ unet_channels=272,
30
+ dim_mults=[1, 1, 1],
31
+ cat_embedding_dim=16,
32
+ diffusion_timesteps=100,
33
+ collect_timesteps=[0, 10, 20, 30, 40, 50, 60, 70, 80, 90],
34
+ in_channels=[64, 128, 320, 512],
35
+ in_index=[0, 1, 2, 3],
36
+ channels=256,
37
+ dropout_ratio=0.1,
38
+ num_classes=151,
39
+ norm_cfg=dict(type='SyncBN', requires_grad=True),
40
+ align_corners=False,
41
+ ignore_index=0,
42
+ loss_decode=dict(
43
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
44
+ train_cfg=dict(),
45
+ test_cfg=dict(mode='whole'))
46
+ dataset_type = 'ADE20K151Dataset'
47
+ data_root = 'data/ade/ADEChallengeData2016'
48
+ img_norm_cfg = dict(
49
+ mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
50
+ crop_size = (512, 512)
51
+ train_pipeline = [
52
+ dict(type='LoadImageFromFile'),
53
+ dict(type='LoadAnnotations', reduce_zero_label=False),
54
+ dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),
55
+ dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75),
56
+ dict(type='RandomFlip', prob=0.5),
57
+ dict(type='PhotoMetricDistortion'),
58
+ dict(
59
+ type='Normalize',
60
+ mean=[123.675, 116.28, 103.53],
61
+ std=[58.395, 57.12, 57.375],
62
+ to_rgb=True),
63
+ dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=0),
64
+ dict(type='DefaultFormatBundle'),
65
+ dict(type='Collect', keys=['img', 'gt_semantic_seg'])
66
+ ]
67
+ test_pipeline = [
68
+ dict(type='LoadImageFromFile'),
69
+ dict(
70
+ type='MultiScaleFlipAug',
71
+ img_scale=(2048, 512),
72
+ flip=False,
73
+ transforms=[
74
+ dict(type='Resize', keep_ratio=True),
75
+ dict(type='RandomFlip'),
76
+ dict(
77
+ type='Normalize',
78
+ mean=[123.675, 116.28, 103.53],
79
+ std=[58.395, 57.12, 57.375],
80
+ to_rgb=True),
81
+ dict(type='Pad', size_divisor=16, pad_val=0, seg_pad_val=0),
82
+ dict(type='ImageToTensor', keys=['img']),
83
+ dict(type='Collect', keys=['img'])
84
+ ])
85
+ ]
86
+ data = dict(
87
+ samples_per_gpu=4,
88
+ workers_per_gpu=4,
89
+ train=dict(
90
+ type='ADE20K151Dataset',
91
+ data_root='data/ade/ADEChallengeData2016',
92
+ img_dir='images/training',
93
+ ann_dir='annotations/training',
94
+ pipeline=[
95
+ dict(type='LoadImageFromFile'),
96
+ dict(type='LoadAnnotations', reduce_zero_label=False),
97
+ dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),
98
+ dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75),
99
+ dict(type='RandomFlip', prob=0.5),
100
+ dict(type='PhotoMetricDistortion'),
101
+ dict(
102
+ type='Normalize',
103
+ mean=[123.675, 116.28, 103.53],
104
+ std=[58.395, 57.12, 57.375],
105
+ to_rgb=True),
106
+ dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=0),
107
+ dict(type='DefaultFormatBundle'),
108
+ dict(type='Collect', keys=['img', 'gt_semantic_seg'])
109
+ ]),
110
+ val=dict(
111
+ type='ADE20K151Dataset',
112
+ data_root='data/ade/ADEChallengeData2016',
113
+ img_dir='images/validation',
114
+ ann_dir='annotations/validation',
115
+ pipeline=[
116
+ dict(type='LoadImageFromFile'),
117
+ dict(
118
+ type='MultiScaleFlipAug',
119
+ img_scale=(2048, 512),
120
+ flip=False,
121
+ transforms=[
122
+ dict(type='Resize', keep_ratio=True),
123
+ dict(type='RandomFlip'),
124
+ dict(
125
+ type='Normalize',
126
+ mean=[123.675, 116.28, 103.53],
127
+ std=[58.395, 57.12, 57.375],
128
+ to_rgb=True),
129
+ dict(
130
+ type='Pad', size_divisor=16, pad_val=0, seg_pad_val=0),
131
+ dict(type='ImageToTensor', keys=['img']),
132
+ dict(type='Collect', keys=['img'])
133
+ ])
134
+ ]),
135
+ test=dict(
136
+ type='ADE20K151Dataset',
137
+ data_root='data/ade/ADEChallengeData2016',
138
+ img_dir='images/validation',
139
+ ann_dir='annotations/validation',
140
+ pipeline=[
141
+ dict(type='LoadImageFromFile'),
142
+ dict(
143
+ type='MultiScaleFlipAug',
144
+ img_scale=(2048, 512),
145
+ flip=False,
146
+ transforms=[
147
+ dict(type='Resize', keep_ratio=True),
148
+ dict(type='RandomFlip'),
149
+ dict(
150
+ type='Normalize',
151
+ mean=[123.675, 116.28, 103.53],
152
+ std=[58.395, 57.12, 57.375],
153
+ to_rgb=True),
154
+ dict(
155
+ type='Pad', size_divisor=16, pad_val=0, seg_pad_val=0),
156
+ dict(type='ImageToTensor', keys=['img']),
157
+ dict(type='Collect', keys=['img'])
158
+ ])
159
+ ]))
160
+ log_config = dict(
161
+ interval=50, hooks=[dict(type='TextLoggerHook', by_epoch=False)])
162
+ dist_params = dict(backend='nccl')
163
+ log_level = 'INFO'
164
+ load_from = None
165
+ resume_from = None
166
+ workflow = [('train', 1)]
167
+ cudnn_benchmark = True
168
+ optimizer = dict(
169
+ type='AdamW', lr=0.00015, betas=[0.9, 0.96], weight_decay=0.045)
170
+ optimizer_config = dict()
171
+ lr_config = dict(
172
+ policy='step',
173
+ warmup='linear',
174
+ warmup_iters=1000,
175
+ warmup_ratio=1e-06,
176
+ step=50000,
177
+ gamma=0.5,
178
+ min_lr=1e-06,
179
+ by_epoch=False)
180
+ runner = dict(type='IterBasedRunner', max_iters=400000)
181
+ checkpoint_config = dict(by_epoch=False, interval=16000, max_keep_ckpts=1)
182
+ evaluation = dict(
183
+ interval=16000, metric='mIoU', pre_eval=True, save_best='mIoU')
184
+ custom_hooks = [
185
+ dict(
186
+ type='ConstantMomentumEMAHook',
187
+ momentum=0.01,
188
+ interval=25,
189
+ eval_interval=16000,
190
+ auto_resume=True,
191
+ priority=49)
192
+ ]
193
+ work_dir = './work_dirs/segformer_mit_b2_segformer_head_unet_fc_small_multi_step_ade_pretrained_freeze_embed_160k_ade20k151_finetune_ema_t100'
194
+ gpu_ids = range(0, 8)
195
+ auto_resume = True
segformer_b2_singlestep/20230303_135933.log ADDED
@@ -0,0 +1,1137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2023-03-03 13:59:33,312 - mmseg - INFO - Multi-processing start method is `None`
2
+ 2023-03-03 13:59:33,327 - mmseg - INFO - OpenCV num_threads is `128
3
+ 2023-03-03 13:59:33,327 - mmseg - INFO - OMP num threads is 1
4
+ 2023-03-03 13:59:33,410 - mmseg - INFO - Environment info:
5
+ ------------------------------------------------------------
6
+ sys.platform: linux
7
+ Python: 3.7.16 (default, Jan 17 2023, 22:20:44) [GCC 11.2.0]
8
+ CUDA available: True
9
+ GPU 0,1,2,3,4,5,6,7: NVIDIA A100-SXM4-80GB
10
+ CUDA_HOME: /mnt/petrelfs/laizeqiang/miniconda3/envs/torch
11
+ NVCC: Cuda compilation tools, release 11.6, V11.6.124
12
+ GCC: gcc (GCC) 4.8.5 20150623 (Red Hat 4.8.5-44)
13
+ PyTorch: 1.13.1
14
+ PyTorch compiling details: PyTorch built with:
15
+ - GCC 9.3
16
+ - C++ Version: 201402
17
+ - Intel(R) oneAPI Math Kernel Library Version 2021.4-Product Build 20210904 for Intel(R) 64 architecture applications
18
+ - Intel(R) MKL-DNN v2.6.0 (Git Hash 52b5f107dd9cf10910aaa19cb47f3abf9b349815)
19
+ - OpenMP 201511 (a.k.a. OpenMP 4.5)
20
+ - LAPACK is enabled (usually provided by MKL)
21
+ - NNPACK is enabled
22
+ - CPU capability usage: AVX2
23
+ - CUDA Runtime 11.6
24
+ - NVCC architecture flags: -gencode;arch=compute_37,code=sm_37;-gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_61,code=sm_61;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86;-gencode;arch=compute_37,code=compute_37
25
+ - CuDNN 8.3.2 (built against CUDA 11.5)
26
+ - Magma 2.6.1
27
+ - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CUDA_VERSION=11.6, CUDNN_VERSION=8.3.2, CXX_COMPILER=/opt/rh/devtoolset-9/root/usr/bin/c++, CXX_FLAGS= -fabi-version=11 -Wno-deprecated -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -fopenmp -DNDEBUG -DUSE_KINETO -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -DEDGE_PROFILER_USE_KINETO -O2 -fPIC -Wno-narrowing -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wunused-local-typedefs -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-error=deprecated-declarations -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=redundant-decls -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, PERF_WITH_AVX512=1, TORCH_VERSION=1.13.1, USE_CUDA=ON, USE_CUDNN=ON, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=ON, USE_NNPACK=ON, USE_OPENMP=ON, USE_ROCM=OFF,
28
+
29
+ TorchVision: 0.14.1
30
+ OpenCV: 4.7.0
31
+ MMCV: 1.7.1
32
+ MMCV Compiler: GCC 9.3
33
+ MMCV CUDA Compiler: 11.6
34
+ MMSegmentation: 0.30.0+ad87029
35
+ ------------------------------------------------------------
36
+
37
+ 2023-03-03 13:59:33,411 - mmseg - INFO - Distributed training: True
38
+ 2023-03-03 13:59:34,043 - mmseg - INFO - Config:
39
+ norm_cfg = dict(type='SyncBN', requires_grad=True)
40
+ checkpoint = 'pretrained/segformer_mit-b2_512x512_160k_ade20k_20220620_114047-64e4feca.pth'
41
+ model = dict(
42
+ type='EncoderDecoderFreeze',
43
+ freeze_parameters=['backbone', 'decode_head'],
44
+ pretrained=
45
+ 'pretrained/segformer_mit-b2_512x512_160k_ade20k_20220620_114047-64e4feca.pth',
46
+ backbone=dict(
47
+ type='MixVisionTransformerCustomInitWeights',
48
+ in_channels=3,
49
+ embed_dims=64,
50
+ num_stages=4,
51
+ num_layers=[3, 4, 6, 3],
52
+ num_heads=[1, 2, 5, 8],
53
+ patch_sizes=[7, 3, 3, 3],
54
+ sr_ratios=[8, 4, 2, 1],
55
+ out_indices=(0, 1, 2, 3),
56
+ mlp_ratio=4,
57
+ qkv_bias=True,
58
+ drop_rate=0.0,
59
+ attn_drop_rate=0.0,
60
+ drop_path_rate=0.1),
61
+ decode_head=dict(
62
+ type='SegformerHeadUnetFCHeadSingleStep',
63
+ pretrained=
64
+ 'pretrained/segformer_mit-b2_512x512_160k_ade20k_20220620_114047-64e4feca.pth',
65
+ dim=128,
66
+ out_dim=256,
67
+ unet_channels=272,
68
+ dim_mults=[1, 1, 1],
69
+ cat_embedding_dim=16,
70
+ in_channels=[64, 128, 320, 512],
71
+ in_index=[0, 1, 2, 3],
72
+ channels=256,
73
+ dropout_ratio=0.1,
74
+ num_classes=151,
75
+ norm_cfg=dict(type='SyncBN', requires_grad=True),
76
+ align_corners=False,
77
+ ignore_index=0,
78
+ loss_decode=dict(
79
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
80
+ train_cfg=dict(),
81
+ test_cfg=dict(mode='whole'))
82
+ dataset_type = 'ADE20K151Dataset'
83
+ data_root = 'data/ade/ADEChallengeData2016'
84
+ img_norm_cfg = dict(
85
+ mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
86
+ crop_size = (512, 512)
87
+ train_pipeline = [
88
+ dict(type='LoadImageFromFile'),
89
+ dict(type='LoadAnnotations', reduce_zero_label=False),
90
+ dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),
91
+ dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75),
92
+ dict(type='RandomFlip', prob=0.5),
93
+ dict(type='PhotoMetricDistortion'),
94
+ dict(
95
+ type='Normalize',
96
+ mean=[123.675, 116.28, 103.53],
97
+ std=[58.395, 57.12, 57.375],
98
+ to_rgb=True),
99
+ dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=0),
100
+ dict(type='DefaultFormatBundle'),
101
+ dict(type='Collect', keys=['img', 'gt_semantic_seg'])
102
+ ]
103
+ test_pipeline = [
104
+ dict(type='LoadImageFromFile'),
105
+ dict(
106
+ type='MultiScaleFlipAug',
107
+ img_scale=(2048, 512),
108
+ flip=False,
109
+ transforms=[
110
+ dict(type='Resize', keep_ratio=True),
111
+ dict(type='RandomFlip'),
112
+ dict(
113
+ type='Normalize',
114
+ mean=[123.675, 116.28, 103.53],
115
+ std=[58.395, 57.12, 57.375],
116
+ to_rgb=True),
117
+ dict(type='Pad', size_divisor=16, pad_val=0, seg_pad_val=0),
118
+ dict(type='ImageToTensor', keys=['img']),
119
+ dict(type='Collect', keys=['img'])
120
+ ])
121
+ ]
122
+ data = dict(
123
+ samples_per_gpu=4,
124
+ workers_per_gpu=4,
125
+ train=dict(
126
+ type='ADE20K151Dataset',
127
+ data_root='data/ade/ADEChallengeData2016',
128
+ img_dir='images/training',
129
+ ann_dir='annotations/training',
130
+ pipeline=[
131
+ dict(type='LoadImageFromFile'),
132
+ dict(type='LoadAnnotations', reduce_zero_label=False),
133
+ dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),
134
+ dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75),
135
+ dict(type='RandomFlip', prob=0.5),
136
+ dict(type='PhotoMetricDistortion'),
137
+ dict(
138
+ type='Normalize',
139
+ mean=[123.675, 116.28, 103.53],
140
+ std=[58.395, 57.12, 57.375],
141
+ to_rgb=True),
142
+ dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=0),
143
+ dict(type='DefaultFormatBundle'),
144
+ dict(type='Collect', keys=['img', 'gt_semantic_seg'])
145
+ ]),
146
+ val=dict(
147
+ type='ADE20K151Dataset',
148
+ data_root='data/ade/ADEChallengeData2016',
149
+ img_dir='images/validation',
150
+ ann_dir='annotations/validation',
151
+ pipeline=[
152
+ dict(type='LoadImageFromFile'),
153
+ dict(
154
+ type='MultiScaleFlipAug',
155
+ img_scale=(2048, 512),
156
+ flip=False,
157
+ transforms=[
158
+ dict(type='Resize', keep_ratio=True),
159
+ dict(type='RandomFlip'),
160
+ dict(
161
+ type='Normalize',
162
+ mean=[123.675, 116.28, 103.53],
163
+ std=[58.395, 57.12, 57.375],
164
+ to_rgb=True),
165
+ dict(
166
+ type='Pad', size_divisor=16, pad_val=0, seg_pad_val=0),
167
+ dict(type='ImageToTensor', keys=['img']),
168
+ dict(type='Collect', keys=['img'])
169
+ ])
170
+ ]),
171
+ test=dict(
172
+ type='ADE20K151Dataset',
173
+ data_root='data/ade/ADEChallengeData2016',
174
+ img_dir='images/validation',
175
+ ann_dir='annotations/validation',
176
+ pipeline=[
177
+ dict(type='LoadImageFromFile'),
178
+ dict(
179
+ type='MultiScaleFlipAug',
180
+ img_scale=(2048, 512),
181
+ flip=False,
182
+ transforms=[
183
+ dict(type='Resize', keep_ratio=True),
184
+ dict(type='RandomFlip'),
185
+ dict(
186
+ type='Normalize',
187
+ mean=[123.675, 116.28, 103.53],
188
+ std=[58.395, 57.12, 57.375],
189
+ to_rgb=True),
190
+ dict(
191
+ type='Pad', size_divisor=16, pad_val=0, seg_pad_val=0),
192
+ dict(type='ImageToTensor', keys=['img']),
193
+ dict(type='Collect', keys=['img'])
194
+ ])
195
+ ]))
196
+ log_config = dict(
197
+ interval=50, hooks=[dict(type='TextLoggerHook', by_epoch=False)])
198
+ dist_params = dict(backend='nccl')
199
+ log_level = 'INFO'
200
+ load_from = None
201
+ resume_from = None
202
+ workflow = [('train', 1)]
203
+ cudnn_benchmark = True
204
+ optimizer = dict(
205
+ type='AdamW', lr=0.00015, betas=[0.9, 0.96], weight_decay=0.045)
206
+ optimizer_config = dict()
207
+ lr_config = dict(
208
+ policy='step',
209
+ warmup='linear',
210
+ warmup_iters=1000,
211
+ warmup_ratio=1e-06,
212
+ step=10000,
213
+ gamma=0.5,
214
+ min_lr=1e-06,
215
+ by_epoch=False)
216
+ runner = dict(type='IterBasedRunner', max_iters=80000)
217
+ checkpoint_config = dict(by_epoch=False, interval=8000)
218
+ evaluation = dict(
219
+ interval=8000, metric='mIoU', pre_eval=True, save_best='mIoU')
220
+ work_dir = './work_dirs/segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151'
221
+ gpu_ids = range(0, 8)
222
+ auto_resume = True
223
+
224
+ 2023-03-03 13:59:38,432 - mmseg - INFO - Set random seed to 97773280, deterministic: False
225
+ 2023-03-03 13:59:38,757 - mmseg - INFO - Parameters in backbone freezed!
226
+ 2023-03-03 13:59:38,758 - mmseg - INFO - Trainable parameters in SegformerHeadUnetFCHeadSingleStep: ['unet.init_conv.weight', 'unet.init_conv.bias', 'unet.time_mlp.1.weight', 'unet.time_mlp.1.bias', 'unet.time_mlp.3.weight', 'unet.time_mlp.3.bias', 'unet.downs.0.0.mlp.1.weight', 'unet.downs.0.0.mlp.1.bias', 'unet.downs.0.0.block1.proj.weight', 'unet.downs.0.0.block1.proj.bias', 'unet.downs.0.0.block1.norm.weight', 'unet.downs.0.0.block1.norm.bias', 'unet.downs.0.0.block2.proj.weight', 'unet.downs.0.0.block2.proj.bias', 'unet.downs.0.0.block2.norm.weight', 'unet.downs.0.0.block2.norm.bias', 'unet.downs.0.1.mlp.1.weight', 'unet.downs.0.1.mlp.1.bias', 'unet.downs.0.1.block1.proj.weight', 'unet.downs.0.1.block1.proj.bias', 'unet.downs.0.1.block1.norm.weight', 'unet.downs.0.1.block1.norm.bias', 'unet.downs.0.1.block2.proj.weight', 'unet.downs.0.1.block2.proj.bias', 'unet.downs.0.1.block2.norm.weight', 'unet.downs.0.1.block2.norm.bias', 'unet.downs.0.2.fn.fn.to_qkv.weight', 'unet.downs.0.2.fn.fn.to_out.0.weight', 'unet.downs.0.2.fn.fn.to_out.0.bias', 'unet.downs.0.2.fn.fn.to_out.1.g', 'unet.downs.0.2.fn.norm.g', 'unet.downs.0.3.weight', 'unet.downs.0.3.bias', 'unet.downs.1.0.mlp.1.weight', 'unet.downs.1.0.mlp.1.bias', 'unet.downs.1.0.block1.proj.weight', 'unet.downs.1.0.block1.proj.bias', 'unet.downs.1.0.block1.norm.weight', 'unet.downs.1.0.block1.norm.bias', 'unet.downs.1.0.block2.proj.weight', 'unet.downs.1.0.block2.proj.bias', 'unet.downs.1.0.block2.norm.weight', 'unet.downs.1.0.block2.norm.bias', 'unet.downs.1.1.mlp.1.weight', 'unet.downs.1.1.mlp.1.bias', 'unet.downs.1.1.block1.proj.weight', 'unet.downs.1.1.block1.proj.bias', 'unet.downs.1.1.block1.norm.weight', 'unet.downs.1.1.block1.norm.bias', 'unet.downs.1.1.block2.proj.weight', 'unet.downs.1.1.block2.proj.bias', 'unet.downs.1.1.block2.norm.weight', 'unet.downs.1.1.block2.norm.bias', 'unet.downs.1.2.fn.fn.to_qkv.weight', 'unet.downs.1.2.fn.fn.to_out.0.weight', 'unet.downs.1.2.fn.fn.to_out.0.bias', 'unet.downs.1.2.fn.fn.to_out.1.g', 'unet.downs.1.2.fn.norm.g', 'unet.downs.1.3.weight', 'unet.downs.1.3.bias', 'unet.downs.2.0.mlp.1.weight', 'unet.downs.2.0.mlp.1.bias', 'unet.downs.2.0.block1.proj.weight', 'unet.downs.2.0.block1.proj.bias', 'unet.downs.2.0.block1.norm.weight', 'unet.downs.2.0.block1.norm.bias', 'unet.downs.2.0.block2.proj.weight', 'unet.downs.2.0.block2.proj.bias', 'unet.downs.2.0.block2.norm.weight', 'unet.downs.2.0.block2.norm.bias', 'unet.downs.2.1.mlp.1.weight', 'unet.downs.2.1.mlp.1.bias', 'unet.downs.2.1.block1.proj.weight', 'unet.downs.2.1.block1.proj.bias', 'unet.downs.2.1.block1.norm.weight', 'unet.downs.2.1.block1.norm.bias', 'unet.downs.2.1.block2.proj.weight', 'unet.downs.2.1.block2.proj.bias', 'unet.downs.2.1.block2.norm.weight', 'unet.downs.2.1.block2.norm.bias', 'unet.downs.2.2.fn.fn.to_qkv.weight', 'unet.downs.2.2.fn.fn.to_out.0.weight', 'unet.downs.2.2.fn.fn.to_out.0.bias', 'unet.downs.2.2.fn.fn.to_out.1.g', 'unet.downs.2.2.fn.norm.g', 'unet.downs.2.3.weight', 'unet.downs.2.3.bias', 'unet.ups.0.0.mlp.1.weight', 'unet.ups.0.0.mlp.1.bias', 'unet.ups.0.0.block1.proj.weight', 'unet.ups.0.0.block1.proj.bias', 'unet.ups.0.0.block1.norm.weight', 'unet.ups.0.0.block1.norm.bias', 'unet.ups.0.0.block2.proj.weight', 'unet.ups.0.0.block2.proj.bias', 'unet.ups.0.0.block2.norm.weight', 'unet.ups.0.0.block2.norm.bias', 'unet.ups.0.0.res_conv.weight', 'unet.ups.0.0.res_conv.bias', 'unet.ups.0.1.mlp.1.weight', 'unet.ups.0.1.mlp.1.bias', 'unet.ups.0.1.block1.proj.weight', 'unet.ups.0.1.block1.proj.bias', 'unet.ups.0.1.block1.norm.weight', 'unet.ups.0.1.block1.norm.bias', 'unet.ups.0.1.block2.proj.weight', 'unet.ups.0.1.block2.proj.bias', 'unet.ups.0.1.block2.norm.weight', 'unet.ups.0.1.block2.norm.bias', 'unet.ups.0.1.res_conv.weight', 'unet.ups.0.1.res_conv.bias', 'unet.ups.0.2.fn.fn.to_qkv.weight', 'unet.ups.0.2.fn.fn.to_out.0.weight', 'unet.ups.0.2.fn.fn.to_out.0.bias', 'unet.ups.0.2.fn.fn.to_out.1.g', 'unet.ups.0.2.fn.norm.g', 'unet.ups.0.3.1.weight', 'unet.ups.0.3.1.bias', 'unet.ups.1.0.mlp.1.weight', 'unet.ups.1.0.mlp.1.bias', 'unet.ups.1.0.block1.proj.weight', 'unet.ups.1.0.block1.proj.bias', 'unet.ups.1.0.block1.norm.weight', 'unet.ups.1.0.block1.norm.bias', 'unet.ups.1.0.block2.proj.weight', 'unet.ups.1.0.block2.proj.bias', 'unet.ups.1.0.block2.norm.weight', 'unet.ups.1.0.block2.norm.bias', 'unet.ups.1.0.res_conv.weight', 'unet.ups.1.0.res_conv.bias', 'unet.ups.1.1.mlp.1.weight', 'unet.ups.1.1.mlp.1.bias', 'unet.ups.1.1.block1.proj.weight', 'unet.ups.1.1.block1.proj.bias', 'unet.ups.1.1.block1.norm.weight', 'unet.ups.1.1.block1.norm.bias', 'unet.ups.1.1.block2.proj.weight', 'unet.ups.1.1.block2.proj.bias', 'unet.ups.1.1.block2.norm.weight', 'unet.ups.1.1.block2.norm.bias', 'unet.ups.1.1.res_conv.weight', 'unet.ups.1.1.res_conv.bias', 'unet.ups.1.2.fn.fn.to_qkv.weight', 'unet.ups.1.2.fn.fn.to_out.0.weight', 'unet.ups.1.2.fn.fn.to_out.0.bias', 'unet.ups.1.2.fn.fn.to_out.1.g', 'unet.ups.1.2.fn.norm.g', 'unet.ups.1.3.1.weight', 'unet.ups.1.3.1.bias', 'unet.ups.2.0.mlp.1.weight', 'unet.ups.2.0.mlp.1.bias', 'unet.ups.2.0.block1.proj.weight', 'unet.ups.2.0.block1.proj.bias', 'unet.ups.2.0.block1.norm.weight', 'unet.ups.2.0.block1.norm.bias', 'unet.ups.2.0.block2.proj.weight', 'unet.ups.2.0.block2.proj.bias', 'unet.ups.2.0.block2.norm.weight', 'unet.ups.2.0.block2.norm.bias', 'unet.ups.2.0.res_conv.weight', 'unet.ups.2.0.res_conv.bias', 'unet.ups.2.1.mlp.1.weight', 'unet.ups.2.1.mlp.1.bias', 'unet.ups.2.1.block1.proj.weight', 'unet.ups.2.1.block1.proj.bias', 'unet.ups.2.1.block1.norm.weight', 'unet.ups.2.1.block1.norm.bias', 'unet.ups.2.1.block2.proj.weight', 'unet.ups.2.1.block2.proj.bias', 'unet.ups.2.1.block2.norm.weight', 'unet.ups.2.1.block2.norm.bias', 'unet.ups.2.1.res_conv.weight', 'unet.ups.2.1.res_conv.bias', 'unet.ups.2.2.fn.fn.to_qkv.weight', 'unet.ups.2.2.fn.fn.to_out.0.weight', 'unet.ups.2.2.fn.fn.to_out.0.bias', 'unet.ups.2.2.fn.fn.to_out.1.g', 'unet.ups.2.2.fn.norm.g', 'unet.ups.2.3.weight', 'unet.ups.2.3.bias', 'unet.mid_block1.mlp.1.weight', 'unet.mid_block1.mlp.1.bias', 'unet.mid_block1.block1.proj.weight', 'unet.mid_block1.block1.proj.bias', 'unet.mid_block1.block1.norm.weight', 'unet.mid_block1.block1.norm.bias', 'unet.mid_block1.block2.proj.weight', 'unet.mid_block1.block2.proj.bias', 'unet.mid_block1.block2.norm.weight', 'unet.mid_block1.block2.norm.bias', 'unet.mid_attn.fn.fn.to_qkv.weight', 'unet.mid_attn.fn.fn.to_out.weight', 'unet.mid_attn.fn.fn.to_out.bias', 'unet.mid_attn.fn.norm.g', 'unet.mid_block2.mlp.1.weight', 'unet.mid_block2.mlp.1.bias', 'unet.mid_block2.block1.proj.weight', 'unet.mid_block2.block1.proj.bias', 'unet.mid_block2.block1.norm.weight', 'unet.mid_block2.block1.norm.bias', 'unet.mid_block2.block2.proj.weight', 'unet.mid_block2.block2.proj.bias', 'unet.mid_block2.block2.norm.weight', 'unet.mid_block2.block2.norm.bias', 'unet.final_res_block.mlp.1.weight', 'unet.final_res_block.mlp.1.bias', 'unet.final_res_block.block1.proj.weight', 'unet.final_res_block.block1.proj.bias', 'unet.final_res_block.block1.norm.weight', 'unet.final_res_block.block1.norm.bias', 'unet.final_res_block.block2.proj.weight', 'unet.final_res_block.block2.proj.bias', 'unet.final_res_block.block2.norm.weight', 'unet.final_res_block.block2.norm.bias', 'unet.final_res_block.res_conv.weight', 'unet.final_res_block.res_conv.bias', 'unet.final_conv.weight', 'unet.final_conv.bias', 'conv_seg_new.weight', 'conv_seg_new.bias']
227
+ 2023-03-03 13:59:38,758 - mmseg - INFO - Parameters in decode_head freezed!
228
+ 2023-03-03 13:59:38,778 - mmseg - INFO - load checkpoint from local path: pretrained/segformer_mit-b2_512x512_160k_ade20k_20220620_114047-64e4feca.pth
229
+ 2023-03-03 13:59:39,026 - mmseg - WARNING - The model and loaded state dict do not match exactly
230
+
231
+ unexpected key in source state_dict: decode_head.conv_seg.weight, decode_head.conv_seg.bias, decode_head.convs.0.conv.weight, decode_head.convs.0.bn.weight, decode_head.convs.0.bn.bias, decode_head.convs.0.bn.running_mean, decode_head.convs.0.bn.running_var, decode_head.convs.0.bn.num_batches_tracked, decode_head.convs.1.conv.weight, decode_head.convs.1.bn.weight, decode_head.convs.1.bn.bias, decode_head.convs.1.bn.running_mean, decode_head.convs.1.bn.running_var, decode_head.convs.1.bn.num_batches_tracked, decode_head.convs.2.conv.weight, decode_head.convs.2.bn.weight, decode_head.convs.2.bn.bias, decode_head.convs.2.bn.running_mean, decode_head.convs.2.bn.running_var, decode_head.convs.2.bn.num_batches_tracked, decode_head.convs.3.conv.weight, decode_head.convs.3.bn.weight, decode_head.convs.3.bn.bias, decode_head.convs.3.bn.running_mean, decode_head.convs.3.bn.running_var, decode_head.convs.3.bn.num_batches_tracked, decode_head.fusion_conv.conv.weight, decode_head.fusion_conv.bn.weight, decode_head.fusion_conv.bn.bias, decode_head.fusion_conv.bn.running_mean, decode_head.fusion_conv.bn.running_var, decode_head.fusion_conv.bn.num_batches_tracked
232
+
233
+ 2023-03-03 13:59:39,040 - mmseg - INFO - load checkpoint from local path: pretrained/segformer_mit-b2_512x512_160k_ade20k_20220620_114047-64e4feca.pth
234
+ 2023-03-03 13:59:39,262 - mmseg - WARNING - The model and loaded state dict do not match exactly
235
+
236
+ unexpected key in source state_dict: backbone.layers.0.0.projection.weight, backbone.layers.0.0.projection.bias, backbone.layers.0.0.norm.weight, backbone.layers.0.0.norm.bias, backbone.layers.0.1.0.norm1.weight, backbone.layers.0.1.0.norm1.bias, backbone.layers.0.1.0.attn.attn.in_proj_weight, backbone.layers.0.1.0.attn.attn.in_proj_bias, backbone.layers.0.1.0.attn.attn.out_proj.weight, backbone.layers.0.1.0.attn.attn.out_proj.bias, backbone.layers.0.1.0.attn.sr.weight, backbone.layers.0.1.0.attn.sr.bias, backbone.layers.0.1.0.attn.norm.weight, backbone.layers.0.1.0.attn.norm.bias, backbone.layers.0.1.0.norm2.weight, backbone.layers.0.1.0.norm2.bias, backbone.layers.0.1.0.ffn.layers.0.weight, backbone.layers.0.1.0.ffn.layers.0.bias, backbone.layers.0.1.0.ffn.layers.1.weight, backbone.layers.0.1.0.ffn.layers.1.bias, backbone.layers.0.1.0.ffn.layers.4.weight, backbone.layers.0.1.0.ffn.layers.4.bias, backbone.layers.0.1.1.norm1.weight, backbone.layers.0.1.1.norm1.bias, backbone.layers.0.1.1.attn.attn.in_proj_weight, backbone.layers.0.1.1.attn.attn.in_proj_bias, backbone.layers.0.1.1.attn.attn.out_proj.weight, backbone.layers.0.1.1.attn.attn.out_proj.bias, backbone.layers.0.1.1.attn.sr.weight, backbone.layers.0.1.1.attn.sr.bias, backbone.layers.0.1.1.attn.norm.weight, backbone.layers.0.1.1.attn.norm.bias, backbone.layers.0.1.1.norm2.weight, backbone.layers.0.1.1.norm2.bias, backbone.layers.0.1.1.ffn.layers.0.weight, backbone.layers.0.1.1.ffn.layers.0.bias, backbone.layers.0.1.1.ffn.layers.1.weight, backbone.layers.0.1.1.ffn.layers.1.bias, backbone.layers.0.1.1.ffn.layers.4.weight, backbone.layers.0.1.1.ffn.layers.4.bias, backbone.layers.0.1.2.norm1.weight, backbone.layers.0.1.2.norm1.bias, backbone.layers.0.1.2.attn.attn.in_proj_weight, backbone.layers.0.1.2.attn.attn.in_proj_bias, backbone.layers.0.1.2.attn.attn.out_proj.weight, backbone.layers.0.1.2.attn.attn.out_proj.bias, backbone.layers.0.1.2.attn.sr.weight, backbone.layers.0.1.2.attn.sr.bias, backbone.layers.0.1.2.attn.norm.weight, backbone.layers.0.1.2.attn.norm.bias, backbone.layers.0.1.2.norm2.weight, backbone.layers.0.1.2.norm2.bias, backbone.layers.0.1.2.ffn.layers.0.weight, backbone.layers.0.1.2.ffn.layers.0.bias, backbone.layers.0.1.2.ffn.layers.1.weight, backbone.layers.0.1.2.ffn.layers.1.bias, backbone.layers.0.1.2.ffn.layers.4.weight, backbone.layers.0.1.2.ffn.layers.4.bias, backbone.layers.0.2.weight, backbone.layers.0.2.bias, backbone.layers.1.0.projection.weight, backbone.layers.1.0.projection.bias, backbone.layers.1.0.norm.weight, backbone.layers.1.0.norm.bias, backbone.layers.1.1.0.norm1.weight, backbone.layers.1.1.0.norm1.bias, backbone.layers.1.1.0.attn.attn.in_proj_weight, backbone.layers.1.1.0.attn.attn.in_proj_bias, backbone.layers.1.1.0.attn.attn.out_proj.weight, backbone.layers.1.1.0.attn.attn.out_proj.bias, backbone.layers.1.1.0.attn.sr.weight, backbone.layers.1.1.0.attn.sr.bias, backbone.layers.1.1.0.attn.norm.weight, backbone.layers.1.1.0.attn.norm.bias, backbone.layers.1.1.0.norm2.weight, backbone.layers.1.1.0.norm2.bias, backbone.layers.1.1.0.ffn.layers.0.weight, backbone.layers.1.1.0.ffn.layers.0.bias, backbone.layers.1.1.0.ffn.layers.1.weight, backbone.layers.1.1.0.ffn.layers.1.bias, backbone.layers.1.1.0.ffn.layers.4.weight, backbone.layers.1.1.0.ffn.layers.4.bias, backbone.layers.1.1.1.norm1.weight, backbone.layers.1.1.1.norm1.bias, backbone.layers.1.1.1.attn.attn.in_proj_weight, backbone.layers.1.1.1.attn.attn.in_proj_bias, backbone.layers.1.1.1.attn.attn.out_proj.weight, backbone.layers.1.1.1.attn.attn.out_proj.bias, backbone.layers.1.1.1.attn.sr.weight, backbone.layers.1.1.1.attn.sr.bias, backbone.layers.1.1.1.attn.norm.weight, backbone.layers.1.1.1.attn.norm.bias, backbone.layers.1.1.1.norm2.weight, backbone.layers.1.1.1.norm2.bias, backbone.layers.1.1.1.ffn.layers.0.weight, backbone.layers.1.1.1.ffn.layers.0.bias, backbone.layers.1.1.1.ffn.layers.1.weight, backbone.layers.1.1.1.ffn.layers.1.bias, backbone.layers.1.1.1.ffn.layers.4.weight, backbone.layers.1.1.1.ffn.layers.4.bias, backbone.layers.1.1.2.norm1.weight, backbone.layers.1.1.2.norm1.bias, backbone.layers.1.1.2.attn.attn.in_proj_weight, backbone.layers.1.1.2.attn.attn.in_proj_bias, backbone.layers.1.1.2.attn.attn.out_proj.weight, backbone.layers.1.1.2.attn.attn.out_proj.bias, backbone.layers.1.1.2.attn.sr.weight, backbone.layers.1.1.2.attn.sr.bias, backbone.layers.1.1.2.attn.norm.weight, backbone.layers.1.1.2.attn.norm.bias, backbone.layers.1.1.2.norm2.weight, backbone.layers.1.1.2.norm2.bias, backbone.layers.1.1.2.ffn.layers.0.weight, backbone.layers.1.1.2.ffn.layers.0.bias, backbone.layers.1.1.2.ffn.layers.1.weight, backbone.layers.1.1.2.ffn.layers.1.bias, backbone.layers.1.1.2.ffn.layers.4.weight, backbone.layers.1.1.2.ffn.layers.4.bias, backbone.layers.1.1.3.norm1.weight, backbone.layers.1.1.3.norm1.bias, backbone.layers.1.1.3.attn.attn.in_proj_weight, backbone.layers.1.1.3.attn.attn.in_proj_bias, backbone.layers.1.1.3.attn.attn.out_proj.weight, backbone.layers.1.1.3.attn.attn.out_proj.bias, backbone.layers.1.1.3.attn.sr.weight, backbone.layers.1.1.3.attn.sr.bias, backbone.layers.1.1.3.attn.norm.weight, backbone.layers.1.1.3.attn.norm.bias, backbone.layers.1.1.3.norm2.weight, backbone.layers.1.1.3.norm2.bias, backbone.layers.1.1.3.ffn.layers.0.weight, backbone.layers.1.1.3.ffn.layers.0.bias, backbone.layers.1.1.3.ffn.layers.1.weight, backbone.layers.1.1.3.ffn.layers.1.bias, backbone.layers.1.1.3.ffn.layers.4.weight, backbone.layers.1.1.3.ffn.layers.4.bias, backbone.layers.1.2.weight, backbone.layers.1.2.bias, backbone.layers.2.0.projection.weight, backbone.layers.2.0.projection.bias, backbone.layers.2.0.norm.weight, backbone.layers.2.0.norm.bias, backbone.layers.2.1.0.norm1.weight, backbone.layers.2.1.0.norm1.bias, backbone.layers.2.1.0.attn.attn.in_proj_weight, backbone.layers.2.1.0.attn.attn.in_proj_bias, backbone.layers.2.1.0.attn.attn.out_proj.weight, backbone.layers.2.1.0.attn.attn.out_proj.bias, backbone.layers.2.1.0.attn.sr.weight, backbone.layers.2.1.0.attn.sr.bias, backbone.layers.2.1.0.attn.norm.weight, backbone.layers.2.1.0.attn.norm.bias, backbone.layers.2.1.0.norm2.weight, backbone.layers.2.1.0.norm2.bias, backbone.layers.2.1.0.ffn.layers.0.weight, backbone.layers.2.1.0.ffn.layers.0.bias, backbone.layers.2.1.0.ffn.layers.1.weight, backbone.layers.2.1.0.ffn.layers.1.bias, backbone.layers.2.1.0.ffn.layers.4.weight, backbone.layers.2.1.0.ffn.layers.4.bias, backbone.layers.2.1.1.norm1.weight, backbone.layers.2.1.1.norm1.bias, backbone.layers.2.1.1.attn.attn.in_proj_weight, backbone.layers.2.1.1.attn.attn.in_proj_bias, backbone.layers.2.1.1.attn.attn.out_proj.weight, backbone.layers.2.1.1.attn.attn.out_proj.bias, backbone.layers.2.1.1.attn.sr.weight, backbone.layers.2.1.1.attn.sr.bias, backbone.layers.2.1.1.attn.norm.weight, backbone.layers.2.1.1.attn.norm.bias, backbone.layers.2.1.1.norm2.weight, backbone.layers.2.1.1.norm2.bias, backbone.layers.2.1.1.ffn.layers.0.weight, backbone.layers.2.1.1.ffn.layers.0.bias, backbone.layers.2.1.1.ffn.layers.1.weight, backbone.layers.2.1.1.ffn.layers.1.bias, backbone.layers.2.1.1.ffn.layers.4.weight, backbone.layers.2.1.1.ffn.layers.4.bias, backbone.layers.2.1.2.norm1.weight, backbone.layers.2.1.2.norm1.bias, backbone.layers.2.1.2.attn.attn.in_proj_weight, backbone.layers.2.1.2.attn.attn.in_proj_bias, backbone.layers.2.1.2.attn.attn.out_proj.weight, backbone.layers.2.1.2.attn.attn.out_proj.bias, backbone.layers.2.1.2.attn.sr.weight, backbone.layers.2.1.2.attn.sr.bias, backbone.layers.2.1.2.attn.norm.weight, backbone.layers.2.1.2.attn.norm.bias, backbone.layers.2.1.2.norm2.weight, backbone.layers.2.1.2.norm2.bias, backbone.layers.2.1.2.ffn.layers.0.weight, backbone.layers.2.1.2.ffn.layers.0.bias, backbone.layers.2.1.2.ffn.layers.1.weight, backbone.layers.2.1.2.ffn.layers.1.bias, backbone.layers.2.1.2.ffn.layers.4.weight, backbone.layers.2.1.2.ffn.layers.4.bias, backbone.layers.2.1.3.norm1.weight, backbone.layers.2.1.3.norm1.bias, backbone.layers.2.1.3.attn.attn.in_proj_weight, backbone.layers.2.1.3.attn.attn.in_proj_bias, backbone.layers.2.1.3.attn.attn.out_proj.weight, backbone.layers.2.1.3.attn.attn.out_proj.bias, backbone.layers.2.1.3.attn.sr.weight, backbone.layers.2.1.3.attn.sr.bias, backbone.layers.2.1.3.attn.norm.weight, backbone.layers.2.1.3.attn.norm.bias, backbone.layers.2.1.3.norm2.weight, backbone.layers.2.1.3.norm2.bias, backbone.layers.2.1.3.ffn.layers.0.weight, backbone.layers.2.1.3.ffn.layers.0.bias, backbone.layers.2.1.3.ffn.layers.1.weight, backbone.layers.2.1.3.ffn.layers.1.bias, backbone.layers.2.1.3.ffn.layers.4.weight, backbone.layers.2.1.3.ffn.layers.4.bias, backbone.layers.2.1.4.norm1.weight, backbone.layers.2.1.4.norm1.bias, backbone.layers.2.1.4.attn.attn.in_proj_weight, backbone.layers.2.1.4.attn.attn.in_proj_bias, backbone.layers.2.1.4.attn.attn.out_proj.weight, backbone.layers.2.1.4.attn.attn.out_proj.bias, backbone.layers.2.1.4.attn.sr.weight, backbone.layers.2.1.4.attn.sr.bias, backbone.layers.2.1.4.attn.norm.weight, backbone.layers.2.1.4.attn.norm.bias, backbone.layers.2.1.4.norm2.weight, backbone.layers.2.1.4.norm2.bias, backbone.layers.2.1.4.ffn.layers.0.weight, backbone.layers.2.1.4.ffn.layers.0.bias, backbone.layers.2.1.4.ffn.layers.1.weight, backbone.layers.2.1.4.ffn.layers.1.bias, backbone.layers.2.1.4.ffn.layers.4.weight, backbone.layers.2.1.4.ffn.layers.4.bias, backbone.layers.2.1.5.norm1.weight, backbone.layers.2.1.5.norm1.bias, backbone.layers.2.1.5.attn.attn.in_proj_weight, backbone.layers.2.1.5.attn.attn.in_proj_bias, backbone.layers.2.1.5.attn.attn.out_proj.weight, backbone.layers.2.1.5.attn.attn.out_proj.bias, backbone.layers.2.1.5.attn.sr.weight, backbone.layers.2.1.5.attn.sr.bias, backbone.layers.2.1.5.attn.norm.weight, backbone.layers.2.1.5.attn.norm.bias, backbone.layers.2.1.5.norm2.weight, backbone.layers.2.1.5.norm2.bias, backbone.layers.2.1.5.ffn.layers.0.weight, backbone.layers.2.1.5.ffn.layers.0.bias, backbone.layers.2.1.5.ffn.layers.1.weight, backbone.layers.2.1.5.ffn.layers.1.bias, backbone.layers.2.1.5.ffn.layers.4.weight, backbone.layers.2.1.5.ffn.layers.4.bias, backbone.layers.2.2.weight, backbone.layers.2.2.bias, backbone.layers.3.0.projection.weight, backbone.layers.3.0.projection.bias, backbone.layers.3.0.norm.weight, backbone.layers.3.0.norm.bias, backbone.layers.3.1.0.norm1.weight, backbone.layers.3.1.0.norm1.bias, backbone.layers.3.1.0.attn.attn.in_proj_weight, backbone.layers.3.1.0.attn.attn.in_proj_bias, backbone.layers.3.1.0.attn.attn.out_proj.weight, backbone.layers.3.1.0.attn.attn.out_proj.bias, backbone.layers.3.1.0.norm2.weight, backbone.layers.3.1.0.norm2.bias, backbone.layers.3.1.0.ffn.layers.0.weight, backbone.layers.3.1.0.ffn.layers.0.bias, backbone.layers.3.1.0.ffn.layers.1.weight, backbone.layers.3.1.0.ffn.layers.1.bias, backbone.layers.3.1.0.ffn.layers.4.weight, backbone.layers.3.1.0.ffn.layers.4.bias, backbone.layers.3.1.1.norm1.weight, backbone.layers.3.1.1.norm1.bias, backbone.layers.3.1.1.attn.attn.in_proj_weight, backbone.layers.3.1.1.attn.attn.in_proj_bias, backbone.layers.3.1.1.attn.attn.out_proj.weight, backbone.layers.3.1.1.attn.attn.out_proj.bias, backbone.layers.3.1.1.norm2.weight, backbone.layers.3.1.1.norm2.bias, backbone.layers.3.1.1.ffn.layers.0.weight, backbone.layers.3.1.1.ffn.layers.0.bias, backbone.layers.3.1.1.ffn.layers.1.weight, backbone.layers.3.1.1.ffn.layers.1.bias, backbone.layers.3.1.1.ffn.layers.4.weight, backbone.layers.3.1.1.ffn.layers.4.bias, backbone.layers.3.1.2.norm1.weight, backbone.layers.3.1.2.norm1.bias, backbone.layers.3.1.2.attn.attn.in_proj_weight, backbone.layers.3.1.2.attn.attn.in_proj_bias, backbone.layers.3.1.2.attn.attn.out_proj.weight, backbone.layers.3.1.2.attn.attn.out_proj.bias, backbone.layers.3.1.2.norm2.weight, backbone.layers.3.1.2.norm2.bias, backbone.layers.3.1.2.ffn.layers.0.weight, backbone.layers.3.1.2.ffn.layers.0.bias, backbone.layers.3.1.2.ffn.layers.1.weight, backbone.layers.3.1.2.ffn.layers.1.bias, backbone.layers.3.1.2.ffn.layers.4.weight, backbone.layers.3.1.2.ffn.layers.4.bias, backbone.layers.3.2.weight, backbone.layers.3.2.bias
237
+
238
+ missing keys in source state_dict: unet.init_conv.weight, unet.init_conv.bias, unet.time_mlp.1.weight, unet.time_mlp.1.bias, unet.time_mlp.3.weight, unet.time_mlp.3.bias, unet.downs.0.0.mlp.1.weight, unet.downs.0.0.mlp.1.bias, unet.downs.0.0.block1.proj.weight, unet.downs.0.0.block1.proj.bias, unet.downs.0.0.block1.norm.weight, unet.downs.0.0.block1.norm.bias, unet.downs.0.0.block2.proj.weight, unet.downs.0.0.block2.proj.bias, unet.downs.0.0.block2.norm.weight, unet.downs.0.0.block2.norm.bias, unet.downs.0.1.mlp.1.weight, unet.downs.0.1.mlp.1.bias, unet.downs.0.1.block1.proj.weight, unet.downs.0.1.block1.proj.bias, unet.downs.0.1.block1.norm.weight, unet.downs.0.1.block1.norm.bias, unet.downs.0.1.block2.proj.weight, unet.downs.0.1.block2.proj.bias, unet.downs.0.1.block2.norm.weight, unet.downs.0.1.block2.norm.bias, unet.downs.0.2.fn.fn.to_qkv.weight, unet.downs.0.2.fn.fn.to_out.0.weight, unet.downs.0.2.fn.fn.to_out.0.bias, unet.downs.0.2.fn.fn.to_out.1.g, unet.downs.0.2.fn.norm.g, unet.downs.0.3.weight, unet.downs.0.3.bias, unet.downs.1.0.mlp.1.weight, unet.downs.1.0.mlp.1.bias, unet.downs.1.0.block1.proj.weight, unet.downs.1.0.block1.proj.bias, unet.downs.1.0.block1.norm.weight, unet.downs.1.0.block1.norm.bias, unet.downs.1.0.block2.proj.weight, unet.downs.1.0.block2.proj.bias, unet.downs.1.0.block2.norm.weight, unet.downs.1.0.block2.norm.bias, unet.downs.1.1.mlp.1.weight, unet.downs.1.1.mlp.1.bias, unet.downs.1.1.block1.proj.weight, unet.downs.1.1.block1.proj.bias, unet.downs.1.1.block1.norm.weight, unet.downs.1.1.block1.norm.bias, unet.downs.1.1.block2.proj.weight, unet.downs.1.1.block2.proj.bias, unet.downs.1.1.block2.norm.weight, unet.downs.1.1.block2.norm.bias, unet.downs.1.2.fn.fn.to_qkv.weight, unet.downs.1.2.fn.fn.to_out.0.weight, unet.downs.1.2.fn.fn.to_out.0.bias, unet.downs.1.2.fn.fn.to_out.1.g, unet.downs.1.2.fn.norm.g, unet.downs.1.3.weight, unet.downs.1.3.bias, unet.downs.2.0.mlp.1.weight, unet.downs.2.0.mlp.1.bias, unet.downs.2.0.block1.proj.weight, unet.downs.2.0.block1.proj.bias, unet.downs.2.0.block1.norm.weight, unet.downs.2.0.block1.norm.bias, unet.downs.2.0.block2.proj.weight, unet.downs.2.0.block2.proj.bias, unet.downs.2.0.block2.norm.weight, unet.downs.2.0.block2.norm.bias, unet.downs.2.1.mlp.1.weight, unet.downs.2.1.mlp.1.bias, unet.downs.2.1.block1.proj.weight, unet.downs.2.1.block1.proj.bias, unet.downs.2.1.block1.norm.weight, unet.downs.2.1.block1.norm.bias, unet.downs.2.1.block2.proj.weight, unet.downs.2.1.block2.proj.bias, unet.downs.2.1.block2.norm.weight, unet.downs.2.1.block2.norm.bias, unet.downs.2.2.fn.fn.to_qkv.weight, unet.downs.2.2.fn.fn.to_out.0.weight, unet.downs.2.2.fn.fn.to_out.0.bias, unet.downs.2.2.fn.fn.to_out.1.g, unet.downs.2.2.fn.norm.g, unet.downs.2.3.weight, unet.downs.2.3.bias, unet.ups.0.0.mlp.1.weight, unet.ups.0.0.mlp.1.bias, unet.ups.0.0.block1.proj.weight, unet.ups.0.0.block1.proj.bias, unet.ups.0.0.block1.norm.weight, unet.ups.0.0.block1.norm.bias, unet.ups.0.0.block2.proj.weight, unet.ups.0.0.block2.proj.bias, unet.ups.0.0.block2.norm.weight, unet.ups.0.0.block2.norm.bias, unet.ups.0.0.res_conv.weight, unet.ups.0.0.res_conv.bias, unet.ups.0.1.mlp.1.weight, unet.ups.0.1.mlp.1.bias, unet.ups.0.1.block1.proj.weight, unet.ups.0.1.block1.proj.bias, unet.ups.0.1.block1.norm.weight, unet.ups.0.1.block1.norm.bias, unet.ups.0.1.block2.proj.weight, unet.ups.0.1.block2.proj.bias, unet.ups.0.1.block2.norm.weight, unet.ups.0.1.block2.norm.bias, unet.ups.0.1.res_conv.weight, unet.ups.0.1.res_conv.bias, unet.ups.0.2.fn.fn.to_qkv.weight, unet.ups.0.2.fn.fn.to_out.0.weight, unet.ups.0.2.fn.fn.to_out.0.bias, unet.ups.0.2.fn.fn.to_out.1.g, unet.ups.0.2.fn.norm.g, unet.ups.0.3.1.weight, unet.ups.0.3.1.bias, unet.ups.1.0.mlp.1.weight, unet.ups.1.0.mlp.1.bias, unet.ups.1.0.block1.proj.weight, unet.ups.1.0.block1.proj.bias, unet.ups.1.0.block1.norm.weight, unet.ups.1.0.block1.norm.bias, unet.ups.1.0.block2.proj.weight, unet.ups.1.0.block2.proj.bias, unet.ups.1.0.block2.norm.weight, unet.ups.1.0.block2.norm.bias, unet.ups.1.0.res_conv.weight, unet.ups.1.0.res_conv.bias, unet.ups.1.1.mlp.1.weight, unet.ups.1.1.mlp.1.bias, unet.ups.1.1.block1.proj.weight, unet.ups.1.1.block1.proj.bias, unet.ups.1.1.block1.norm.weight, unet.ups.1.1.block1.norm.bias, unet.ups.1.1.block2.proj.weight, unet.ups.1.1.block2.proj.bias, unet.ups.1.1.block2.norm.weight, unet.ups.1.1.block2.norm.bias, unet.ups.1.1.res_conv.weight, unet.ups.1.1.res_conv.bias, unet.ups.1.2.fn.fn.to_qkv.weight, unet.ups.1.2.fn.fn.to_out.0.weight, unet.ups.1.2.fn.fn.to_out.0.bias, unet.ups.1.2.fn.fn.to_out.1.g, unet.ups.1.2.fn.norm.g, unet.ups.1.3.1.weight, unet.ups.1.3.1.bias, unet.ups.2.0.mlp.1.weight, unet.ups.2.0.mlp.1.bias, unet.ups.2.0.block1.proj.weight, unet.ups.2.0.block1.proj.bias, unet.ups.2.0.block1.norm.weight, unet.ups.2.0.block1.norm.bias, unet.ups.2.0.block2.proj.weight, unet.ups.2.0.block2.proj.bias, unet.ups.2.0.block2.norm.weight, unet.ups.2.0.block2.norm.bias, unet.ups.2.0.res_conv.weight, unet.ups.2.0.res_conv.bias, unet.ups.2.1.mlp.1.weight, unet.ups.2.1.mlp.1.bias, unet.ups.2.1.block1.proj.weight, unet.ups.2.1.block1.proj.bias, unet.ups.2.1.block1.norm.weight, unet.ups.2.1.block1.norm.bias, unet.ups.2.1.block2.proj.weight, unet.ups.2.1.block2.proj.bias, unet.ups.2.1.block2.norm.weight, unet.ups.2.1.block2.norm.bias, unet.ups.2.1.res_conv.weight, unet.ups.2.1.res_conv.bias, unet.ups.2.2.fn.fn.to_qkv.weight, unet.ups.2.2.fn.fn.to_out.0.weight, unet.ups.2.2.fn.fn.to_out.0.bias, unet.ups.2.2.fn.fn.to_out.1.g, unet.ups.2.2.fn.norm.g, unet.ups.2.3.weight, unet.ups.2.3.bias, unet.mid_block1.mlp.1.weight, unet.mid_block1.mlp.1.bias, unet.mid_block1.block1.proj.weight, unet.mid_block1.block1.proj.bias, unet.mid_block1.block1.norm.weight, unet.mid_block1.block1.norm.bias, unet.mid_block1.block2.proj.weight, unet.mid_block1.block2.proj.bias, unet.mid_block1.block2.norm.weight, unet.mid_block1.block2.norm.bias, unet.mid_attn.fn.fn.to_qkv.weight, unet.mid_attn.fn.fn.to_out.weight, unet.mid_attn.fn.fn.to_out.bias, unet.mid_attn.fn.norm.g, unet.mid_block2.mlp.1.weight, unet.mid_block2.mlp.1.bias, unet.mid_block2.block1.proj.weight, unet.mid_block2.block1.proj.bias, unet.mid_block2.block1.norm.weight, unet.mid_block2.block1.norm.bias, unet.mid_block2.block2.proj.weight, unet.mid_block2.block2.proj.bias, unet.mid_block2.block2.norm.weight, unet.mid_block2.block2.norm.bias, unet.final_res_block.mlp.1.weight, unet.final_res_block.mlp.1.bias, unet.final_res_block.block1.proj.weight, unet.final_res_block.block1.proj.bias, unet.final_res_block.block1.norm.weight, unet.final_res_block.block1.norm.bias, unet.final_res_block.block2.proj.weight, unet.final_res_block.block2.proj.bias, unet.final_res_block.block2.norm.weight, unet.final_res_block.block2.norm.bias, unet.final_res_block.res_conv.weight, unet.final_res_block.res_conv.bias, unet.final_conv.weight, unet.final_conv.bias, conv_seg_new.weight, conv_seg_new.bias, embed.weight
239
+
240
+ 2023-03-03 13:59:39,286 - mmseg - INFO - EncoderDecoderFreeze(
241
+ (backbone): MixVisionTransformerCustomInitWeights(
242
+ (layers): ModuleList(
243
+ (0): ModuleList(
244
+ (0): PatchEmbed(
245
+ (projection): Conv2d(3, 64, kernel_size=(7, 7), stride=(4, 4), padding=(3, 3))
246
+ (norm): LayerNorm((64,), eps=1e-06, elementwise_affine=True)
247
+ )
248
+ (1): ModuleList(
249
+ (0): TransformerEncoderLayer(
250
+ (norm1): LayerNorm((64,), eps=1e-06, elementwise_affine=True)
251
+ (attn): EfficientMultiheadAttention(
252
+ (attn): MultiheadAttention(
253
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=64, out_features=64, bias=True)
254
+ )
255
+ (proj_drop): Dropout(p=0.0, inplace=False)
256
+ (dropout_layer): DropPath()
257
+ (sr): Conv2d(64, 64, kernel_size=(8, 8), stride=(8, 8))
258
+ (norm): LayerNorm((64,), eps=1e-06, elementwise_affine=True)
259
+ )
260
+ (norm2): LayerNorm((64,), eps=1e-06, elementwise_affine=True)
261
+ (ffn): MixFFN(
262
+ (activate): GELU(approximate='none')
263
+ (layers): Sequential(
264
+ (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1))
265
+ (1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256)
266
+ (2): GELU(approximate='none')
267
+ (3): Dropout(p=0.0, inplace=False)
268
+ (4): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1))
269
+ (5): Dropout(p=0.0, inplace=False)
270
+ )
271
+ (dropout_layer): DropPath()
272
+ )
273
+ )
274
+ (1): TransformerEncoderLayer(
275
+ (norm1): LayerNorm((64,), eps=1e-06, elementwise_affine=True)
276
+ (attn): EfficientMultiheadAttention(
277
+ (attn): MultiheadAttention(
278
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=64, out_features=64, bias=True)
279
+ )
280
+ (proj_drop): Dropout(p=0.0, inplace=False)
281
+ (dropout_layer): DropPath()
282
+ (sr): Conv2d(64, 64, kernel_size=(8, 8), stride=(8, 8))
283
+ (norm): LayerNorm((64,), eps=1e-06, elementwise_affine=True)
284
+ )
285
+ (norm2): LayerNorm((64,), eps=1e-06, elementwise_affine=True)
286
+ (ffn): MixFFN(
287
+ (activate): GELU(approximate='none')
288
+ (layers): Sequential(
289
+ (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1))
290
+ (1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256)
291
+ (2): GELU(approximate='none')
292
+ (3): Dropout(p=0.0, inplace=False)
293
+ (4): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1))
294
+ (5): Dropout(p=0.0, inplace=False)
295
+ )
296
+ (dropout_layer): DropPath()
297
+ )
298
+ )
299
+ (2): TransformerEncoderLayer(
300
+ (norm1): LayerNorm((64,), eps=1e-06, elementwise_affine=True)
301
+ (attn): EfficientMultiheadAttention(
302
+ (attn): MultiheadAttention(
303
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=64, out_features=64, bias=True)
304
+ )
305
+ (proj_drop): Dropout(p=0.0, inplace=False)
306
+ (dropout_layer): DropPath()
307
+ (sr): Conv2d(64, 64, kernel_size=(8, 8), stride=(8, 8))
308
+ (norm): LayerNorm((64,), eps=1e-06, elementwise_affine=True)
309
+ )
310
+ (norm2): LayerNorm((64,), eps=1e-06, elementwise_affine=True)
311
+ (ffn): MixFFN(
312
+ (activate): GELU(approximate='none')
313
+ (layers): Sequential(
314
+ (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1))
315
+ (1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256)
316
+ (2): GELU(approximate='none')
317
+ (3): Dropout(p=0.0, inplace=False)
318
+ (4): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1))
319
+ (5): Dropout(p=0.0, inplace=False)
320
+ )
321
+ (dropout_layer): DropPath()
322
+ )
323
+ )
324
+ )
325
+ (2): LayerNorm((64,), eps=1e-06, elementwise_affine=True)
326
+ )
327
+ (1): ModuleList(
328
+ (0): PatchEmbed(
329
+ (projection): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
330
+ (norm): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
331
+ )
332
+ (1): ModuleList(
333
+ (0): TransformerEncoderLayer(
334
+ (norm1): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
335
+ (attn): EfficientMultiheadAttention(
336
+ (attn): MultiheadAttention(
337
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=128, out_features=128, bias=True)
338
+ )
339
+ (proj_drop): Dropout(p=0.0, inplace=False)
340
+ (dropout_layer): DropPath()
341
+ (sr): Conv2d(128, 128, kernel_size=(4, 4), stride=(4, 4))
342
+ (norm): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
343
+ )
344
+ (norm2): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
345
+ (ffn): MixFFN(
346
+ (activate): GELU(approximate='none')
347
+ (layers): Sequential(
348
+ (0): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1))
349
+ (1): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=512)
350
+ (2): GELU(approximate='none')
351
+ (3): Dropout(p=0.0, inplace=False)
352
+ (4): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1))
353
+ (5): Dropout(p=0.0, inplace=False)
354
+ )
355
+ (dropout_layer): DropPath()
356
+ )
357
+ )
358
+ (1): TransformerEncoderLayer(
359
+ (norm1): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
360
+ (attn): EfficientMultiheadAttention(
361
+ (attn): MultiheadAttention(
362
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=128, out_features=128, bias=True)
363
+ )
364
+ (proj_drop): Dropout(p=0.0, inplace=False)
365
+ (dropout_layer): DropPath()
366
+ (sr): Conv2d(128, 128, kernel_size=(4, 4), stride=(4, 4))
367
+ (norm): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
368
+ )
369
+ (norm2): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
370
+ (ffn): MixFFN(
371
+ (activate): GELU(approximate='none')
372
+ (layers): Sequential(
373
+ (0): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1))
374
+ (1): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=512)
375
+ (2): GELU(approximate='none')
376
+ (3): Dropout(p=0.0, inplace=False)
377
+ (4): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1))
378
+ (5): Dropout(p=0.0, inplace=False)
379
+ )
380
+ (dropout_layer): DropPath()
381
+ )
382
+ )
383
+ (2): TransformerEncoderLayer(
384
+ (norm1): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
385
+ (attn): EfficientMultiheadAttention(
386
+ (attn): MultiheadAttention(
387
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=128, out_features=128, bias=True)
388
+ )
389
+ (proj_drop): Dropout(p=0.0, inplace=False)
390
+ (dropout_layer): DropPath()
391
+ (sr): Conv2d(128, 128, kernel_size=(4, 4), stride=(4, 4))
392
+ (norm): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
393
+ )
394
+ (norm2): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
395
+ (ffn): MixFFN(
396
+ (activate): GELU(approximate='none')
397
+ (layers): Sequential(
398
+ (0): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1))
399
+ (1): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=512)
400
+ (2): GELU(approximate='none')
401
+ (3): Dropout(p=0.0, inplace=False)
402
+ (4): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1))
403
+ (5): Dropout(p=0.0, inplace=False)
404
+ )
405
+ (dropout_layer): DropPath()
406
+ )
407
+ )
408
+ (3): TransformerEncoderLayer(
409
+ (norm1): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
410
+ (attn): EfficientMultiheadAttention(
411
+ (attn): MultiheadAttention(
412
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=128, out_features=128, bias=True)
413
+ )
414
+ (proj_drop): Dropout(p=0.0, inplace=False)
415
+ (dropout_layer): DropPath()
416
+ (sr): Conv2d(128, 128, kernel_size=(4, 4), stride=(4, 4))
417
+ (norm): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
418
+ )
419
+ (norm2): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
420
+ (ffn): MixFFN(
421
+ (activate): GELU(approximate='none')
422
+ (layers): Sequential(
423
+ (0): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1))
424
+ (1): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=512)
425
+ (2): GELU(approximate='none')
426
+ (3): Dropout(p=0.0, inplace=False)
427
+ (4): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1))
428
+ (5): Dropout(p=0.0, inplace=False)
429
+ )
430
+ (dropout_layer): DropPath()
431
+ )
432
+ )
433
+ )
434
+ (2): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
435
+ )
436
+ (2): ModuleList(
437
+ (0): PatchEmbed(
438
+ (projection): Conv2d(128, 320, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
439
+ (norm): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
440
+ )
441
+ (1): ModuleList(
442
+ (0): TransformerEncoderLayer(
443
+ (norm1): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
444
+ (attn): EfficientMultiheadAttention(
445
+ (attn): MultiheadAttention(
446
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=320, out_features=320, bias=True)
447
+ )
448
+ (proj_drop): Dropout(p=0.0, inplace=False)
449
+ (dropout_layer): DropPath()
450
+ (sr): Conv2d(320, 320, kernel_size=(2, 2), stride=(2, 2))
451
+ (norm): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
452
+ )
453
+ (norm2): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
454
+ (ffn): MixFFN(
455
+ (activate): GELU(approximate='none')
456
+ (layers): Sequential(
457
+ (0): Conv2d(320, 1280, kernel_size=(1, 1), stride=(1, 1))
458
+ (1): Conv2d(1280, 1280, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1280)
459
+ (2): GELU(approximate='none')
460
+ (3): Dropout(p=0.0, inplace=False)
461
+ (4): Conv2d(1280, 320, kernel_size=(1, 1), stride=(1, 1))
462
+ (5): Dropout(p=0.0, inplace=False)
463
+ )
464
+ (dropout_layer): DropPath()
465
+ )
466
+ )
467
+ (1): TransformerEncoderLayer(
468
+ (norm1): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
469
+ (attn): EfficientMultiheadAttention(
470
+ (attn): MultiheadAttention(
471
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=320, out_features=320, bias=True)
472
+ )
473
+ (proj_drop): Dropout(p=0.0, inplace=False)
474
+ (dropout_layer): DropPath()
475
+ (sr): Conv2d(320, 320, kernel_size=(2, 2), stride=(2, 2))
476
+ (norm): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
477
+ )
478
+ (norm2): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
479
+ (ffn): MixFFN(
480
+ (activate): GELU(approximate='none')
481
+ (layers): Sequential(
482
+ (0): Conv2d(320, 1280, kernel_size=(1, 1), stride=(1, 1))
483
+ (1): Conv2d(1280, 1280, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1280)
484
+ (2): GELU(approximate='none')
485
+ (3): Dropout(p=0.0, inplace=False)
486
+ (4): Conv2d(1280, 320, kernel_size=(1, 1), stride=(1, 1))
487
+ (5): Dropout(p=0.0, inplace=False)
488
+ )
489
+ (dropout_layer): DropPath()
490
+ )
491
+ )
492
+ (2): TransformerEncoderLayer(
493
+ (norm1): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
494
+ (attn): EfficientMultiheadAttention(
495
+ (attn): MultiheadAttention(
496
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=320, out_features=320, bias=True)
497
+ )
498
+ (proj_drop): Dropout(p=0.0, inplace=False)
499
+ (dropout_layer): DropPath()
500
+ (sr): Conv2d(320, 320, kernel_size=(2, 2), stride=(2, 2))
501
+ (norm): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
502
+ )
503
+ (norm2): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
504
+ (ffn): MixFFN(
505
+ (activate): GELU(approximate='none')
506
+ (layers): Sequential(
507
+ (0): Conv2d(320, 1280, kernel_size=(1, 1), stride=(1, 1))
508
+ (1): Conv2d(1280, 1280, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1280)
509
+ (2): GELU(approximate='none')
510
+ (3): Dropout(p=0.0, inplace=False)
511
+ (4): Conv2d(1280, 320, kernel_size=(1, 1), stride=(1, 1))
512
+ (5): Dropout(p=0.0, inplace=False)
513
+ )
514
+ (dropout_layer): DropPath()
515
+ )
516
+ )
517
+ (3): TransformerEncoderLayer(
518
+ (norm1): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
519
+ (attn): EfficientMultiheadAttention(
520
+ (attn): MultiheadAttention(
521
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=320, out_features=320, bias=True)
522
+ )
523
+ (proj_drop): Dropout(p=0.0, inplace=False)
524
+ (dropout_layer): DropPath()
525
+ (sr): Conv2d(320, 320, kernel_size=(2, 2), stride=(2, 2))
526
+ (norm): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
527
+ )
528
+ (norm2): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
529
+ (ffn): MixFFN(
530
+ (activate): GELU(approximate='none')
531
+ (layers): Sequential(
532
+ (0): Conv2d(320, 1280, kernel_size=(1, 1), stride=(1, 1))
533
+ (1): Conv2d(1280, 1280, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1280)
534
+ (2): GELU(approximate='none')
535
+ (3): Dropout(p=0.0, inplace=False)
536
+ (4): Conv2d(1280, 320, kernel_size=(1, 1), stride=(1, 1))
537
+ (5): Dropout(p=0.0, inplace=False)
538
+ )
539
+ (dropout_layer): DropPath()
540
+ )
541
+ )
542
+ (4): TransformerEncoderLayer(
543
+ (norm1): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
544
+ (attn): EfficientMultiheadAttention(
545
+ (attn): MultiheadAttention(
546
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=320, out_features=320, bias=True)
547
+ )
548
+ (proj_drop): Dropout(p=0.0, inplace=False)
549
+ (dropout_layer): DropPath()
550
+ (sr): Conv2d(320, 320, kernel_size=(2, 2), stride=(2, 2))
551
+ (norm): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
552
+ )
553
+ (norm2): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
554
+ (ffn): MixFFN(
555
+ (activate): GELU(approximate='none')
556
+ (layers): Sequential(
557
+ (0): Conv2d(320, 1280, kernel_size=(1, 1), stride=(1, 1))
558
+ (1): Conv2d(1280, 1280, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1280)
559
+ (2): GELU(approximate='none')
560
+ (3): Dropout(p=0.0, inplace=False)
561
+ (4): Conv2d(1280, 320, kernel_size=(1, 1), stride=(1, 1))
562
+ (5): Dropout(p=0.0, inplace=False)
563
+ )
564
+ (dropout_layer): DropPath()
565
+ )
566
+ )
567
+ (5): TransformerEncoderLayer(
568
+ (norm1): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
569
+ (attn): EfficientMultiheadAttention(
570
+ (attn): MultiheadAttention(
571
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=320, out_features=320, bias=True)
572
+ )
573
+ (proj_drop): Dropout(p=0.0, inplace=False)
574
+ (dropout_layer): DropPath()
575
+ (sr): Conv2d(320, 320, kernel_size=(2, 2), stride=(2, 2))
576
+ (norm): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
577
+ )
578
+ (norm2): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
579
+ (ffn): MixFFN(
580
+ (activate): GELU(approximate='none')
581
+ (layers): Sequential(
582
+ (0): Conv2d(320, 1280, kernel_size=(1, 1), stride=(1, 1))
583
+ (1): Conv2d(1280, 1280, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1280)
584
+ (2): GELU(approximate='none')
585
+ (3): Dropout(p=0.0, inplace=False)
586
+ (4): Conv2d(1280, 320, kernel_size=(1, 1), stride=(1, 1))
587
+ (5): Dropout(p=0.0, inplace=False)
588
+ )
589
+ (dropout_layer): DropPath()
590
+ )
591
+ )
592
+ )
593
+ (2): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
594
+ )
595
+ (3): ModuleList(
596
+ (0): PatchEmbed(
597
+ (projection): Conv2d(320, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
598
+ (norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
599
+ )
600
+ (1): ModuleList(
601
+ (0): TransformerEncoderLayer(
602
+ (norm1): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
603
+ (attn): EfficientMultiheadAttention(
604
+ (attn): MultiheadAttention(
605
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
606
+ )
607
+ (proj_drop): Dropout(p=0.0, inplace=False)
608
+ (dropout_layer): DropPath()
609
+ )
610
+ (norm2): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
611
+ (ffn): MixFFN(
612
+ (activate): GELU(approximate='none')
613
+ (layers): Sequential(
614
+ (0): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1))
615
+ (1): Conv2d(2048, 2048, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2048)
616
+ (2): GELU(approximate='none')
617
+ (3): Dropout(p=0.0, inplace=False)
618
+ (4): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1))
619
+ (5): Dropout(p=0.0, inplace=False)
620
+ )
621
+ (dropout_layer): DropPath()
622
+ )
623
+ )
624
+ (1): TransformerEncoderLayer(
625
+ (norm1): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
626
+ (attn): EfficientMultiheadAttention(
627
+ (attn): MultiheadAttention(
628
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
629
+ )
630
+ (proj_drop): Dropout(p=0.0, inplace=False)
631
+ (dropout_layer): DropPath()
632
+ )
633
+ (norm2): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
634
+ (ffn): MixFFN(
635
+ (activate): GELU(approximate='none')
636
+ (layers): Sequential(
637
+ (0): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1))
638
+ (1): Conv2d(2048, 2048, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2048)
639
+ (2): GELU(approximate='none')
640
+ (3): Dropout(p=0.0, inplace=False)
641
+ (4): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1))
642
+ (5): Dropout(p=0.0, inplace=False)
643
+ )
644
+ (dropout_layer): DropPath()
645
+ )
646
+ )
647
+ (2): TransformerEncoderLayer(
648
+ (norm1): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
649
+ (attn): EfficientMultiheadAttention(
650
+ (attn): MultiheadAttention(
651
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
652
+ )
653
+ (proj_drop): Dropout(p=0.0, inplace=False)
654
+ (dropout_layer): DropPath()
655
+ )
656
+ (norm2): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
657
+ (ffn): MixFFN(
658
+ (activate): GELU(approximate='none')
659
+ (layers): Sequential(
660
+ (0): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1))
661
+ (1): Conv2d(2048, 2048, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2048)
662
+ (2): GELU(approximate='none')
663
+ (3): Dropout(p=0.0, inplace=False)
664
+ (4): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1))
665
+ (5): Dropout(p=0.0, inplace=False)
666
+ )
667
+ (dropout_layer): DropPath()
668
+ )
669
+ )
670
+ )
671
+ (2): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
672
+ )
673
+ )
674
+ )
675
+ init_cfg={'type': 'Pretrained', 'checkpoint': 'pretrained/segformer_mit-b2_512x512_160k_ade20k_20220620_114047-64e4feca.pth'}
676
+ (decode_head): SegformerHeadUnetFCHeadSingleStep(
677
+ input_transform=multiple_select, ignore_index=0, align_corners=False
678
+ (loss_decode): CrossEntropyLoss(avg_non_ignore=False)
679
+ (conv_seg): None
680
+ (dropout): Dropout2d(p=0.1, inplace=False)
681
+ (convs): ModuleList(
682
+ (0): ConvModule(
683
+ (conv): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
684
+ (bn): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
685
+ (activate): ReLU(inplace=True)
686
+ )
687
+ (1): ConvModule(
688
+ (conv): Conv2d(128, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
689
+ (bn): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
690
+ (activate): ReLU(inplace=True)
691
+ )
692
+ (2): ConvModule(
693
+ (conv): Conv2d(320, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
694
+ (bn): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
695
+ (activate): ReLU(inplace=True)
696
+ )
697
+ (3): ConvModule(
698
+ (conv): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
699
+ (bn): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
700
+ (activate): ReLU(inplace=True)
701
+ )
702
+ )
703
+ (fusion_conv): ConvModule(
704
+ (conv): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
705
+ (bn): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
706
+ (activate): ReLU(inplace=True)
707
+ )
708
+ (unet): Unet(
709
+ (init_conv): Conv2d(272, 128, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3))
710
+ (time_mlp): Sequential(
711
+ (0): SinusoidalPosEmb()
712
+ (1): Linear(in_features=128, out_features=512, bias=True)
713
+ (2): GELU(approximate='none')
714
+ (3): Linear(in_features=512, out_features=512, bias=True)
715
+ )
716
+ (downs): ModuleList(
717
+ (0): ModuleList(
718
+ (0): ResnetBlock(
719
+ (mlp): Sequential(
720
+ (0): SiLU()
721
+ (1): Linear(in_features=512, out_features=256, bias=True)
722
+ )
723
+ (block1): Block(
724
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
725
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
726
+ (act): SiLU()
727
+ )
728
+ (block2): Block(
729
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
730
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
731
+ (act): SiLU()
732
+ )
733
+ (res_conv): Identity()
734
+ )
735
+ (1): ResnetBlock(
736
+ (mlp): Sequential(
737
+ (0): SiLU()
738
+ (1): Linear(in_features=512, out_features=256, bias=True)
739
+ )
740
+ (block1): Block(
741
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
742
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
743
+ (act): SiLU()
744
+ )
745
+ (block2): Block(
746
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
747
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
748
+ (act): SiLU()
749
+ )
750
+ (res_conv): Identity()
751
+ )
752
+ (2): Residual(
753
+ (fn): PreNorm(
754
+ (fn): LinearAttention(
755
+ (to_qkv): Conv2d(128, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
756
+ (to_out): Sequential(
757
+ (0): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1))
758
+ (1): LayerNorm()
759
+ )
760
+ )
761
+ (norm): LayerNorm()
762
+ )
763
+ )
764
+ (3): Conv2d(128, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
765
+ )
766
+ (1): ModuleList(
767
+ (0): ResnetBlock(
768
+ (mlp): Sequential(
769
+ (0): SiLU()
770
+ (1): Linear(in_features=512, out_features=256, bias=True)
771
+ )
772
+ (block1): Block(
773
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
774
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
775
+ (act): SiLU()
776
+ )
777
+ (block2): Block(
778
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
779
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
780
+ (act): SiLU()
781
+ )
782
+ (res_conv): Identity()
783
+ )
784
+ (1): ResnetBlock(
785
+ (mlp): Sequential(
786
+ (0): SiLU()
787
+ (1): Linear(in_features=512, out_features=256, bias=True)
788
+ )
789
+ (block1): Block(
790
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
791
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
792
+ (act): SiLU()
793
+ )
794
+ (block2): Block(
795
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
796
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
797
+ (act): SiLU()
798
+ )
799
+ (res_conv): Identity()
800
+ )
801
+ (2): Residual(
802
+ (fn): PreNorm(
803
+ (fn): LinearAttention(
804
+ (to_qkv): Conv2d(128, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
805
+ (to_out): Sequential(
806
+ (0): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1))
807
+ (1): LayerNorm()
808
+ )
809
+ )
810
+ (norm): LayerNorm()
811
+ )
812
+ )
813
+ (3): Conv2d(128, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
814
+ )
815
+ (2): ModuleList(
816
+ (0): ResnetBlock(
817
+ (mlp): Sequential(
818
+ (0): SiLU()
819
+ (1): Linear(in_features=512, out_features=256, bias=True)
820
+ )
821
+ (block1): Block(
822
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
823
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
824
+ (act): SiLU()
825
+ )
826
+ (block2): Block(
827
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
828
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
829
+ (act): SiLU()
830
+ )
831
+ (res_conv): Identity()
832
+ )
833
+ (1): ResnetBlock(
834
+ (mlp): Sequential(
835
+ (0): SiLU()
836
+ (1): Linear(in_features=512, out_features=256, bias=True)
837
+ )
838
+ (block1): Block(
839
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
840
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
841
+ (act): SiLU()
842
+ )
843
+ (block2): Block(
844
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
845
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
846
+ (act): SiLU()
847
+ )
848
+ (res_conv): Identity()
849
+ )
850
+ (2): Residual(
851
+ (fn): PreNorm(
852
+ (fn): LinearAttention(
853
+ (to_qkv): Conv2d(128, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
854
+ (to_out): Sequential(
855
+ (0): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1))
856
+ (1): LayerNorm()
857
+ )
858
+ )
859
+ (norm): LayerNorm()
860
+ )
861
+ )
862
+ (3): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
863
+ )
864
+ )
865
+ (ups): ModuleList(
866
+ (0): ModuleList(
867
+ (0): ResnetBlock(
868
+ (mlp): Sequential(
869
+ (0): SiLU()
870
+ (1): Linear(in_features=512, out_features=256, bias=True)
871
+ )
872
+ (block1): Block(
873
+ (proj): WeightStandardizedConv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
874
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
875
+ (act): SiLU()
876
+ )
877
+ (block2): Block(
878
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
879
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
880
+ (act): SiLU()
881
+ )
882
+ (res_conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1))
883
+ )
884
+ (1): ResnetBlock(
885
+ (mlp): Sequential(
886
+ (0): SiLU()
887
+ (1): Linear(in_features=512, out_features=256, bias=True)
888
+ )
889
+ (block1): Block(
890
+ (proj): WeightStandardizedConv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
891
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
892
+ (act): SiLU()
893
+ )
894
+ (block2): Block(
895
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
896
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
897
+ (act): SiLU()
898
+ )
899
+ (res_conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1))
900
+ )
901
+ (2): Residual(
902
+ (fn): PreNorm(
903
+ (fn): LinearAttention(
904
+ (to_qkv): Conv2d(128, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
905
+ (to_out): Sequential(
906
+ (0): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1))
907
+ (1): LayerNorm()
908
+ )
909
+ )
910
+ (norm): LayerNorm()
911
+ )
912
+ )
913
+ (3): Sequential(
914
+ (0): Upsample(scale_factor=2.0, mode=nearest)
915
+ (1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
916
+ )
917
+ )
918
+ (1): ModuleList(
919
+ (0): ResnetBlock(
920
+ (mlp): Sequential(
921
+ (0): SiLU()
922
+ (1): Linear(in_features=512, out_features=256, bias=True)
923
+ )
924
+ (block1): Block(
925
+ (proj): WeightStandardizedConv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
926
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
927
+ (act): SiLU()
928
+ )
929
+ (block2): Block(
930
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
931
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
932
+ (act): SiLU()
933
+ )
934
+ (res_conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1))
935
+ )
936
+ (1): ResnetBlock(
937
+ (mlp): Sequential(
938
+ (0): SiLU()
939
+ (1): Linear(in_features=512, out_features=256, bias=True)
940
+ )
941
+ (block1): Block(
942
+ (proj): WeightStandardizedConv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
943
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
944
+ (act): SiLU()
945
+ )
946
+ (block2): Block(
947
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
948
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
949
+ (act): SiLU()
950
+ )
951
+ (res_conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1))
952
+ )
953
+ (2): Residual(
954
+ (fn): PreNorm(
955
+ (fn): LinearAttention(
956
+ (to_qkv): Conv2d(128, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
957
+ (to_out): Sequential(
958
+ (0): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1))
959
+ (1): LayerNorm()
960
+ )
961
+ )
962
+ (norm): LayerNorm()
963
+ )
964
+ )
965
+ (3): Sequential(
966
+ (0): Upsample(scale_factor=2.0, mode=nearest)
967
+ (1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
968
+ )
969
+ )
970
+ (2): ModuleList(
971
+ (0): ResnetBlock(
972
+ (mlp): Sequential(
973
+ (0): SiLU()
974
+ (1): Linear(in_features=512, out_features=256, bias=True)
975
+ )
976
+ (block1): Block(
977
+ (proj): WeightStandardizedConv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
978
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
979
+ (act): SiLU()
980
+ )
981
+ (block2): Block(
982
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
983
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
984
+ (act): SiLU()
985
+ )
986
+ (res_conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1))
987
+ )
988
+ (1): ResnetBlock(
989
+ (mlp): Sequential(
990
+ (0): SiLU()
991
+ (1): Linear(in_features=512, out_features=256, bias=True)
992
+ )
993
+ (block1): Block(
994
+ (proj): WeightStandardizedConv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
995
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
996
+ (act): SiLU()
997
+ )
998
+ (block2): Block(
999
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
1000
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
1001
+ (act): SiLU()
1002
+ )
1003
+ (res_conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1))
1004
+ )
1005
+ (2): Residual(
1006
+ (fn): PreNorm(
1007
+ (fn): LinearAttention(
1008
+ (to_qkv): Conv2d(128, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
1009
+ (to_out): Sequential(
1010
+ (0): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1))
1011
+ (1): LayerNorm()
1012
+ )
1013
+ )
1014
+ (norm): LayerNorm()
1015
+ )
1016
+ )
1017
+ (3): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
1018
+ )
1019
+ )
1020
+ (mid_block1): ResnetBlock(
1021
+ (mlp): Sequential(
1022
+ (0): SiLU()
1023
+ (1): Linear(in_features=512, out_features=256, bias=True)
1024
+ )
1025
+ (block1): Block(
1026
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
1027
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
1028
+ (act): SiLU()
1029
+ )
1030
+ (block2): Block(
1031
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
1032
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
1033
+ (act): SiLU()
1034
+ )
1035
+ (res_conv): Identity()
1036
+ )
1037
+ (mid_attn): Residual(
1038
+ (fn): PreNorm(
1039
+ (fn): Attention(
1040
+ (to_qkv): Conv2d(128, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
1041
+ (to_out): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1))
1042
+ )
1043
+ (norm): LayerNorm()
1044
+ )
1045
+ )
1046
+ (mid_block2): ResnetBlock(
1047
+ (mlp): Sequential(
1048
+ (0): SiLU()
1049
+ (1): Linear(in_features=512, out_features=256, bias=True)
1050
+ )
1051
+ (block1): Block(
1052
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
1053
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
1054
+ (act): SiLU()
1055
+ )
1056
+ (block2): Block(
1057
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
1058
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
1059
+ (act): SiLU()
1060
+ )
1061
+ (res_conv): Identity()
1062
+ )
1063
+ (final_res_block): ResnetBlock(
1064
+ (mlp): Sequential(
1065
+ (0): SiLU()
1066
+ (1): Linear(in_features=512, out_features=256, bias=True)
1067
+ )
1068
+ (block1): Block(
1069
+ (proj): WeightStandardizedConv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
1070
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
1071
+ (act): SiLU()
1072
+ )
1073
+ (block2): Block(
1074
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
1075
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
1076
+ (act): SiLU()
1077
+ )
1078
+ (res_conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1))
1079
+ )
1080
+ (final_conv): Conv2d(128, 256, kernel_size=(1, 1), stride=(1, 1))
1081
+ )
1082
+ (conv_seg_new): Conv2d(256, 151, kernel_size=(1, 1), stride=(1, 1))
1083
+ (embed): Embedding(151, 16)
1084
+ )
1085
+ init_cfg={'type': 'Pretrained', 'checkpoint': 'pretrained/segformer_mit-b2_512x512_160k_ade20k_20220620_114047-64e4feca.pth'}
1086
+ )
1087
+ 2023-03-03 13:59:40,184 - mmseg - INFO - Loaded 20210 images
1088
+ 2023-03-03 13:59:41,189 - mmseg - INFO - Loaded 2000 images
1089
+ 2023-03-03 13:59:41,192 - mmseg - INFO - Start running, host: laizeqiang@SH-IDC1-10-140-37-124, work_dir: /mnt/petrelfs/laizeqiang/mmseg-baseline/work_dirs/segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151
1090
+ 2023-03-03 13:59:41,192 - mmseg - INFO - Hooks will be executed in the following order:
1091
+ before_run:
1092
+ (VERY_HIGH ) StepLrUpdaterHook
1093
+ (NORMAL ) CheckpointHook
1094
+ (LOW ) DistEvalHook
1095
+ (VERY_LOW ) TextLoggerHook
1096
+ --------------------
1097
+ before_train_epoch:
1098
+ (VERY_HIGH ) StepLrUpdaterHook
1099
+ (LOW ) IterTimerHook
1100
+ (LOW ) DistEvalHook
1101
+ (VERY_LOW ) TextLoggerHook
1102
+ --------------------
1103
+ before_train_iter:
1104
+ (VERY_HIGH ) StepLrUpdaterHook
1105
+ (LOW ) IterTimerHook
1106
+ (LOW ) DistEvalHook
1107
+ --------------------
1108
+ after_train_iter:
1109
+ (ABOVE_NORMAL) OptimizerHook
1110
+ (NORMAL ) CheckpointHook
1111
+ (LOW ) IterTimerHook
1112
+ (LOW ) DistEvalHook
1113
+ (VERY_LOW ) TextLoggerHook
1114
+ --------------------
1115
+ after_train_epoch:
1116
+ (NORMAL ) CheckpointHook
1117
+ (LOW ) DistEvalHook
1118
+ (VERY_LOW ) TextLoggerHook
1119
+ --------------------
1120
+ before_val_epoch:
1121
+ (LOW ) IterTimerHook
1122
+ (VERY_LOW ) TextLoggerHook
1123
+ --------------------
1124
+ before_val_iter:
1125
+ (LOW ) IterTimerHook
1126
+ --------------------
1127
+ after_val_iter:
1128
+ (LOW ) IterTimerHook
1129
+ --------------------
1130
+ after_val_epoch:
1131
+ (VERY_LOW ) TextLoggerHook
1132
+ --------------------
1133
+ after_run:
1134
+ (VERY_LOW ) TextLoggerHook
1135
+ --------------------
1136
+ 2023-03-03 13:59:41,192 - mmseg - INFO - workflow: [('train', 1)], max: 80000 iters
1137
+ 2023-03-03 13:59:41,192 - mmseg - INFO - Checkpoints will be saved to /mnt/petrelfs/laizeqiang/mmseg-baseline/work_dirs/segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151 by HardDiskBackend.
segformer_b2_singlestep/20230303_135933.log.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"env_info": "sys.platform: linux\nPython: 3.7.16 (default, Jan 17 2023, 22:20:44) [GCC 11.2.0]\nCUDA available: True\nGPU 0,1,2,3,4,5,6,7: NVIDIA A100-SXM4-80GB\nCUDA_HOME: /mnt/petrelfs/laizeqiang/miniconda3/envs/torch\nNVCC: Cuda compilation tools, release 11.6, V11.6.124\nGCC: gcc (GCC) 4.8.5 20150623 (Red Hat 4.8.5-44)\nPyTorch: 1.13.1\nPyTorch compiling details: PyTorch built with:\n - GCC 9.3\n - C++ Version: 201402\n - Intel(R) oneAPI Math Kernel Library Version 2021.4-Product Build 20210904 for Intel(R) 64 architecture applications\n - Intel(R) MKL-DNN v2.6.0 (Git Hash 52b5f107dd9cf10910aaa19cb47f3abf9b349815)\n - OpenMP 201511 (a.k.a. OpenMP 4.5)\n - LAPACK is enabled (usually provided by MKL)\n - NNPACK is enabled\n - CPU capability usage: AVX2\n - CUDA Runtime 11.6\n - NVCC architecture flags: -gencode;arch=compute_37,code=sm_37;-gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_61,code=sm_61;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86;-gencode;arch=compute_37,code=compute_37\n - CuDNN 8.3.2 (built against CUDA 11.5)\n - Magma 2.6.1\n - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CUDA_VERSION=11.6, CUDNN_VERSION=8.3.2, CXX_COMPILER=/opt/rh/devtoolset-9/root/usr/bin/c++, CXX_FLAGS= -fabi-version=11 -Wno-deprecated -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -fopenmp -DNDEBUG -DUSE_KINETO -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -DEDGE_PROFILER_USE_KINETO -O2 -fPIC -Wno-narrowing -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wunused-local-typedefs -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-error=deprecated-declarations -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=redundant-decls -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, PERF_WITH_AVX512=1, TORCH_VERSION=1.13.1, USE_CUDA=ON, USE_CUDNN=ON, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=ON, USE_NNPACK=ON, USE_OPENMP=ON, USE_ROCM=OFF, \n\nTorchVision: 0.14.1\nOpenCV: 4.7.0\nMMCV: 1.7.1\nMMCV Compiler: GCC 9.3\nMMCV CUDA Compiler: 11.6\nMMSegmentation: 0.30.0+ad87029", "seed": 97773280, "exp_name": "segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151.py", "mmseg_version": "0.30.0+ad87029", "config": "norm_cfg = dict(type='SyncBN', requires_grad=True)\ncheckpoint = 'pretrained/segformer_mit-b2_512x512_160k_ade20k_20220620_114047-64e4feca.pth'\nmodel = dict(\n type='EncoderDecoderFreeze',\n freeze_parameters=['backbone', 'decode_head'],\n pretrained=\n 'pretrained/segformer_mit-b2_512x512_160k_ade20k_20220620_114047-64e4feca.pth',\n backbone=dict(\n type='MixVisionTransformerCustomInitWeights',\n in_channels=3,\n embed_dims=64,\n num_stages=4,\n num_layers=[3, 4, 6, 3],\n num_heads=[1, 2, 5, 8],\n patch_sizes=[7, 3, 3, 3],\n sr_ratios=[8, 4, 2, 1],\n out_indices=(0, 1, 2, 3),\n mlp_ratio=4,\n qkv_bias=True,\n drop_rate=0.0,\n attn_drop_rate=0.0,\n drop_path_rate=0.1,\n pretrained=\n 'pretrained/segformer_mit-b2_512x512_160k_ade20k_20220620_114047-64e4feca.pth'\n ),\n decode_head=dict(\n type='SegformerHeadUnetFCHeadSingleStep',\n pretrained=\n 'pretrained/segformer_mit-b2_512x512_160k_ade20k_20220620_114047-64e4feca.pth',\n dim=128,\n out_dim=256,\n unet_channels=272,\n dim_mults=[1, 1, 1],\n cat_embedding_dim=16,\n in_channels=[64, 128, 320, 512],\n in_index=[0, 1, 2, 3],\n channels=256,\n dropout_ratio=0.1,\n num_classes=151,\n norm_cfg=dict(type='SyncBN', requires_grad=True),\n align_corners=False,\n ignore_index=0,\n loss_decode=dict(\n type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),\n train_cfg=dict(),\n test_cfg=dict(mode='whole'))\ndataset_type = 'ADE20K151Dataset'\ndata_root = 'data/ade/ADEChallengeData2016'\nimg_norm_cfg = dict(\n mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ncrop_size = (512, 512)\ntrain_pipeline = [\n dict(type='LoadImageFromFile'),\n dict(type='LoadAnnotations', reduce_zero_label=False),\n dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),\n dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75),\n dict(type='RandomFlip', prob=0.5),\n dict(type='PhotoMetricDistortion'),\n dict(\n type='Normalize',\n mean=[123.675, 116.28, 103.53],\n std=[58.395, 57.12, 57.375],\n to_rgb=True),\n dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=0),\n dict(type='DefaultFormatBundle'),\n dict(type='Collect', keys=['img', 'gt_semantic_seg'])\n]\ntest_pipeline = [\n dict(type='LoadImageFromFile'),\n dict(\n type='MultiScaleFlipAug',\n img_scale=(2048, 512),\n flip=False,\n transforms=[\n dict(type='Resize', keep_ratio=True),\n dict(type='RandomFlip'),\n dict(\n type='Normalize',\n mean=[123.675, 116.28, 103.53],\n std=[58.395, 57.12, 57.375],\n to_rgb=True),\n dict(type='Pad', size_divisor=16, pad_val=0, seg_pad_val=0),\n dict(type='ImageToTensor', keys=['img']),\n dict(type='Collect', keys=['img'])\n ])\n]\ndata = dict(\n samples_per_gpu=4,\n workers_per_gpu=4,\n train=dict(\n type='ADE20K151Dataset',\n data_root='data/ade/ADEChallengeData2016',\n img_dir='images/training',\n ann_dir='annotations/training',\n pipeline=[\n dict(type='LoadImageFromFile'),\n dict(type='LoadAnnotations', reduce_zero_label=False),\n dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),\n dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75),\n dict(type='RandomFlip', prob=0.5),\n dict(type='PhotoMetricDistortion'),\n dict(\n type='Normalize',\n mean=[123.675, 116.28, 103.53],\n std=[58.395, 57.12, 57.375],\n to_rgb=True),\n dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=0),\n dict(type='DefaultFormatBundle'),\n dict(type='Collect', keys=['img', 'gt_semantic_seg'])\n ]),\n val=dict(\n type='ADE20K151Dataset',\n data_root='data/ade/ADEChallengeData2016',\n img_dir='images/validation',\n ann_dir='annotations/validation',\n pipeline=[\n dict(type='LoadImageFromFile'),\n dict(\n type='MultiScaleFlipAug',\n img_scale=(2048, 512),\n flip=False,\n transforms=[\n dict(type='Resize', keep_ratio=True),\n dict(type='RandomFlip'),\n dict(\n type='Normalize',\n mean=[123.675, 116.28, 103.53],\n std=[58.395, 57.12, 57.375],\n to_rgb=True),\n dict(\n type='Pad', size_divisor=16, pad_val=0, seg_pad_val=0),\n dict(type='ImageToTensor', keys=['img']),\n dict(type='Collect', keys=['img'])\n ])\n ]),\n test=dict(\n type='ADE20K151Dataset',\n data_root='data/ade/ADEChallengeData2016',\n img_dir='images/validation',\n ann_dir='annotations/validation',\n pipeline=[\n dict(type='LoadImageFromFile'),\n dict(\n type='MultiScaleFlipAug',\n img_scale=(2048, 512),\n flip=False,\n transforms=[\n dict(type='Resize', keep_ratio=True),\n dict(type='RandomFlip'),\n dict(\n type='Normalize',\n mean=[123.675, 116.28, 103.53],\n std=[58.395, 57.12, 57.375],\n to_rgb=True),\n dict(\n type='Pad', size_divisor=16, pad_val=0, seg_pad_val=0),\n dict(type='ImageToTensor', keys=['img']),\n dict(type='Collect', keys=['img'])\n ])\n ]))\nlog_config = dict(\n interval=50, hooks=[dict(type='TextLoggerHook', by_epoch=False)])\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nload_from = None\nresume_from = None\nworkflow = [('train', 1)]\ncudnn_benchmark = True\noptimizer = dict(\n type='AdamW', lr=0.00015, betas=[0.9, 0.96], weight_decay=0.045)\noptimizer_config = dict()\nlr_config = dict(\n policy='step',\n warmup='linear',\n warmup_iters=1000,\n warmup_ratio=1e-06,\n step=10000,\n gamma=0.5,\n min_lr=1e-06,\n by_epoch=False)\nrunner = dict(type='IterBasedRunner', max_iters=80000)\ncheckpoint_config = dict(by_epoch=False, interval=8000)\nevaluation = dict(\n interval=8000, metric='mIoU', pre_eval=True, save_best='mIoU')\nwork_dir = './work_dirs/segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151'\ngpu_ids = range(0, 8)\nauto_resume = True\ndevice = 'cuda'\nseed = 97773280\n", "CLASSES": ["background", "wall", "building", "sky", "floor", "tree", "ceiling", "road", "bed ", "windowpane", "grass", "cabinet", "sidewalk", "person", "earth", "door", "table", "mountain", "plant", "curtain", "chair", "car", "water", "painting", "sofa", "shelf", "house", "sea", "mirror", "rug", "field", "armchair", "seat", "fence", "desk", "rock", "wardrobe", "lamp", "bathtub", "railing", "cushion", "base", "box", "column", "signboard", "chest of drawers", "counter", "sand", "sink", "skyscraper", "fireplace", "refrigerator", "grandstand", "path", "stairs", "runway", "case", "pool table", "pillow", "screen door", "stairway", "river", "bridge", "bookcase", "blind", "coffee table", "toilet", "flower", "book", "hill", "bench", "countertop", "stove", "palm", "kitchen island", "computer", "swivel chair", "boat", "bar", "arcade machine", "hovel", "bus", "towel", "light", "truck", "tower", "chandelier", "awning", "streetlight", "booth", "television receiver", "airplane", "dirt track", "apparel", "pole", "land", "bannister", "escalator", "ottoman", "bottle", "buffet", "poster", "stage", "van", "ship", "fountain", "conveyer belt", "canopy", "washer", "plaything", "swimming pool", "stool", "barrel", "basket", "waterfall", "tent", "bag", "minibike", "cradle", "oven", "ball", "food", "step", "tank", "trade name", "microwave", "pot", "animal", "bicycle", "lake", "dishwasher", "screen", "blanket", "sculpture", "hood", "sconce", "vase", "traffic light", "tray", "ashcan", "fan", "pier", "crt screen", "plate", "monitor", "bulletin board", "shower", "radiator", "glass", "clock", "flag"], "PALETTE": [[0, 0, 0], [120, 120, 120], [180, 120, 120], [6, 230, 230], [80, 50, 50], [4, 200, 3], [120, 120, 80], [140, 140, 140], [204, 5, 255], [230, 230, 230], [4, 250, 7], [224, 5, 255], [235, 255, 7], [150, 5, 61], [120, 120, 70], [8, 255, 51], [255, 6, 82], [143, 255, 140], [204, 255, 4], [255, 51, 7], [204, 70, 3], [0, 102, 200], [61, 230, 250], [255, 6, 51], [11, 102, 255], [255, 7, 71], [255, 9, 224], [9, 7, 230], [220, 220, 220], [255, 9, 92], [112, 9, 255], [8, 255, 214], [7, 255, 224], [255, 184, 6], [10, 255, 71], [255, 41, 10], [7, 255, 255], [224, 255, 8], [102, 8, 255], [255, 61, 6], [255, 194, 7], [255, 122, 8], [0, 255, 20], [255, 8, 41], [255, 5, 153], [6, 51, 255], [235, 12, 255], [160, 150, 20], [0, 163, 255], [140, 140, 140], [250, 10, 15], [20, 255, 0], [31, 255, 0], [255, 31, 0], [255, 224, 0], [153, 255, 0], [0, 0, 255], [255, 71, 0], [0, 235, 255], [0, 173, 255], [31, 0, 255], [11, 200, 200], [255, 82, 0], [0, 255, 245], [0, 61, 255], [0, 255, 112], [0, 255, 133], [255, 0, 0], [255, 163, 0], [255, 102, 0], [194, 255, 0], [0, 143, 255], [51, 255, 0], [0, 82, 255], [0, 255, 41], [0, 255, 173], [10, 0, 255], [173, 255, 0], [0, 255, 153], [255, 92, 0], [255, 0, 255], [255, 0, 245], [255, 0, 102], [255, 173, 0], [255, 0, 20], [255, 184, 184], [0, 31, 255], [0, 255, 61], [0, 71, 255], [255, 0, 204], [0, 255, 194], [0, 255, 82], [0, 10, 255], [0, 112, 255], [51, 0, 255], [0, 194, 255], [0, 122, 255], [0, 255, 163], [255, 153, 0], [0, 255, 10], [255, 112, 0], [143, 255, 0], [82, 0, 255], [163, 255, 0], [255, 235, 0], [8, 184, 170], [133, 0, 255], [0, 255, 92], [184, 0, 255], [255, 0, 31], [0, 184, 255], [0, 214, 255], [255, 0, 112], [92, 255, 0], [0, 224, 255], [112, 224, 255], [70, 184, 160], [163, 0, 255], [153, 0, 255], [71, 255, 0], [255, 0, 163], [255, 204, 0], [255, 0, 143], [0, 255, 235], [133, 255, 0], [255, 0, 235], [245, 0, 255], [255, 0, 122], [255, 245, 0], [10, 190, 212], [214, 255, 0], [0, 204, 255], [20, 0, 255], [255, 255, 0], [0, 153, 255], [0, 41, 255], [0, 255, 204], [41, 0, 255], [41, 255, 0], [173, 0, 255], [0, 245, 255], [71, 0, 255], [122, 0, 255], [0, 255, 184], [0, 92, 255], [184, 255, 0], [0, 133, 255], [255, 214, 0], [25, 194, 194], [102, 255, 0], [92, 0, 255]], "hook_msgs": {}}
segformer_b2_singlestep/iter_80000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4ddd1dbf72f0bea45926f79ba05c54ab5133fcaf69080d0b2936ce54329c82a1
3
+ size 1995368606
segformer_b2_singlestep/segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151.py ADDED
@@ -0,0 +1,184 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ norm_cfg = dict(type='SyncBN', requires_grad=True)
2
+ checkpoint = 'pretrained/segformer_mit-b2_512x512_160k_ade20k_20220620_114047-64e4feca.pth'
3
+ model = dict(
4
+ type='EncoderDecoderFreeze',
5
+ freeze_parameters=['backbone', 'decode_head'],
6
+ pretrained=
7
+ 'pretrained/segformer_mit-b2_512x512_160k_ade20k_20220620_114047-64e4feca.pth',
8
+ backbone=dict(
9
+ type='MixVisionTransformerCustomInitWeights',
10
+ in_channels=3,
11
+ embed_dims=64,
12
+ num_stages=4,
13
+ num_layers=[3, 4, 6, 3],
14
+ num_heads=[1, 2, 5, 8],
15
+ patch_sizes=[7, 3, 3, 3],
16
+ sr_ratios=[8, 4, 2, 1],
17
+ out_indices=(0, 1, 2, 3),
18
+ mlp_ratio=4,
19
+ qkv_bias=True,
20
+ drop_rate=0.0,
21
+ attn_drop_rate=0.0,
22
+ drop_path_rate=0.1),
23
+ decode_head=dict(
24
+ type='SegformerHeadUnetFCHeadSingleStep',
25
+ pretrained=
26
+ 'pretrained/segformer_mit-b2_512x512_160k_ade20k_20220620_114047-64e4feca.pth',
27
+ dim=128,
28
+ out_dim=256,
29
+ unet_channels=272,
30
+ dim_mults=[1, 1, 1],
31
+ cat_embedding_dim=16,
32
+ in_channels=[64, 128, 320, 512],
33
+ in_index=[0, 1, 2, 3],
34
+ channels=256,
35
+ dropout_ratio=0.1,
36
+ num_classes=151,
37
+ norm_cfg=dict(type='SyncBN', requires_grad=True),
38
+ align_corners=False,
39
+ ignore_index=0,
40
+ loss_decode=dict(
41
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
42
+ train_cfg=dict(),
43
+ test_cfg=dict(mode='whole'))
44
+ dataset_type = 'ADE20K151Dataset'
45
+ data_root = 'data/ade/ADEChallengeData2016'
46
+ img_norm_cfg = dict(
47
+ mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
48
+ crop_size = (512, 512)
49
+ train_pipeline = [
50
+ dict(type='LoadImageFromFile'),
51
+ dict(type='LoadAnnotations', reduce_zero_label=False),
52
+ dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),
53
+ dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75),
54
+ dict(type='RandomFlip', prob=0.5),
55
+ dict(type='PhotoMetricDistortion'),
56
+ dict(
57
+ type='Normalize',
58
+ mean=[123.675, 116.28, 103.53],
59
+ std=[58.395, 57.12, 57.375],
60
+ to_rgb=True),
61
+ dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=0),
62
+ dict(type='DefaultFormatBundle'),
63
+ dict(type='Collect', keys=['img', 'gt_semantic_seg'])
64
+ ]
65
+ test_pipeline = [
66
+ dict(type='LoadImageFromFile'),
67
+ dict(
68
+ type='MultiScaleFlipAug',
69
+ img_scale=(2048, 512),
70
+ flip=False,
71
+ transforms=[
72
+ dict(type='Resize', keep_ratio=True),
73
+ dict(type='RandomFlip'),
74
+ dict(
75
+ type='Normalize',
76
+ mean=[123.675, 116.28, 103.53],
77
+ std=[58.395, 57.12, 57.375],
78
+ to_rgb=True),
79
+ dict(type='Pad', size_divisor=16, pad_val=0, seg_pad_val=0),
80
+ dict(type='ImageToTensor', keys=['img']),
81
+ dict(type='Collect', keys=['img'])
82
+ ])
83
+ ]
84
+ data = dict(
85
+ samples_per_gpu=4,
86
+ workers_per_gpu=4,
87
+ train=dict(
88
+ type='ADE20K151Dataset',
89
+ data_root='data/ade/ADEChallengeData2016',
90
+ img_dir='images/training',
91
+ ann_dir='annotations/training',
92
+ pipeline=[
93
+ dict(type='LoadImageFromFile'),
94
+ dict(type='LoadAnnotations', reduce_zero_label=False),
95
+ dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),
96
+ dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75),
97
+ dict(type='RandomFlip', prob=0.5),
98
+ dict(type='PhotoMetricDistortion'),
99
+ dict(
100
+ type='Normalize',
101
+ mean=[123.675, 116.28, 103.53],
102
+ std=[58.395, 57.12, 57.375],
103
+ to_rgb=True),
104
+ dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=0),
105
+ dict(type='DefaultFormatBundle'),
106
+ dict(type='Collect', keys=['img', 'gt_semantic_seg'])
107
+ ]),
108
+ val=dict(
109
+ type='ADE20K151Dataset',
110
+ data_root='data/ade/ADEChallengeData2016',
111
+ img_dir='images/validation',
112
+ ann_dir='annotations/validation',
113
+ pipeline=[
114
+ dict(type='LoadImageFromFile'),
115
+ dict(
116
+ type='MultiScaleFlipAug',
117
+ img_scale=(2048, 512),
118
+ flip=False,
119
+ transforms=[
120
+ dict(type='Resize', keep_ratio=True),
121
+ dict(type='RandomFlip'),
122
+ dict(
123
+ type='Normalize',
124
+ mean=[123.675, 116.28, 103.53],
125
+ std=[58.395, 57.12, 57.375],
126
+ to_rgb=True),
127
+ dict(
128
+ type='Pad', size_divisor=16, pad_val=0, seg_pad_val=0),
129
+ dict(type='ImageToTensor', keys=['img']),
130
+ dict(type='Collect', keys=['img'])
131
+ ])
132
+ ]),
133
+ test=dict(
134
+ type='ADE20K151Dataset',
135
+ data_root='data/ade/ADEChallengeData2016',
136
+ img_dir='images/validation',
137
+ ann_dir='annotations/validation',
138
+ pipeline=[
139
+ dict(type='LoadImageFromFile'),
140
+ dict(
141
+ type='MultiScaleFlipAug',
142
+ img_scale=(2048, 512),
143
+ flip=False,
144
+ transforms=[
145
+ dict(type='Resize', keep_ratio=True),
146
+ dict(type='RandomFlip'),
147
+ dict(
148
+ type='Normalize',
149
+ mean=[123.675, 116.28, 103.53],
150
+ std=[58.395, 57.12, 57.375],
151
+ to_rgb=True),
152
+ dict(
153
+ type='Pad', size_divisor=16, pad_val=0, seg_pad_val=0),
154
+ dict(type='ImageToTensor', keys=['img']),
155
+ dict(type='Collect', keys=['img'])
156
+ ])
157
+ ]))
158
+ log_config = dict(
159
+ interval=50, hooks=[dict(type='TextLoggerHook', by_epoch=False)])
160
+ dist_params = dict(backend='nccl')
161
+ log_level = 'INFO'
162
+ load_from = None
163
+ resume_from = None
164
+ workflow = [('train', 1)]
165
+ cudnn_benchmark = True
166
+ optimizer = dict(
167
+ type='AdamW', lr=0.00015, betas=[0.9, 0.96], weight_decay=0.045)
168
+ optimizer_config = dict()
169
+ lr_config = dict(
170
+ policy='step',
171
+ warmup='linear',
172
+ warmup_iters=1000,
173
+ warmup_ratio=1e-06,
174
+ step=10000,
175
+ gamma=0.5,
176
+ min_lr=1e-06,
177
+ by_epoch=False)
178
+ runner = dict(type='IterBasedRunner', max_iters=80000)
179
+ checkpoint_config = dict(by_epoch=False, interval=8000)
180
+ evaluation = dict(
181
+ interval=8000, metric='mIoU', pre_eval=True, save_best='mIoU')
182
+ work_dir = './work_dirs/segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151'
183
+ gpu_ids = range(0, 8)
184
+ auto_resume = True