KyanChen committed
Commit 6eaafd0
1 Parent(s): 190e406

Upload 25 files

configs/huggingface/rsprompter_anchor_NWPU_config.py ADDED
@@ -0,0 +1,353 @@
1
+ custom_imports = dict(imports=['mmseg.datasets', 'mmseg.models'], allow_failed_imports=False)
2
+
3
+ sub_model_train = [
4
+ 'panoptic_head',
5
+ 'data_preprocessor'
6
+ ]
7
+
8
+ sub_model_optim = {
9
+ 'panoptic_head': {'lr_mult': 1},
10
+ }
11
+
12
+ max_epochs = 1200
13
+
14
+ optimizer = dict(
15
+ type='AdamW',
16
+ sub_model=sub_model_optim,
17
+ lr=0.0005,
18
+ weight_decay=1e-3
19
+ )
20
+
21
+ param_scheduler = [
22
+ # warm up learning rate scheduler
23
+ dict(
24
+ type='LinearLR',
25
+ start_factor=1e-4,
26
+ by_epoch=True,
27
+ begin=0,
28
+ end=1,
29
+ # update by iter
30
+ convert_to_iter_based=True),
31
+ # main learning rate scheduler
32
+ dict(
33
+ type='CosineAnnealingLR',
34
+ T_max=max_epochs,
35
+ by_epoch=True,
36
+ begin=1,
37
+ end=max_epochs,
38
+ ),
39
+ ]
40
+
41
+ param_scheduler_callback = dict(
42
+ type='ParamSchedulerHook'
43
+ )
44
+
45
+
46
+ image_size = (1024, 1024)
47
+
48
+ data_preprocessor = dict(
49
+ type='mmdet.DetDataPreprocessor',
50
+ mean=[123.675, 116.28, 103.53],
51
+ std=[58.395, 57.12, 57.375],
52
+ bgr_to_rgb=True,
53
+ pad_size_divisor=32,
54
+ pad_mask=True,
55
+ mask_pad_value=0,
56
+ )
57
+
58
+ num_things_classes = 10
59
+ num_stuff_classes = 0
60
+ num_classes = num_things_classes + num_stuff_classes
61
+ prompt_shape = (60, 5)
62
+
63
+ model_cfg = dict(
64
+ type='SegSAMAnchorPLer',
65
+ hyperparameters=dict(
66
+ optimizer=optimizer,
67
+ param_scheduler=param_scheduler,
68
+ ),
69
+ need_train_names=sub_model_train,
70
+ data_preprocessor=data_preprocessor,
71
+ backbone=dict(
72
+ type='vit_h',
73
+ # checkpoint='pretrain/sam/sam_vit_h_4b8939.pth',
74
+ # type='vit_b',
75
+ # checkpoint='pretrain/sam/sam_vit_b_01ec64.pth',
76
+ ),
77
+ panoptic_head=dict(
78
+ type='SAMAnchorInstanceHead',
79
+ neck=dict(
80
+ type='SAMAggregatorNeck',
81
+ in_channels=[1280] * 32,
82
+ # in_channels=[768] * 12,
83
+ inner_channels=32,
84
+ selected_channels=range(4, 32, 2),
85
+ # selected_channels=range(4, 12, 2),
86
+ out_channels=256,
87
+ up_sample_scale=4,
88
+ ),
89
+ rpn_head=dict(
90
+ type='mmdet.RPNHead',
91
+ in_channels=256,
92
+ feat_channels=256,
93
+ anchor_generator=dict(
94
+ type='mmdet.AnchorGenerator',
95
+ scales=[2, 4, 8, 16, 32, 64],
96
+ ratios=[0.5, 1.0, 2.0],
97
+ strides=[8, 16, 32]),
98
+ bbox_coder=dict(
99
+ type='mmdet.DeltaXYWHBBoxCoder',
100
+ target_means=[.0, .0, .0, .0],
101
+ target_stds=[1.0, 1.0, 1.0, 1.0]),
102
+ loss_cls=dict(
103
+ type='mmdet.CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
104
+ loss_bbox=dict(type='mmdet.SmoothL1Loss', loss_weight=1.0)),
105
+ roi_head=dict(
106
+ type='SAMAnchorPromptRoIHead',
107
+ bbox_roi_extractor=dict(
108
+ type='mmdet.SingleRoIExtractor',
109
+ roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
110
+ out_channels=256,
111
+ featmap_strides=[8, 16, 32]),
112
+ bbox_head=dict(
113
+ type='mmdet.Shared2FCBBoxHead',
114
+ in_channels=256,
115
+ fc_out_channels=1024,
116
+ roi_feat_size=7,
117
+ num_classes=num_classes,
118
+ bbox_coder=dict(
119
+ type='mmdet.DeltaXYWHBBoxCoder',
120
+ target_means=[0., 0., 0., 0.],
121
+ target_stds=[0.1, 0.1, 0.2, 0.2]),
122
+ reg_class_agnostic=False,
123
+ loss_cls=dict(
124
+ type='mmdet.CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
125
+ loss_bbox=dict(type='mmdet.SmoothL1Loss', loss_weight=1.0)),
126
+ mask_roi_extractor=dict(
127
+ type='mmdet.SingleRoIExtractor',
128
+ roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),
129
+ out_channels=256,
130
+ featmap_strides=[8, 16, 32]),
131
+ mask_head=dict(
132
+ type='SAMPromptMaskHead',
133
+ per_query_point=prompt_shape[1],
134
+ with_sincos=True,
135
+ class_agnostic=True,
136
+ loss_mask=dict(
137
+ type='mmdet.CrossEntropyLoss', use_mask=True, loss_weight=1.0))),
138
+ # model training and testing settings
139
+ train_cfg=dict(
140
+ rpn=dict(
141
+ assigner=dict(
142
+ type='mmdet.MaxIoUAssigner',
143
+ pos_iou_thr=0.7,
144
+ neg_iou_thr=0.3,
145
+ min_pos_iou=0.3,
146
+ match_low_quality=True,
147
+ ignore_iof_thr=-1),
148
+ sampler=dict(
149
+ type='mmdet.RandomSampler',
150
+ num=512,
151
+ pos_fraction=0.5,
152
+ neg_pos_ub=-1,
153
+ add_gt_as_proposals=False),
154
+ allowed_border=-1,
155
+ pos_weight=-1,
156
+ debug=False),
157
+ rpn_proposal=dict(
158
+ nms_pre=2000,
159
+ max_per_img=1000,
160
+ nms=dict(type='nms', iou_threshold=0.7),
161
+ min_bbox_size=0),
162
+ rcnn=dict(
163
+ assigner=dict(
164
+ type='mmdet.MaxIoUAssigner',
165
+ pos_iou_thr=0.5,
166
+ neg_iou_thr=0.5,
167
+ min_pos_iou=0.5,
168
+ match_low_quality=True,
169
+ ignore_iof_thr=-1),
170
+ sampler=dict(
171
+ type='mmdet.RandomSampler',
172
+ num=256,
173
+ pos_fraction=0.25,
174
+ neg_pos_ub=-1,
175
+ add_gt_as_proposals=True),
176
+ mask_size=1024,
177
+ pos_weight=-1,
178
+ debug=False)),
179
+ test_cfg=dict(
180
+ rpn=dict(
181
+ nms_pre=1000,
182
+ max_per_img=1000,
183
+ nms=dict(type='nms', iou_threshold=0.7),
184
+ min_bbox_size=0),
185
+ rcnn=dict(
186
+ score_thr=0.05,
187
+ nms=dict(type='nms', iou_threshold=0.5),
188
+ max_per_img=100,
189
+ mask_thr_binary=0.5)
190
+ )
191
+ )
192
+ )
193
+
194
+
195
+ task_name = 'nwpu_ins'
196
+ exp_name = 'E20230629_1'
197
+ logger = dict(
198
+ type='WandbLogger',
199
+ project=task_name,
200
+ group='sam-anchor',
201
+ name=exp_name
202
+ )
203
+
204
+
205
+ callbacks = [
206
+ param_scheduler_callback,
207
+ dict(
208
+ type='ModelCheckpoint',
209
+ dirpath=f'results/{task_name}/{exp_name}/checkpoints',
210
+ save_last=True,
211
+ mode='max',
212
+ monitor='valsegm_map_0',
213
+ save_top_k=3,
214
+ filename='epoch_{epoch}-map_{valsegm_map_0:.4f}'
215
+ ),
216
+ dict(
217
+ type='LearningRateMonitor',
218
+ logging_interval='step'
219
+ )
220
+ ]
221
+
222
+ vis_backends = [dict(type='mmdet.LocalVisBackend')]
223
+ visualizer = dict(
224
+ type='mmdet.DetLocalVisualizer',
225
+ vis_backends=vis_backends,
226
+ name='visualizer',
227
+ fig_save_cfg=dict(
228
+ frameon=False,
229
+ figsize=(40, 20),
230
+ # dpi=300,
231
+ ),
232
+ line_width=2,
233
+ alpha=0.8
234
+ )
235
+
236
+ trainer_cfg = dict(
237
+ compiled_model=False,
238
+ accelerator="auto",
239
+ strategy="auto",
240
+ # strategy="ddp",
241
+ # strategy='ddp_find_unused_parameters_true',
242
+ # precision='32',
243
+ # precision='16-mixed',
244
+ devices=8,
245
+ default_root_dir=f'results/{task_name}/{exp_name}',
246
+ # default_root_dir='results/tmp',
247
+ max_epochs=max_epochs,
248
+ logger=logger,
249
+ callbacks=callbacks,
250
+ log_every_n_steps=5,
251
+ check_val_every_n_epoch=5,
252
+ benchmark=True,
253
+ # sync_batchnorm=True,
254
+ # fast_dev_run=True,
255
+
256
+ # limit_train_batches=1,
257
+ # limit_val_batches=0,
258
+ # limit_test_batches=None,
259
+ # limit_predict_batches=None,
260
+ # overfit_batches=0.0,
261
+
262
+ # val_check_interval=None,
263
+ # num_sanity_val_steps=0,
264
+ # enable_checkpointing=None,
265
+ # enable_progress_bar=None,
266
+ # enable_model_summary=None,
267
+ # accumulate_grad_batches=32,
268
+ # gradient_clip_val=15,
269
+ # gradient_clip_algorithm='norm',
270
+ # deterministic=None,
271
+ # inference_mode: bool=True,
272
+ use_distributed_sampler=True,
273
+ # profiler="simple",
274
+ # detect_anomaly=False,
275
+ # barebones=False,
276
+ # plugins=None,
277
+ # reload_dataloaders_every_n_epochs=0,
278
+ )
279
+
280
+
281
+ backend_args = None
282
+ train_pipeline = [
283
+ dict(type='mmdet.LoadImageFromFile'),
284
+ dict(type='mmdet.LoadAnnotations', with_bbox=True, with_mask=True),
285
+ dict(type='mmdet.Resize', scale=image_size),
286
+ dict(type='mmdet.RandomFlip', prob=0.5),
287
+ dict(type='mmdet.PackDetInputs')
288
+ ]
289
+
290
+ test_pipeline = [
291
+ dict(type='mmdet.LoadImageFromFile', backend_args=backend_args),
292
+ dict(type='mmdet.Resize', scale=image_size),
293
+ # If you don't have a gt annotation, delete the pipeline
294
+ dict(type='mmdet.LoadAnnotations', with_bbox=True, with_mask=True),
295
+ dict(
296
+ type='mmdet.PackDetInputs',
297
+ meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
298
+ 'scale_factor'))
299
+ ]
300
+
301
+ predict_pipeline = [
302
+ dict(type='mmdet.Resize', scale=image_size),
303
+ dict(
304
+ type='mmdet.PackDetInputs',
305
+ meta_keys=('ori_shape', 'img_shape', 'scale_factor'))
306
+ ]
307
+
308
+ train_batch_size_per_gpu = 2
309
+ train_num_workers = 2
310
+ test_batch_size_per_gpu = 2
311
+ test_num_workers = 2
312
+ persistent_workers = True
313
+
314
+ data_parent = '/mnt/search01/dataset/cky_data/NWPU10'
315
+ train_data_prefix = ''
316
+ val_data_prefix = ''
317
+ dataset_type = 'NWPUInsSegDataset'
318
+
319
+ val_loader = dict(
320
+ batch_size=test_batch_size_per_gpu,
321
+ num_workers=test_num_workers,
322
+ persistent_workers=persistent_workers,
323
+ pin_memory=True,
324
+ dataset=dict(
325
+ type=dataset_type,
326
+ data_root=data_parent,
327
+ ann_file='NWPU_instances_val.json',
328
+ data_prefix=dict(img_path='positive image set'),
329
+ test_mode=True,
330
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
331
+ pipeline=test_pipeline,
332
+ backend_args=backend_args))
333
+
334
+ datamodule_cfg = dict(
335
+ type='PLDataModule',
336
+ train_loader=dict(
337
+ batch_size=train_batch_size_per_gpu,
338
+ num_workers=train_num_workers,
339
+ persistent_workers=persistent_workers,
340
+ pin_memory=True,
341
+ dataset=dict(
342
+ type=dataset_type,
343
+ data_root=data_parent,
344
+ ann_file='NWPU_instances_train.json',
345
+ data_prefix=dict(img_path='positive image set'),
346
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
347
+ pipeline=train_pipeline,
348
+ backend_args=backend_args)
349
+ ),
350
+ val_loader=val_loader,
351
+ # test_loader=val_loader
352
+ predict_loader=val_loader
353
+ )
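
The file above is a plain MMEngine-style Python config, so it can be loaded and overridden programmatically before training. The sketch below is an editorial illustration only (not part of this commit); it assumes mmengine is installed, that the file sits at the path shown, and the override values and dump path are made up for the example.

from mmengine.config import Config

# Load the added config; the path assumes this repository's layout.
cfg = Config.fromfile('configs/huggingface/rsprompter_anchor_NWPU_config.py')

print(cfg.num_classes)    # 10 (= num_things_classes + num_stuff_classes)
print(cfg.prompt_shape)   # (60, 5); prompt_shape[1] feeds per_query_point in the mask head

# Override a few fields for a quick smoke run without editing the file.
cfg.merge_from_dict({
    'trainer_cfg.devices': 1,
    'trainer_cfg.max_epochs': 5,
    'datamodule_cfg.train_loader.batch_size': 1,
})
cfg.dump('rsprompter_anchor_NWPU_debug.py')  # persist the merged config
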
configs/huggingface/rsprompter_anchor_SSDD_config.py ADDED
@@ -0,0 +1,369 @@
1
+ custom_imports = dict(imports=['mmseg.datasets', 'mmseg.models'], allow_failed_imports=False)
2
+
3
+ sub_model_train = [
4
+ 'panoptic_head',
5
+ 'data_preprocessor'
6
+ ]
7
+
8
+ sub_model_optim = {
9
+ 'panoptic_head': {'lr_mult': 1},
10
+ }
11
+
12
+ max_epochs = 1000
13
+
14
+ optimizer = dict(
15
+ type='AdamW',
16
+ sub_model=sub_model_optim,
17
+ lr=0.0005,
18
+ weight_decay=1e-3
19
+ )
20
+
21
+ param_scheduler = [
22
+ # warm up learning rate scheduler
23
+ dict(
24
+ type='LinearLR',
25
+ start_factor=1e-4,
26
+ by_epoch=True,
27
+ begin=0,
28
+ end=1,
29
+ # update by iter
30
+ convert_to_iter_based=True),
31
+ # main learning rate scheduler
32
+ dict(
33
+ type='CosineAnnealingLR',
34
+ T_max=max_epochs,
35
+ by_epoch=True,
36
+ begin=1,
37
+ end=max_epochs,
38
+ ),
39
+ ]
40
+
41
+ param_scheduler_callback = dict(
42
+ type='ParamSchedulerHook'
43
+ )
44
+
45
+ evaluator_ = dict(
46
+ type='CocoPLMetric',
47
+ metric=['bbox', 'segm'],
48
+ proposal_nums=[1, 10, 100]
49
+ )
50
+
51
+ evaluator = dict(
52
+ val_evaluator=evaluator_,
53
+ )
54
+
55
+
56
+ image_size = (1024, 1024)
57
+
58
+ data_preprocessor = dict(
59
+ type='mmdet.DetDataPreprocessor',
60
+ mean=[123.675, 116.28, 103.53],
61
+ std=[58.395, 57.12, 57.375],
62
+ bgr_to_rgb=True,
63
+ pad_size_divisor=32,
64
+ pad_mask=True,
65
+ mask_pad_value=0,
66
+ )
67
+
68
+ num_things_classes = 1
69
+ num_stuff_classes = 0
70
+ num_classes = num_things_classes + num_stuff_classes
71
+ prompt_shape = (30, 5)
72
+
73
+ model_cfg = dict(
74
+ type='SegSAMAnchorPLer',
75
+ hyperparameters=dict(
76
+ optimizer=optimizer,
77
+ param_scheduler=param_scheduler,
78
+ evaluator=evaluator,
79
+ ),
80
+ need_train_names=sub_model_train,
81
+ data_preprocessor=data_preprocessor,
82
+ backbone=dict(
83
+ type='vit_h',
84
+ # checkpoint='pretrain/sam/sam_vit_h_4b8939.pth',
85
+ # type='vit_b',
86
+ # checkpoint='pretrain/sam/sam_vit_b_01ec64.pth',
87
+ ),
88
+ panoptic_head=dict(
89
+ type='SAMAnchorInstanceHead',
90
+ neck=dict(
91
+ type='SAMAggregatorNeck',
92
+ in_channels=[1280] * 32,
93
+ # in_channels=[768] * 12,
94
+ inner_channels=32,
95
+ selected_channels=range(4, 32, 2),
96
+ # selected_channels=range(4, 12, 2),
97
+ out_channels=256,
98
+ up_sample_scale=4,
99
+ ),
100
+ rpn_head=dict(
101
+ type='mmdet.RPNHead',
102
+ in_channels=256,
103
+ feat_channels=256,
104
+ anchor_generator=dict(
105
+ type='mmdet.AnchorGenerator',
106
+ scales=[2, 4, 8, 16, 32, 64],
107
+ ratios=[0.5, 1.0, 2.0],
108
+ strides=[8, 16, 32]),
109
+ bbox_coder=dict(
110
+ type='mmdet.DeltaXYWHBBoxCoder',
111
+ target_means=[.0, .0, .0, .0],
112
+ target_stds=[1.0, 1.0, 1.0, 1.0]),
113
+ loss_cls=dict(
114
+ type='mmdet.CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
115
+ loss_bbox=dict(type='mmdet.SmoothL1Loss', loss_weight=1.0)),
116
+ roi_head=dict(
117
+ type='SAMAnchorPromptRoIHead',
118
+ bbox_roi_extractor=dict(
119
+ type='mmdet.SingleRoIExtractor',
120
+ roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
121
+ out_channels=256,
122
+ featmap_strides=[8, 16, 32]),
123
+ bbox_head=dict(
124
+ type='mmdet.Shared2FCBBoxHead',
125
+ in_channels=256,
126
+ fc_out_channels=1024,
127
+ roi_feat_size=7,
128
+ num_classes=num_classes,
129
+ bbox_coder=dict(
130
+ type='mmdet.DeltaXYWHBBoxCoder',
131
+ target_means=[0., 0., 0., 0.],
132
+ target_stds=[0.1, 0.1, 0.2, 0.2]),
133
+ reg_class_agnostic=False,
134
+ loss_cls=dict(
135
+ type='mmdet.CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
136
+ loss_bbox=dict(type='mmdet.SmoothL1Loss', loss_weight=1.0)),
137
+ mask_roi_extractor=dict(
138
+ type='mmdet.SingleRoIExtractor',
139
+ roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),
140
+ out_channels=256,
141
+ featmap_strides=[8, 16, 32]),
142
+ mask_head=dict(
143
+ type='SAMPromptMaskHead',
144
+ per_query_point=prompt_shape[1],
145
+ with_sincos=True,
146
+ class_agnostic=True,
147
+ loss_mask=dict(
148
+ type='mmdet.CrossEntropyLoss', use_mask=True, loss_weight=1.0))),
149
+ # model training and testing settings
150
+ train_cfg=dict(
151
+ rpn=dict(
152
+ assigner=dict(
153
+ type='mmdet.MaxIoUAssigner',
154
+ pos_iou_thr=0.7,
155
+ neg_iou_thr=0.3,
156
+ min_pos_iou=0.3,
157
+ match_low_quality=True,
158
+ ignore_iof_thr=-1),
159
+ sampler=dict(
160
+ type='mmdet.RandomSampler',
161
+ num=512,
162
+ pos_fraction=0.5,
163
+ neg_pos_ub=-1,
164
+ add_gt_as_proposals=False),
165
+ allowed_border=-1,
166
+ pos_weight=-1,
167
+ debug=False),
168
+ rpn_proposal=dict(
169
+ nms_pre=2000,
170
+ max_per_img=1000,
171
+ nms=dict(type='nms', iou_threshold=0.7),
172
+ min_bbox_size=0),
173
+ rcnn=dict(
174
+ assigner=dict(
175
+ type='mmdet.MaxIoUAssigner',
176
+ pos_iou_thr=0.5,
177
+ neg_iou_thr=0.5,
178
+ min_pos_iou=0.5,
179
+ match_low_quality=True,
180
+ ignore_iof_thr=-1),
181
+ sampler=dict(
182
+ type='mmdet.RandomSampler',
183
+ num=256,
184
+ pos_fraction=0.25,
185
+ neg_pos_ub=-1,
186
+ add_gt_as_proposals=True),
187
+ mask_size=1024,
188
+ pos_weight=-1,
189
+ debug=False)),
190
+ test_cfg=dict(
191
+ rpn=dict(
192
+ nms_pre=1000,
193
+ max_per_img=1000,
194
+ nms=dict(type='nms', iou_threshold=0.7),
195
+ min_bbox_size=0),
196
+ rcnn=dict(
197
+ score_thr=0.05,
198
+ nms=dict(type='nms', iou_threshold=0.5),
199
+ max_per_img=100,
200
+ mask_thr_binary=0.5)
201
+ )
202
+ )
203
+ )
204
+
205
+ task_name = 'ssdd_ins'
206
+ exp_name = 'E20230629_0'
207
+ logger = dict(
208
+ type='WandbLogger',
209
+ project=task_name,
210
+ group='sam-anchor',
211
+ name=exp_name
212
+ )
213
+
214
+
215
+ vis_backends = [dict(type='mmdet.LocalVisBackend')]
216
+ visualizer = dict(
217
+ type='mmdet.DetLocalVisualizer',
218
+ vis_backends=vis_backends,
219
+ name='visualizer',
220
+ fig_save_cfg=dict(
221
+ frameon=False,
222
+ figsize=(40, 20),
223
+ # dpi=300,
224
+ ),
225
+ line_width=2,
226
+ alpha=0.8
227
+ )
228
+
229
+ callbacks = [
230
+ param_scheduler_callback,
231
+ dict(
232
+ type='ModelCheckpoint',
233
+ dirpath=f'results/{task_name}/{exp_name}/checkpoints',
234
+ save_last=True,
235
+ mode='max',
236
+ monitor='valsegm_map_0',
237
+ save_top_k=3,
238
+ filename='epoch_{epoch}-map_{valsegm_map_0:.4f}'
239
+ ),
240
+ dict(
241
+ type='LearningRateMonitor',
242
+ logging_interval='step'
243
+ )
244
+ ]
245
+
246
+
247
+ trainer_cfg = dict(
248
+ compiled_model=False,
249
+ accelerator="auto",
250
+ strategy="auto",
251
+ # strategy="ddp",
252
+ # strategy='ddp_find_unused_parameters_true',
253
+ # precision='32',
254
+ # precision='16-mixed',
255
+ devices=8,
256
+ default_root_dir=f'results/{task_name}/{exp_name}',
257
+ # default_root_dir='results/tmp',
258
+ max_epochs=max_epochs,
259
+ logger=logger,
260
+ callbacks=callbacks,
261
+ log_every_n_steps=5,
262
+ check_val_every_n_epoch=5,
263
+ benchmark=True,
264
+ # sync_batchnorm=True,
265
+ # fast_dev_run=True,
266
+
267
+ # limit_train_batches=1,
268
+ # limit_val_batches=0,
269
+ # limit_test_batches=None,
270
+ # limit_predict_batches=None,
271
+ # overfit_batches=0.0,
272
+
273
+ # val_check_interval=None,
274
+ # num_sanity_val_steps=0,
275
+ # enable_checkpointing=None,
276
+ # enable_progress_bar=None,
277
+ # enable_model_summary=None,
278
+ # accumulate_grad_batches=32,
279
+ # gradient_clip_val=15,
280
+ # gradient_clip_algorithm='norm',
281
+ # deterministic=None,
282
+ # inference_mode: bool=True,
283
+ use_distributed_sampler=True,
284
+ # profiler="simple",
285
+ # detect_anomaly=False,
286
+ # barebones=False,
287
+ # plugins=None,
288
+ # reload_dataloaders_every_n_epochs=0,
289
+ )
290
+
291
+
292
+ backend_args = None
293
+ train_pipeline = [
294
+ dict(type='mmdet.LoadImageFromFile'),
295
+ dict(type='mmdet.LoadAnnotations', with_bbox=True, with_mask=True),
296
+ dict(type='mmdet.Resize', scale=image_size),
297
+ dict(type='mmdet.RandomFlip', prob=0.5),
298
+ dict(type='mmdet.PackDetInputs')
299
+ ]
300
+
301
+ test_pipeline = [
302
+ dict(type='mmdet.LoadImageFromFile', backend_args=backend_args),
303
+ dict(type='mmdet.Resize', scale=image_size),
304
+ # If you don't have a gt annotation, delete the pipeline
305
+ dict(type='mmdet.LoadAnnotations', with_bbox=True, with_mask=True),
306
+ dict(
307
+ type='mmdet.PackDetInputs',
308
+ meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
309
+ 'scale_factor'))
310
+ ]
311
+
312
+
313
+ train_batch_size_per_gpu = 2
314
+ train_num_workers = 2
315
+ test_batch_size_per_gpu = 2
316
+ test_num_workers = 2
317
+ persistent_workers = True
318
+
319
+ data_parent = '/mnt/search01/dataset/cky_data/SSDD'
320
+ dataset_type = 'SSDDInsSegDataset'
321
+
322
+
323
+ val_loader = dict(
324
+ batch_size=test_batch_size_per_gpu,
325
+ num_workers=test_num_workers,
326
+ persistent_workers=persistent_workers,
327
+ pin_memory=True,
328
+ dataset=dict(
329
+ type=dataset_type,
330
+ data_root=data_parent,
331
+ # ann_file='NWPU_instances_val.json',
332
+ # data_prefix=dict(img_path='positive image set'),
333
+ ann_file='annotations/SSDD_instances_val.json',
334
+ data_prefix=dict(img_path='imgs'),
335
+ test_mode=True,
336
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
337
+ pipeline=test_pipeline,
338
+ backend_args=backend_args))
339
+
340
+ predict_pipeline = [
341
+ dict(type='mmdet.Resize', scale=image_size),
342
+ dict(
343
+ type='mmdet.PackDetInputs',
344
+ meta_keys=('ori_shape', 'img_shape', 'scale_factor'))
345
+ ]
346
+
347
+
348
+ datamodule_cfg = dict(
349
+ type='PLDataModule',
350
+ train_loader=dict(
351
+ batch_size=train_batch_size_per_gpu,
352
+ num_workers=train_num_workers,
353
+ persistent_workers=persistent_workers,
354
+ pin_memory=True,
355
+ dataset=dict(
356
+ type=dataset_type,
357
+ data_root=data_parent,
358
+ # ann_file='NWPU_instances_train.json',
359
+ # data_prefix=dict(img_path='positive image set'),
360
+ ann_file='annotations/SSDD_instances_train.json',
361
+ data_prefix=dict(img_path='imgs'),
362
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
363
+ pipeline=train_pipeline,
364
+ backend_args=backend_args)
365
+ ),
366
+ val_loader=val_loader,
367
+ # test_loader=val_loader
368
+ predict_loader=val_loader
369
+ )
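
The two-stage schedule declared above (one epoch of LinearLR warm-up, then CosineAnnealingLR over max_epochs) is consumed by MMEngine's parameter-scheduler hooks. As a rough analogue only, and not the project's actual code, the same learning-rate curve can be reproduced with stock PyTorch schedulers:

import torch

# Dummy parameter/optimizer mirroring the AdamW settings above.
param = torch.nn.Parameter(torch.zeros(1))
opt = torch.optim.AdamW([param], lr=5e-4, weight_decay=1e-3)

max_epochs = 1000
warmup = torch.optim.lr_scheduler.LinearLR(opt, start_factor=1e-4, total_iters=1)
cosine = torch.optim.lr_scheduler.CosineAnnealingLR(opt, T_max=max_epochs)
# Warm up for one epoch, then anneal; the config additionally converts the
# warm-up stage to per-iteration updates (convert_to_iter_based=True).
sched = torch.optim.lr_scheduler.SequentialLR(opt, [warmup, cosine], milestones=[1])

for epoch in range(max_epochs):
    # ... one training epoch would run here ...
    sched.step()
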
configs/huggingface/rsprompter_anchor_WHU_config.py ADDED
@@ -0,0 +1,371 @@
1
+ custom_imports = dict(imports=['mmseg.datasets', 'mmseg.models'], allow_failed_imports=False)
2
+
3
+ sub_model_train = [
4
+ 'panoptic_head',
5
+ 'data_preprocessor'
6
+ ]
7
+
8
+ sub_model_optim = {
9
+ 'panoptic_head': {'lr_mult': 1},
10
+ }
11
+
12
+ max_epochs = 2000
13
+
14
+ optimizer = dict(
15
+ type='AdamW',
16
+ sub_model=sub_model_optim,
17
+ lr=0.0005,
18
+ weight_decay=1e-3
19
+ )
20
+
21
+ param_scheduler = [
22
+ # warm up learning rate scheduler
23
+ dict(
24
+ type='LinearLR',
25
+ start_factor=1e-4,
26
+ by_epoch=True,
27
+ begin=0,
28
+ end=1,
29
+ # update by iter
30
+ convert_to_iter_based=True),
31
+ # main learning rate scheduler
32
+ dict(
33
+ type='CosineAnnealingLR',
34
+ T_max=max_epochs,
35
+ by_epoch=True,
36
+ begin=1,
37
+ end=max_epochs,
38
+ ),
39
+ ]
40
+
41
+ param_scheduler_callback = dict(
42
+ type='ParamSchedulerHook'
43
+ )
44
+
45
+
46
+ image_size = (1024, 1024)
47
+
48
+ data_preprocessor = dict(
49
+ type='mmdet.DetDataPreprocessor',
50
+ mean=[123.675, 116.28, 103.53],
51
+ std=[58.395, 57.12, 57.375],
52
+ bgr_to_rgb=True,
53
+ pad_size_divisor=32,
54
+ pad_mask=True,
55
+ mask_pad_value=0,
56
+ )
57
+
58
+ num_things_classes = 1
59
+ num_stuff_classes = 0
60
+ num_classes = num_things_classes + num_stuff_classes
61
+ prompt_shape = (90, 4)
62
+
63
+ model_cfg = dict(
64
+ type='SegSAMAnchorPLer',
65
+ hyperparameters=dict(
66
+ optimizer=optimizer,
67
+ param_scheduler=param_scheduler,
68
+ ),
69
+ need_train_names=sub_model_train,
70
+ data_preprocessor=data_preprocessor,
71
+ backbone=dict(
72
+ type='vit_h'
73
+ # type='vit_b',
74
+ # checkpoint='pretrain/sam/sam_vit_b_01ec64.pth',
75
+ ),
76
+ panoptic_head=dict(
77
+ type='SAMAnchorInstanceHead',
78
+ neck=dict(
79
+ type='SAMAggregatorNeck',
80
+ in_channels=[1280] * 32,
81
+ # in_channels=[768] * 12,
82
+ inner_channels=32,
83
+ selected_channels=range(4, 32, 2),
84
+ # selected_channels=range(4, 12, 2),
85
+ out_channels=256,
86
+ up_sample_scale=4,
87
+ ),
88
+ rpn_head=dict(
89
+ type='mmdet.RPNHead',
90
+ in_channels=256,
91
+ feat_channels=256,
92
+ anchor_generator=dict(
93
+ type='mmdet.AnchorGenerator',
94
+ scales=[2, 4, 8, 16, 32, 64],
95
+ ratios=[0.5, 1.0, 2.0],
96
+ strides=[8, 16, 32]),
97
+ bbox_coder=dict(
98
+ type='mmdet.DeltaXYWHBBoxCoder',
99
+ target_means=[.0, .0, .0, .0],
100
+ target_stds=[1.0, 1.0, 1.0, 1.0]),
101
+ loss_cls=dict(
102
+ type='mmdet.CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
103
+ loss_bbox=dict(type='mmdet.SmoothL1Loss', loss_weight=1.0)),
104
+ roi_head=dict(
105
+ type='SAMAnchorPromptRoIHead',
106
+ bbox_roi_extractor=dict(
107
+ type='mmdet.SingleRoIExtractor',
108
+ roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
109
+ out_channels=256,
110
+ featmap_strides=[8, 16, 32]),
111
+ bbox_head=dict(
112
+ type='mmdet.Shared2FCBBoxHead',
113
+ in_channels=256,
114
+ fc_out_channels=1024,
115
+ roi_feat_size=7,
116
+ num_classes=num_classes,
117
+ bbox_coder=dict(
118
+ type='mmdet.DeltaXYWHBBoxCoder',
119
+ target_means=[0., 0., 0., 0.],
120
+ target_stds=[0.1, 0.1, 0.2, 0.2]),
121
+ reg_class_agnostic=False,
122
+ loss_cls=dict(
123
+ type='mmdet.CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
124
+ loss_bbox=dict(type='mmdet.SmoothL1Loss', loss_weight=1.0)),
125
+ mask_roi_extractor=dict(
126
+ type='mmdet.SingleRoIExtractor',
127
+ roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),
128
+ out_channels=256,
129
+ featmap_strides=[8, 16, 32]),
130
+ mask_head=dict(
131
+ type='SAMPromptMaskHead',
132
+ per_query_point=prompt_shape[1],
133
+ with_sincos=True,
134
+ class_agnostic=True,
135
+ loss_mask=dict(
136
+ type='mmdet.CrossEntropyLoss', use_mask=True, loss_weight=1.0))),
137
+ # model training and testing settings
138
+ train_cfg=dict(
139
+ rpn=dict(
140
+ assigner=dict(
141
+ type='mmdet.MaxIoUAssigner',
142
+ pos_iou_thr=0.7,
143
+ neg_iou_thr=0.3,
144
+ min_pos_iou=0.3,
145
+ match_low_quality=True,
146
+ ignore_iof_thr=-1),
147
+ sampler=dict(
148
+ type='mmdet.RandomSampler',
149
+ num=512,
150
+ pos_fraction=0.5,
151
+ neg_pos_ub=-1,
152
+ add_gt_as_proposals=False),
153
+ allowed_border=-1,
154
+ pos_weight=-1,
155
+ debug=False),
156
+ rpn_proposal=dict(
157
+ nms_pre=2000,
158
+ max_per_img=1000,
159
+ nms=dict(type='nms', iou_threshold=0.7),
160
+ min_bbox_size=0),
161
+ rcnn=dict(
162
+ assigner=dict(
163
+ type='mmdet.MaxIoUAssigner',
164
+ pos_iou_thr=0.5,
165
+ neg_iou_thr=0.5,
166
+ min_pos_iou=0.5,
167
+ match_low_quality=True,
168
+ ignore_iof_thr=-1),
169
+ sampler=dict(
170
+ type='mmdet.RandomSampler',
171
+ num=256,
172
+ pos_fraction=0.25,
173
+ neg_pos_ub=-1,
174
+ add_gt_as_proposals=True),
175
+ mask_size=1024,
176
+ pos_weight=-1,
177
+ debug=False)),
178
+ test_cfg=dict(
179
+ rpn=dict(
180
+ nms_pre=1000,
181
+ max_per_img=1000,
182
+ nms=dict(type='nms', iou_threshold=0.7),
183
+ min_bbox_size=0),
184
+ rcnn=dict(
185
+ score_thr=0.05,
186
+ nms=dict(type='nms', iou_threshold=0.5),
187
+ max_per_img=100,
188
+ mask_thr_binary=0.5)
189
+ )
190
+ )
191
+ )
192
+
193
+ task_name = 'whu_ins'
194
+ exp_name = 'E20230629_0'
195
+ logger = dict(
196
+ type='WandbLogger',
197
+ project=task_name,
198
+ group='sam-anchor',
199
+ name=exp_name
200
+ )
201
+
202
+
203
+ callbacks = [
204
+ param_scheduler_callback,
205
+ dict(
206
+ type='ModelCheckpoint',
207
+ dirpath=f'results/{task_name}/{exp_name}/checkpoints',
208
+ save_last=True,
209
+ mode='max',
210
+ monitor='valsegm_map_0',
211
+ save_top_k=3,
212
+ filename='epoch_{epoch}-map_{valsegm_map_0:.4f}'
213
+ ),
214
+ dict(
215
+ type='LearningRateMonitor',
216
+ logging_interval='step'
217
+ ),
218
+ dict(
219
+ type='DetVisualizationHook',
220
+ draw=True,
221
+ interval=1,
222
+ score_thr=0.4,
223
+ show=False,
224
+ wait_time=1.,
225
+ test_out_dir='visualization',
226
+ )
227
+ ]
228
+
229
+ vis_backends = [dict(type='mmdet.LocalVisBackend')]
230
+ visualizer = dict(
231
+ type='mmdet.DetLocalVisualizer',
232
+ vis_backends=vis_backends,
233
+ name='visualizer',
234
+ fig_save_cfg=dict(
235
+ frameon=False,
236
+ figsize=(40, 20),
237
+ # dpi=300,
238
+ ),
239
+ line_width=2,
240
+ alpha=0.8
241
+ )
242
+
243
+ trainer_cfg = dict(
244
+ compiled_model=False,
245
+ accelerator="auto",
246
+ strategy="auto",
247
+ # strategy="ddp",
248
+ # strategy='ddp_find_unused_parameters_true',
249
+ # precision='32',
250
+ # precision='16-mixed',
251
+ devices=8,
252
+ default_root_dir=f'results/{task_name}/{exp_name}',
253
+ # default_root_dir='results/tmp',
254
+ max_epochs=max_epochs,
255
+ logger=logger,
256
+ callbacks=callbacks,
257
+ log_every_n_steps=10,
258
+ check_val_every_n_epoch=5,
259
+ benchmark=True,
260
+ # sync_batchnorm=True,
261
+ # fast_dev_run=True,
262
+
263
+ # limit_train_batches=1,
264
+ # limit_val_batches=0,
265
+ # limit_test_batches=None,
266
+ # limit_predict_batches=None,
267
+ # overfit_batches=0.0,
268
+
269
+ # val_check_interval=None,
270
+ # num_sanity_val_steps=0,
271
+ # enable_checkpointing=None,
272
+ # enable_progress_bar=None,
273
+ # enable_model_summary=None,
274
+ # accumulate_grad_batches=32,
275
+ # gradient_clip_val=15,
276
+ # gradient_clip_algorithm='norm',
277
+ # deterministic=None,
278
+ # inference_mode: bool=True,
279
+ use_distributed_sampler=True,
280
+ # profiler="simple",
281
+ # detect_anomaly=False,
282
+ # barebones=False,
283
+ # plugins=None,
284
+ # reload_dataloaders_every_n_epochs=0,
285
+ )
286
+
287
+
288
+ backend_args = None
289
+ train_pipeline = [
290
+ dict(type='mmdet.LoadImageFromFile'),
291
+ dict(type='mmdet.LoadAnnotations', with_bbox=True, with_mask=True),
292
+ dict(type='mmdet.Resize', scale=image_size),
293
+ dict(type='mmdet.RandomFlip', prob=0.5),
294
+ dict(type='mmdet.PackDetInputs')
295
+ ]
296
+
297
+ test_pipeline = [
298
+ dict(type='mmdet.LoadImageFromFile', backend_args=backend_args),
299
+ dict(type='mmdet.Resize', scale=image_size),
300
+ # If you don't have a gt annotation, delete the pipeline
301
+ dict(type='mmdet.LoadAnnotations', with_bbox=True, with_mask=True),
302
+ dict(
303
+ type='mmdet.PackDetInputs',
304
+ meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
305
+ 'scale_factor'))
306
+ ]
307
+
308
+ predict_pipeline = [
309
+ dict(type='mmdet.Resize', scale=image_size),
310
+ dict(
311
+ type='mmdet.PackDetInputs',
312
+ meta_keys=('ori_shape', 'img_shape', 'scale_factor'))
313
+ ]
314
+
315
+
316
+ train_batch_size_per_gpu = 2
317
+ train_num_workers = 2
318
+ test_batch_size_per_gpu = 2
319
+ test_num_workers = 2
320
+ persistent_workers = True
321
+
322
+
323
+ data_parent = '/mnt/search01/dataset/cky_data/WHU'
324
+ train_data_prefix = 'train/'
325
+ val_data_prefix = 'test/'
326
+ dataset_type = 'WHUInsSegDataset'
327
+
328
+
329
+ val_loader = dict(
330
+ batch_size=test_batch_size_per_gpu,
331
+ num_workers=test_num_workers,
332
+ persistent_workers=persistent_workers,
333
+ pin_memory=True,
334
+ dataset=dict(
335
+ type=dataset_type,
336
+ data_root=data_parent,
337
+ # ann_file='NWPU_instances_val.json',
338
+ # data_prefix=dict(img_path='positive image set'),
339
+ # ann_file='annotations/SSDD_instances_val.json',
340
+ # data_prefix=dict(img_path='imgs'),
341
+ ann_file='annotations/WHU_building_test.json',
342
+ data_prefix=dict(img_path=val_data_prefix + '/image'),
343
+ test_mode=True,
344
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
345
+ pipeline=test_pipeline,
346
+ backend_args=backend_args))
347
+
348
+ datamodule_cfg = dict(
349
+ type='PLDataModule',
350
+ train_loader=dict(
351
+ batch_size=train_batch_size_per_gpu,
352
+ num_workers=train_num_workers,
353
+ persistent_workers=persistent_workers,
354
+ pin_memory=True,
355
+ dataset=dict(
356
+ type=dataset_type,
357
+ data_root=data_parent,
358
+ # ann_file='NWPU_instances_train.json',
359
+ # data_prefix=dict(img_path='positive image set'),
360
+ # ann_file='annotations/SSDD_instances_train.json',
361
+ # data_prefix=dict(img_path='imgs'),
362
+ ann_file='annotations/WHU_building_train.json',
363
+ data_prefix=dict(img_path=train_data_prefix + '/image'),
364
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
365
+ pipeline=train_pipeline,
366
+ backend_args=backend_args)
367
+ ),
368
+ val_loader=val_loader,
369
+ # test_loader=val_loader
370
+ predict_loader=val_loader
371
+ )
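
In the config above, sub_model_train and sub_model_optim restrict training to the SAM-prompting head (the ViT-H backbone stays frozen) and scale its learning rate by lr_mult. How the project's optimizer wrapper consumes these keys is not shown in this file; the sketch below is only an assumed plain-PyTorch equivalent using per-module parameter groups, with a toy module standing in for the real model.

import torch
from torch import nn

class ToyModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.panoptic_head = nn.Linear(8, 8)   # listed in need_train_names
        self.backbone = nn.Linear(8, 8)        # stand-in for the frozen SAM encoder

model = ToyModel()
base_lr, sub_model_optim = 5e-4, {'panoptic_head': {'lr_mult': 1}}

# Freeze everything that is not in the trainable sub-model list.
for p in model.backbone.parameters():
    p.requires_grad = False

# One parameter group per trainable sub-module, lr scaled by its multiplier.
groups = [
    {'params': getattr(model, name).parameters(), 'lr': base_lr * opts['lr_mult']}
    for name, opts in sub_model_optim.items()
]
optimizer = torch.optim.AdamW(groups, lr=base_lr, weight_decay=1e-3)
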
configs/rsprompter/mask2former_nwpu_config.py ADDED
@@ -0,0 +1,338 @@
1
+ custom_imports = dict(imports=['mmseg.datasets', 'mmseg.models', 'mmdet.models'], allow_failed_imports=False)
2
+ max_epochs = 2000
3
+
4
+ optimizer = dict(
5
+ type='AdamW',
6
+ lr=0.0002,
7
+ weight_decay=1e-4
8
+ )
9
+
10
+ param_scheduler = [
11
+ # warm up learning rate scheduler
12
+ dict(
13
+ type='LinearLR',
14
+ start_factor=1e-4,
15
+ by_epoch=True,
16
+ begin=0,
17
+ end=1,
18
+ # update by iter
19
+ convert_to_iter_based=True),
20
+ # main learning rate scheduler
21
+ dict(
22
+ type='CosineAnnealingLR',
23
+ T_max=max_epochs,
24
+ by_epoch=True,
25
+ begin=1,
26
+ end=max_epochs,
27
+ )
28
+ ]
29
+
30
+ param_scheduler_callback = dict(
31
+ type='ParamSchedulerHook'
32
+ )
33
+
34
+
35
+ evaluator_ = dict(
36
+ type='CocoPLMetric',
37
+ metric=['bbox', 'segm'],
38
+ proposal_nums=[1, 10, 100]
39
+ )
40
+
41
+ evaluator = dict(
42
+ val_evaluator=evaluator_,
43
+ test_evaluator=evaluator_
44
+ )
45
+
46
+
47
+ image_size = (1024, 1024)
48
+ data_preprocessor = dict(
49
+ type='mmdet.DetDataPreprocessor',
50
+ mean=[123.675, 116.28, 103.53],
51
+ std=[58.395, 57.12, 57.375],
52
+ bgr_to_rgb=True,
53
+ pad_mask=True,
54
+ mask_pad_value=0,
55
+ pad_size_divisor=32
56
+ )
57
+
58
+ num_things_classes = 10
59
+ num_stuff_classes = 0
60
+ num_classes = num_things_classes + num_stuff_classes
61
+ num_queries = 60
62
+
63
+ # model settings
64
+ model = dict(
65
+ type='mmdet.Mask2Former',
66
+ data_preprocessor=data_preprocessor,
67
+ backbone=dict(
68
+ type='mmdet.ResNet',
69
+ depth=50,
70
+ num_stages=4,
71
+ out_indices=(0, 1, 2, 3),
72
+ frozen_stages=-1,
73
+ norm_cfg=dict(type='BN', requires_grad=False),
74
+ norm_eval=True,
75
+ style='pytorch',
76
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
77
+ panoptic_head=dict(
78
+ type='mmdet.Mask2FormerHead',
79
+ in_channels=[256, 512, 1024, 2048], # pass to pixel_decoder inside
80
+ strides=[4, 8, 16, 32],
81
+ feat_channels=256,
82
+ out_channels=256,
83
+ num_things_classes=num_things_classes,
84
+ num_stuff_classes=num_stuff_classes,
85
+ num_queries=num_queries,
86
+ num_transformer_feat_level=3,
87
+ pixel_decoder=dict(
88
+ type='mmdet.MSDeformAttnPixelDecoder',
89
+ num_outs=3,
90
+ norm_cfg=dict(type='GN', num_groups=32),
91
+ act_cfg=dict(type='ReLU'),
92
+ encoder=dict( # DeformableDetrTransformerEncoder
93
+ # num_layers=6,
94
+ num_layers=2,
95
+ layer_cfg=dict( # DeformableDetrTransformerEncoderLayer
96
+ self_attn_cfg=dict( # MultiScaleDeformableAttention
97
+ embed_dims=256,
98
+ num_heads=8,
99
+ num_levels=3,
100
+ num_points=4,
101
+ dropout=0.0,
102
+ batch_first=True),
103
+ ffn_cfg=dict(
104
+ embed_dims=256,
105
+ feedforward_channels=1024,
106
+ num_fcs=2,
107
+ ffn_drop=0.0,
108
+ act_cfg=dict(type='ReLU', inplace=True)))),
109
+ positional_encoding=dict(num_feats=128, normalize=True)),
110
+ enforce_decoder_input_project=False,
111
+ positional_encoding=dict(num_feats=128, normalize=True),
112
+ transformer_decoder=dict( # Mask2FormerTransformerDecoder
113
+ return_intermediate=True,
114
+ # num_layers=9,
115
+ num_layers=3,
116
+ layer_cfg=dict( # Mask2FormerTransformerDecoderLayer
117
+ self_attn_cfg=dict( # MultiheadAttention
118
+ embed_dims=256,
119
+ num_heads=8,
120
+ dropout=0.0,
121
+ batch_first=True),
122
+ cross_attn_cfg=dict( # MultiheadAttention
123
+ embed_dims=256,
124
+ num_heads=8,
125
+ dropout=0.0,
126
+ batch_first=True),
127
+ ffn_cfg=dict(
128
+ embed_dims=256,
129
+ feedforward_channels=2048,
130
+ num_fcs=2,
131
+ ffn_drop=0.0,
132
+ act_cfg=dict(type='ReLU', inplace=True))),
133
+ init_cfg=None),
134
+ loss_cls=dict(
135
+ type='mmdet.CrossEntropyLoss',
136
+ use_sigmoid=False,
137
+ loss_weight=2.0,
138
+ reduction='mean',
139
+ class_weight=[1.0] * num_classes + [0.1]),
140
+ loss_mask=dict(
141
+ type='mmdet.CrossEntropyLoss',
142
+ use_sigmoid=True,
143
+ reduction='mean',
144
+ loss_weight=5.0),
145
+ loss_dice=dict(
146
+ type='mmdet.DiceLoss',
147
+ use_sigmoid=True,
148
+ activate=True,
149
+ reduction='mean',
150
+ naive_dice=True,
151
+ eps=1.0,
152
+ loss_weight=5.0)),
153
+ panoptic_fusion_head=dict(
154
+ type='mmdet.MaskFormerFusionHead',
155
+ num_things_classes=num_things_classes,
156
+ num_stuff_classes=num_stuff_classes,
157
+ loss_panoptic=None,
158
+ init_cfg=None),
159
+ train_cfg=dict(
160
+ num_points=12544,
161
+ oversample_ratio=3.0,
162
+ importance_sample_ratio=0.75,
163
+ assigner=dict(
164
+ type='mmdet.HungarianAssigner',
165
+ match_costs=[
166
+ dict(type='mmdet.ClassificationCost', weight=2.0),
167
+ dict(
168
+ type='mmdet.CrossEntropyLossCost', weight=5.0, use_sigmoid=True),
169
+ dict(type='mmdet.DiceCost', weight=5.0, pred_act=True, eps=1.0)
170
+ ]),
171
+ sampler=dict(type='mmdet.MaskPseudoSampler')),
172
+ test_cfg=dict(
173
+ panoptic_on=False,
174
+ # For now, the dataset does not support
175
+ # evaluating semantic segmentation metric.
176
+ semantic_on=False,
177
+ instance_on=True,
178
+ # max_per_image is for instance segmentation.
179
+ max_per_image=100,
180
+ iou_thr=0.8,
181
+ # In Mask2Former's panoptic postprocessing,
182
+ # it will filter mask area where score is less than 0.5 .
183
+ filter_low_score=True),
184
+ init_cfg=None)
185
+
186
+
187
+ model_cfg = dict(
188
+ type='MMDetPLer',
189
+ hyperparameters=dict(
190
+ optimizer=optimizer,
191
+ param_scheduler=param_scheduler,
192
+ evaluator=evaluator,
193
+ ),
194
+ whole_model=model,
195
+ )
196
+
197
+ task_name = 'nwpu_ins'
198
+ exp_name = 'E20230604_4'
199
+ logger = dict(
200
+ type='WandbLogger',
201
+ project=task_name,
202
+ group='mask2former',
203
+ name=exp_name
204
+ )
205
+ # logger = None
206
+
207
+
208
+ callbacks = [
209
+ param_scheduler_callback,
210
+ dict(
211
+ type='ModelCheckpoint',
212
+ dirpath=f'results/{task_name}/{exp_name}/checkpoints',
213
+ save_last=True,
214
+ mode='max',
215
+ monitor='valsegm_map_0',
216
+ save_top_k=2,
217
+ filename='epoch_{epoch}-map_{valsegm_map_0:.4f}'
218
+ ),
219
+ dict(
220
+ type='LearningRateMonitor',
221
+ logging_interval='step'
222
+ )
223
+ ]
224
+
225
+
226
+ trainer_cfg = dict(
227
+ compiled_model=False,
228
+ accelerator="auto",
229
+ strategy="auto",
230
+ # strategy="ddp",
231
+ # strategy='ddp_find_unused_parameters_true',
232
+ # precision='32',
233
+ # precision='16-mixed',
234
+ devices=8,
235
+ default_root_dir=f'results/{task_name}/{exp_name}',
236
+ # default_root_dir='results/tmp',
237
+ max_epochs=max_epochs,
238
+ logger=logger,
239
+ callbacks=callbacks,
240
+ log_every_n_steps=5,
241
+ check_val_every_n_epoch=5,
242
+ benchmark=True,
243
+ # sync_batchnorm=True,
244
+ # fast_dev_run=True,
245
+
246
+ # limit_train_batches=1,
247
+ # limit_val_batches=0,
248
+ # limit_test_batches=None,
249
+ # limit_predict_batches=None,
250
+ # overfit_batches=0.0,
251
+
252
+ # val_check_interval=None,
253
+ # num_sanity_val_steps=0,
254
+ # enable_checkpointing=None,
255
+ # enable_progress_bar=None,
256
+ # enable_model_summary=None,
257
+ # accumulate_grad_batches=32,
258
+ # gradient_clip_val=15,
259
+ # gradient_clip_algorithm='norm',
260
+ # deterministic=None,
261
+ # inference_mode: bool=True,
262
+ use_distributed_sampler=True,
263
+ # profiler="simple",
264
+ # detect_anomaly=False,
265
+ # barebones=False,
266
+ # plugins=None,
267
+ # reload_dataloaders_every_n_epochs=0,
268
+ )
269
+
270
+
271
+ backend_args = None
272
+ train_pipeline = [
273
+ dict(type='mmdet.LoadImageFromFile'),
274
+ dict(type='mmdet.LoadAnnotations', with_bbox=True, with_mask=True),
275
+ dict(type='mmdet.Resize', scale=image_size),
276
+ dict(type='mmdet.RandomFlip', prob=0.5),
277
+ dict(type='mmdet.PackDetInputs')
278
+ ]
279
+
280
+ test_pipeline = [
281
+ dict(type='mmdet.LoadImageFromFile', backend_args=backend_args),
282
+ dict(type='mmdet.Resize', scale=image_size),
283
+ # If you don't have a gt annotation, delete the pipeline
284
+ dict(type='mmdet.LoadAnnotations', with_bbox=True, with_mask=True),
285
+ dict(
286
+ type='mmdet.PackDetInputs',
287
+ meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
288
+ 'scale_factor'))
289
+ ]
290
+
291
+
292
+ train_batch_size_per_gpu = 8
293
+ train_num_workers = 4
294
+ test_batch_size_per_gpu = 8
295
+ test_num_workers = 4
296
+ persistent_workers = True
297
+
298
+ data_parent = '/mnt/search01/dataset/cky_data/NWPU10'
299
+ train_data_prefix = ''
300
+ val_data_prefix = ''
301
+
302
+ dataset_type = 'NWPUInsSegDataset'
303
+
304
+ val_loader = dict(
305
+ batch_size=test_batch_size_per_gpu,
306
+ num_workers=test_num_workers,
307
+ persistent_workers=persistent_workers,
308
+ pin_memory=True,
309
+ dataset=dict(
310
+ type=dataset_type,
311
+ data_root=data_parent,
312
+ ann_file='NWPU_instances_val.json',
313
+ data_prefix=dict(img_path='positive image set'),
314
+ test_mode=True,
315
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
316
+ pipeline=test_pipeline,
317
+ backend_args=backend_args))
318
+
319
+ datamodule_cfg = dict(
320
+ type='PLDataModule',
321
+ train_loader=dict(
322
+ batch_size=train_batch_size_per_gpu,
323
+ num_workers=train_num_workers,
324
+ persistent_workers=persistent_workers,
325
+ pin_memory=True,
326
+ dataset=dict(
327
+ type=dataset_type,
328
+ data_root=data_parent,
329
+ ann_file='NWPU_instances_train.json',
330
+ data_prefix=dict(img_path='positive image set'),
331
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
332
+ pipeline=train_pipeline,
333
+ backend_args=backend_args)
334
+ ),
335
+ val_loader=val_loader,
336
+ test_loader=val_loader,
337
+ predict_loader=val_loader
338
+ )
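
Unlike the SAM-based configs, this one wraps a stock mmdet Mask2Former, so the model dict can be instantiated straight from the MMDetection registry. A minimal sketch, assuming mmdet 3.x and mmengine are installed; the config path and the dummy input size are illustrative, not part of the commit.

import torch
from mmengine.config import Config
from mmengine.registry import init_default_scope
from mmdet.registry import MODELS

cfg = Config.fromfile('configs/rsprompter/mask2former_nwpu_config.py')
init_default_scope('mmdet')          # resolve registry scopes for 'mmdet.*' type names
model = MODELS.build(cfg.model)      # ResNet-50 backbone + Mask2Former heads

model.eval()
with torch.no_grad():
    feats = model.extract_feat(torch.randn(1, 3, 1024, 1024))
print([f.shape for f in feats])      # multi-scale C2-C5 backbone feature maps
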
configs/rsprompter/mask2former_ssdd_config.py ADDED
@@ -0,0 +1,335 @@
1
+ custom_imports = dict(imports=['mmseg.datasets', 'mmseg.models'], allow_failed_imports=False)
2
+
3
+ max_epochs = 600
4
+
5
+ optimizer = dict(
6
+ type='AdamW',
7
+ lr=0.0005,
8
+ weight_decay=1e-3
9
+ )
10
+
11
+ param_scheduler = [
12
+ # warm up learning rate scheduler
13
+ dict(
14
+ type='LinearLR',
15
+ start_factor=1e-4,
16
+ by_epoch=True,
17
+ begin=0,
18
+ end=1,
19
+ # update by iter
20
+ convert_to_iter_based=True),
21
+ # main learning rate scheduler
22
+ dict(
23
+ type='CosineAnnealingLR',
24
+ T_max=max_epochs,
25
+ by_epoch=True,
26
+ begin=1,
27
+ end=max_epochs,
28
+ )
29
+ ]
30
+
31
+ param_scheduler_callback = dict(
32
+ type='ParamSchedulerHook'
33
+ )
34
+
35
+
36
+ evaluator_ = dict(
37
+ type='CocoPLMetric',
38
+ metric=['bbox', 'segm'],
39
+ proposal_nums=[1, 10, 100]
40
+ )
41
+
42
+
43
+ evaluator = dict(
44
+ # train_evaluator=evaluator_,
45
+ val_evaluator=evaluator_,
46
+ test_evaluator=evaluator_,
47
+ )
48
+
49
+ image_size = (512, 512)
50
+ data_preprocessor = dict(
51
+ type='mmdet.DetDataPreprocessor',
52
+ mean=[123.675, 116.28, 103.53],
53
+ std=[58.395, 57.12, 57.375],
54
+ bgr_to_rgb=True,
55
+ pad_size_divisor=32,
56
+ pad_mask=True,
57
+ mask_pad_value=0,
58
+ )
59
+
60
+ num_things_classes = 1
61
+ num_stuff_classes = 0
62
+ num_classes = num_things_classes + num_stuff_classes
63
+ num_queries = 30
64
+
65
+ model = dict(
66
+ type='mmdet.Mask2Former',
67
+ data_preprocessor=data_preprocessor,
68
+ backbone=dict(
69
+ type='mmdet.ResNet',
70
+ depth=50,
71
+ num_stages=4,
72
+ out_indices=(0, 1, 2, 3),
73
+ frozen_stages=-1,
74
+ norm_cfg=dict(type='BN', requires_grad=False),
75
+ norm_eval=True,
76
+ style='pytorch',
77
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
78
+ panoptic_head=dict(
79
+ type='mmdet.Mask2FormerHead',
80
+ in_channels=[256, 512, 1024, 2048], # pass to pixel_decoder inside
81
+ strides=[4, 8, 16, 32],
82
+ feat_channels=256,
83
+ out_channels=256,
84
+ num_things_classes=num_things_classes,
85
+ num_stuff_classes=num_stuff_classes,
86
+ num_queries=num_queries,
87
+ num_transformer_feat_level=3,
88
+ pixel_decoder=dict(
89
+ type='mmdet.MSDeformAttnPixelDecoder',
90
+ num_outs=3,
91
+ norm_cfg=dict(type='GN', num_groups=32),
92
+ act_cfg=dict(type='ReLU'),
93
+ encoder=dict( # DeformableDetrTransformerEncoder
94
+ num_layers=3,
95
+ layer_cfg=dict( # DeformableDetrTransformerEncoderLayer
96
+ self_attn_cfg=dict( # MultiScaleDeformableAttention
97
+ embed_dims=256,
98
+ num_heads=8,
99
+ num_levels=3,
100
+ num_points=4,
101
+ dropout=0.0,
102
+ batch_first=True),
103
+ ffn_cfg=dict(
104
+ embed_dims=256,
105
+ feedforward_channels=1024,
106
+ num_fcs=2,
107
+ ffn_drop=0.0,
108
+ act_cfg=dict(type='ReLU', inplace=True)))),
109
+ positional_encoding=dict(num_feats=128, normalize=True)),
110
+ enforce_decoder_input_project=False,
111
+ positional_encoding=dict(num_feats=128, normalize=True),
112
+ transformer_decoder=dict( # Mask2FormerTransformerDecoder
113
+ return_intermediate=True,
114
+ num_layers=3,
115
+ layer_cfg=dict( # Mask2FormerTransformerDecoderLayer
116
+ self_attn_cfg=dict( # MultiheadAttention
117
+ embed_dims=256,
118
+ num_heads=8,
119
+ dropout=0.0,
120
+ batch_first=True),
121
+ cross_attn_cfg=dict( # MultiheadAttention
122
+ embed_dims=256,
123
+ num_heads=8,
124
+ dropout=0.0,
125
+ batch_first=True),
126
+ ffn_cfg=dict(
127
+ embed_dims=256,
128
+ feedforward_channels=2048,
129
+ num_fcs=2,
130
+ ffn_drop=0.0,
131
+ act_cfg=dict(type='ReLU', inplace=True))),
132
+ init_cfg=None),
133
+ loss_cls=dict(
134
+ type='mmdet.CrossEntropyLoss',
135
+ use_sigmoid=False,
136
+ loss_weight=2.0,
137
+ reduction='mean',
138
+ class_weight=[1.0] * num_classes + [0.1]),
139
+ loss_mask=dict(
140
+ type='mmdet.CrossEntropyLoss',
141
+ use_sigmoid=True,
142
+ reduction='mean',
143
+ loss_weight=5.0),
144
+ loss_dice=dict(
145
+ type='mmdet.DiceLoss',
146
+ use_sigmoid=True,
147
+ activate=True,
148
+ reduction='mean',
149
+ naive_dice=True,
150
+ eps=1.0,
151
+ loss_weight=5.0)),
152
+ panoptic_fusion_head=dict(
153
+ type='mmdet.MaskFormerFusionHead',
154
+ num_things_classes=num_things_classes,
155
+ num_stuff_classes=num_stuff_classes,
156
+ loss_panoptic=None,
157
+ init_cfg=None),
158
+ train_cfg=dict(
159
+ num_points=12544,
160
+ oversample_ratio=3.0,
161
+ importance_sample_ratio=0.75,
162
+ assigner=dict(
163
+ type='mmdet.HungarianAssigner',
164
+ match_costs=[
165
+ dict(type='mmdet.ClassificationCost', weight=2.0),
166
+ dict(
167
+ type='mmdet.CrossEntropyLossCost', weight=5.0, use_sigmoid=True),
168
+ dict(type='mmdet.DiceCost', weight=5.0, pred_act=True, eps=1.0)
169
+ ]),
170
+ sampler=dict(type='mmdet.MaskPseudoSampler')),
171
+ test_cfg=dict(
172
+ panoptic_on=False,
173
+ # For now, the dataset does not support
174
+ # evaluating semantic segmentation metric.
175
+ semantic_on=False,
176
+ instance_on=True,
177
+ # max_per_image is for instance segmentation.
178
+ max_per_image=num_queries,
179
+ iou_thr=0.8,
180
+ # In Mask2Former's panoptic postprocessing,
181
+ # it will filter mask area where score is less than 0.5 .
182
+ filter_low_score=True),
183
+ init_cfg=None)
184
+
185
+
186
+ model_cfg = dict(
187
+ type='MMDetPLer',
188
+ hyperparameters=dict(
189
+ optimizer=optimizer,
190
+ param_scheduler=param_scheduler,
191
+ evaluator=evaluator,
192
+ ),
193
+ whole_model=model,
194
+ )
195
+
196
+ task_name = 'ssdd_ins'
197
+ exp_name = 'E20230527_0'
198
+ logger = dict(
199
+ type='WandbLogger',
200
+ project=task_name,
201
+ group='mask2former',
202
+ name=exp_name
203
+ )
204
+ # logger = None
205
+
206
+
207
+ callbacks = [
208
+ param_scheduler_callback,
209
+ dict(
210
+ type='ModelCheckpoint',
211
+ dirpath=f'results/{task_name}/{exp_name}/checkpoints',
212
+ save_last=True,
213
+ mode='max',
214
+ monitor='valsegm_map_0',
215
+ save_top_k=2,
216
+ filename='epoch_{epoch}-map_{valsegm_map_0:.4f}'
217
+ ),
218
+ dict(
219
+ type='LearningRateMonitor',
220
+ logging_interval='step'
221
+ )
222
+ ]
223
+
224
+
225
+ trainer_cfg = dict(
226
+ compiled_model=False,
227
+ accelerator="auto",
228
+ strategy="auto",
229
+ # strategy="ddp",
230
+ # strategy='ddp_find_unused_parameters_true',
231
+ # precision='32',
232
+ # precision='16-mixed',
233
+ devices=4,
234
+ default_root_dir=f'results/{task_name}/{exp_name}',
235
+ # default_root_dir='results/tmp',
236
+ max_epochs=max_epochs,
237
+ logger=logger,
238
+ callbacks=callbacks,
239
+ log_every_n_steps=10,
240
+ check_val_every_n_epoch=10,
241
+ benchmark=True,
242
+ # sync_batchnorm=True,
243
+ # fast_dev_run=True,
244
+
245
+ # limit_train_batches=1,
246
+ # limit_val_batches=0,
247
+ # limit_test_batches=None,
248
+ # limit_predict_batches=None,
249
+ # overfit_batches=0.0,
250
+
251
+ # val_check_interval=None,
252
+ # num_sanity_val_steps=0,
253
+ # enable_checkpointing=None,
254
+ # enable_progress_bar=None,
255
+ # enable_model_summary=None,
256
+ # accumulate_grad_batches=32,
257
+ # gradient_clip_val=15,
258
+ # gradient_clip_algorithm='norm',
259
+ # deterministic=None,
260
+ # inference_mode: bool=True,
261
+ use_distributed_sampler=True,
262
+ # profiler="simple",
263
+ # detect_anomaly=False,
264
+ # barebones=False,
265
+ # plugins=None,
266
+ # reload_dataloaders_every_n_epochs=0,
267
+ )
268
+
269
+
270
+ backend_args = None
271
+ train_pipeline = [
272
+ dict(type='mmdet.LoadImageFromFile'),
273
+ dict(type='mmdet.LoadAnnotations', with_bbox=True, with_mask=True),
274
+ dict(type='mmdet.Resize', scale=image_size),
275
+ dict(type='mmdet.RandomFlip', prob=0.5),
276
+ dict(type='mmdet.PackDetInputs')
277
+ ]
278
+
279
+ test_pipeline = [
280
+ dict(type='mmdet.LoadImageFromFile', backend_args=backend_args),
281
+ dict(type='mmdet.Resize', scale=image_size),
282
+ # If you don't have a gt annotation, delete the pipeline
283
+ dict(type='mmdet.LoadAnnotations', with_bbox=True, with_mask=True),
284
+ dict(
285
+ type='mmdet.PackDetInputs',
286
+ meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
287
+ 'scale_factor'))
288
+ ]
289
+
290
+
291
+ train_batch_size_per_gpu = 8
292
+ train_num_workers = 4
293
+ test_batch_size_per_gpu = 8
294
+ test_num_workers = 4
295
+ persistent_workers = True
296
+
297
+ data_parent = '/mnt/search01/dataset/cky_data/SSDD'
298
+
299
+ dataset_type = 'SSDDInsSegDataset'
300
+
301
+ val_loader = dict(
302
+ batch_size=test_batch_size_per_gpu,
303
+ num_workers=test_num_workers,
304
+ persistent_workers=persistent_workers,
305
+ pin_memory=True,
306
+ dataset=dict(
307
+ type=dataset_type,
308
+ data_root=data_parent,
309
+ ann_file='annotations/SSDD_instances_val.json',
310
+ data_prefix=dict(img_path='imgs'),
311
+ test_mode=True,
312
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
313
+ pipeline=test_pipeline,
314
+ backend_args=backend_args))
315
+
316
+ datamodule_cfg = dict(
317
+ type='PLDataModule',
318
+ train_loader=dict(
319
+ batch_size=train_batch_size_per_gpu,
320
+ num_workers=train_num_workers,
321
+ persistent_workers=persistent_workers,
322
+ pin_memory=True,
323
+ dataset=dict(
324
+ type=dataset_type,
325
+ data_root=data_parent,
326
+ ann_file='annotations/SSDD_instances_train.json',
327
+ data_prefix=dict(img_path='imgs'),
328
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
329
+ pipeline=train_pipeline,
330
+ backend_args=backend_args)
331
+ ),
332
+ val_loader=val_loader,
333
+ test_loader=val_loader,
334
+ predict_loader=val_loader
335
+ )
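
data_preprocessor applies the per-image normalisation at batch time. The snippet below is only an assumed illustration of that arithmetic with the values configured above (BGR-to-RGB swap, mean/std standardisation, zero-padding to a multiple of 32), not mmdet's actual implementation; the 500x490 input is a dummy.

import torch
import torch.nn.functional as F

mean = torch.tensor([123.675, 116.28, 103.53]).view(3, 1, 1)
std = torch.tensor([58.395, 57.12, 57.375]).view(3, 1, 1)

img = torch.randint(0, 256, (3, 500, 490)).float()   # dummy BGR image (C, H, W)
img = img[[2, 1, 0]]                                  # bgr_to_rgb=True
img = (img - mean) / std                              # per-channel standardisation

divisor = 32                                          # pad_size_divisor
pad_h = (divisor - img.shape[1] % divisor) % divisor
pad_w = (divisor - img.shape[2] % divisor) % divisor
img = F.pad(img, (0, pad_w, 0, pad_h))                # zero padding (masks use mask_pad_value=0)
print(img.shape)                                      # torch.Size([3, 512, 512])
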
configs/rsprompter/mask2former_whu_config.py ADDED
@@ -0,0 +1,335 @@
1
+ custom_imports = dict(imports=['mmseg.datasets', 'mmseg.models'], allow_failed_imports=False)
2
+
3
+ max_epochs = 400
4
+
5
+ optimizer = dict(
6
+ type='AdamW',
7
+ lr=0.0005,
8
+ weight_decay=1e-3
9
+ )
10
+
11
+ param_scheduler = [
12
+ # warm up learning rate scheduler
13
+ dict(
14
+ type='LinearLR',
15
+ start_factor=1e-4,
16
+ by_epoch=True,
17
+ begin=0,
18
+ end=1,
19
+ # update by iter
20
+ convert_to_iter_based=True),
21
+ # main learning rate scheduler
22
+ dict(
23
+ type='CosineAnnealingLR',
24
+ T_max=max_epochs,
25
+ by_epoch=True,
26
+ begin=1,
27
+ end=max_epochs,
28
+ )
29
+ ]
30
+
31
+ param_scheduler_callback = dict(
32
+ type='ParamSchedulerHook'
33
+ )
34
+
35
+ evaluator_ = dict(
36
+ type='CocoPLMetric',
37
+ metric=['bbox', 'segm'],
38
+ proposal_nums=[1, 10, 100],
39
+ )
40
+
41
+ evaluator = dict(
42
+ val_evaluator=evaluator_,
43
+ test_evaluator=evaluator_,
44
+ )
45
+
46
+
47
+ image_size = (512, 512)
48
+ data_preprocessor = dict(
49
+ type='mmdet.DetDataPreprocessor',
50
+ mean=[123.675, 116.28, 103.53],
51
+ std=[58.395, 57.12, 57.375],
52
+ bgr_to_rgb=True,
53
+ pad_size_divisor=32,
54
+ pad_mask=True,
55
+ mask_pad_value=0,
56
+ )
57
+
58
+ num_things_classes = 1
59
+ num_stuff_classes = 0
60
+ num_classes = num_things_classes + num_stuff_classes
61
+ num_queries = 90
62
+
63
+ model = dict(
64
+ type='mmdet.Mask2Former',
65
+ data_preprocessor=data_preprocessor,
66
+ backbone=dict(
67
+ type='mmdet.ResNet',
68
+ depth=50,
69
+ num_stages=4,
70
+ out_indices=(0, 1, 2, 3),
71
+ frozen_stages=-1,
72
+ norm_cfg=dict(type='BN', requires_grad=False),
73
+ norm_eval=True,
74
+ style='pytorch',
75
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
76
+ panoptic_head=dict(
77
+ type='mmdet.Mask2FormerHead',
78
+ in_channels=[256, 512, 1024, 2048], # pass to pixel_decoder inside
79
+ strides=[4, 8, 16, 32],
80
+ feat_channels=256,
81
+ out_channels=256,
82
+ num_things_classes=num_things_classes,
83
+ num_stuff_classes=num_stuff_classes,
84
+ num_queries=num_queries,
85
+ num_transformer_feat_level=3,
86
+ pixel_decoder=dict(
87
+ type='mmdet.MSDeformAttnPixelDecoder',
88
+ num_outs=3,
89
+ norm_cfg=dict(type='GN', num_groups=32),
90
+ act_cfg=dict(type='ReLU'),
91
+ encoder=dict( # DeformableDetrTransformerEncoder
92
+ num_layers=3,
93
+ layer_cfg=dict( # DeformableDetrTransformerEncoderLayer
94
+ self_attn_cfg=dict( # MultiScaleDeformableAttention
95
+ embed_dims=256,
96
+ num_heads=8,
97
+ num_levels=3,
98
+ num_points=4,
99
+ dropout=0.0,
100
+ batch_first=True),
101
+ ffn_cfg=dict(
102
+ embed_dims=256,
103
+ feedforward_channels=1024,
104
+ num_fcs=2,
105
+ ffn_drop=0.0,
106
+ act_cfg=dict(type='ReLU', inplace=True)))),
107
+ positional_encoding=dict(num_feats=128, normalize=True)),
108
+ enforce_decoder_input_project=False,
109
+ positional_encoding=dict(num_feats=128, normalize=True),
110
+ transformer_decoder=dict( # Mask2FormerTransformerDecoder
111
+ return_intermediate=True,
112
+ num_layers=3,
113
+ layer_cfg=dict( # Mask2FormerTransformerDecoderLayer
114
+ self_attn_cfg=dict( # MultiheadAttention
115
+ embed_dims=256,
116
+ num_heads=8,
117
+ dropout=0.0,
118
+ batch_first=True),
119
+ cross_attn_cfg=dict( # MultiheadAttention
120
+ embed_dims=256,
121
+ num_heads=8,
122
+ dropout=0.0,
123
+ batch_first=True),
124
+ ffn_cfg=dict(
125
+ embed_dims=256,
126
+ feedforward_channels=2048,
127
+ num_fcs=2,
128
+ ffn_drop=0.0,
129
+ act_cfg=dict(type='ReLU', inplace=True))),
130
+ init_cfg=None),
131
+ loss_cls=dict(
132
+ type='mmdet.CrossEntropyLoss',
133
+ use_sigmoid=False,
134
+ loss_weight=2.0,
135
+ reduction='mean',
136
+ class_weight=[1.0] * num_classes + [0.1]),
137
+ loss_mask=dict(
138
+ type='mmdet.CrossEntropyLoss',
139
+ use_sigmoid=True,
140
+ reduction='mean',
141
+ loss_weight=5.0),
142
+ loss_dice=dict(
143
+ type='mmdet.DiceLoss',
144
+ use_sigmoid=True,
145
+ activate=True,
146
+ reduction='mean',
147
+ naive_dice=True,
148
+ eps=1.0,
149
+ loss_weight=5.0)),
150
+ panoptic_fusion_head=dict(
151
+ type='mmdet.MaskFormerFusionHead',
152
+ num_things_classes=num_things_classes,
153
+ num_stuff_classes=num_stuff_classes,
154
+ loss_panoptic=None,
155
+ init_cfg=None),
156
+ train_cfg=dict(
157
+ num_points=12544,
158
+ oversample_ratio=3.0,
159
+ importance_sample_ratio=0.75,
160
+ assigner=dict(
161
+ type='mmdet.HungarianAssigner',
162
+ match_costs=[
163
+ dict(type='mmdet.ClassificationCost', weight=2.0),
164
+ dict(
165
+ type='mmdet.CrossEntropyLossCost', weight=5.0, use_sigmoid=True),
166
+ dict(type='mmdet.DiceCost', weight=5.0, pred_act=True, eps=1.0)
167
+ ]),
168
+ sampler=dict(type='mmdet.MaskPseudoSampler')),
169
+ test_cfg=dict(
170
+ panoptic_on=False,
171
+ # For now, the dataset does not support
172
+ # evaluating semantic segmentation metric.
173
+ semantic_on=False,
174
+ instance_on=True,
175
+ # max_per_image is for instance segmentation.
176
+ max_per_image=100,
177
+ iou_thr=0.8,
178
+ # In Mask2Former's panoptic postprocessing,
179
+ # it will filter mask area where score is less than 0.5 .
180
+ filter_low_score=True),
181
+ init_cfg=None)
182
+
183
+
184
+ model_cfg = dict(
185
+ type='MMDetPLer',
186
+ hyperparameters=dict(
187
+ optimizer=optimizer,
188
+ param_scheduler=param_scheduler,
189
+ evaluator=evaluator,
190
+ ),
191
+ whole_model=model,
192
+ )
193
+
194
+ task_name = 'whu_ins'
195
+ exp_name = 'E20230525_1'
196
+ logger = dict(
197
+ type='WandbLogger',
198
+ project=task_name,
199
+ group='mask2former',
200
+ name=exp_name
201
+ )
202
+ # logger = None
203
+
204
+
205
+ callbacks = [
206
+ param_scheduler_callback,
207
+ dict(
208
+ type='ModelCheckpoint',
209
+ dirpath=f'results/{task_name}/{exp_name}/checkpoints',
210
+ save_last=True,
211
+ mode='max',
212
+ monitor='valmap_0',
213
+ save_top_k=2,
214
+ filename='epoch_{epoch}-map_{valmap_0:.4f}'
215
+ ),
216
+ dict(
217
+ type='LearningRateMonitor',
218
+ logging_interval='step'
219
+ )
220
+ ]
221
+
222
+
223
+ trainer_cfg = dict(
224
+ compiled_model=False,
225
+ accelerator="auto",
226
+ strategy="auto",
227
+ # strategy="ddp",
228
+ # strategy='ddp_find_unused_parameters_true',
229
+ # precision='32',
230
+ # precision='16-mixed',
231
+ devices=4,
232
+ default_root_dir=f'results/{task_name}/{exp_name}',
233
+ # default_root_dir='results/tmp',
234
+ max_epochs=max_epochs,
235
+ logger=logger,
236
+ callbacks=callbacks,
237
+ log_every_n_steps=20,
238
+ check_val_every_n_epoch=10,
239
+ benchmark=True,
240
+ # sync_batchnorm=True,
241
+ # fast_dev_run=True,
242
+
243
+ # limit_train_batches=1,
244
+ # limit_val_batches=0,
245
+ # limit_test_batches=None,
246
+ # limit_predict_batches=None,
247
+ # overfit_batches=0.0,
248
+
249
+ # val_check_interval=None,
250
+ # num_sanity_val_steps=0,
251
+ # enable_checkpointing=None,
252
+ # enable_progress_bar=None,
253
+ # enable_model_summary=None,
254
+ # accumulate_grad_batches=32,
255
+ # gradient_clip_val=15,
256
+ # gradient_clip_algorithm='norm',
257
+ # deterministic=None,
258
+ # inference_mode: bool=True,
259
+ use_distributed_sampler=True,
260
+ # profiler="simple",
261
+ # detect_anomaly=False,
262
+ # barebones=False,
263
+ # plugins=None,
264
+ # reload_dataloaders_every_n_epochs=0,
265
+ )
266
+
267
+
268
+ backend_args = None
269
+ train_pipeline = [
270
+ dict(type='mmdet.LoadImageFromFile'),
271
+ dict(type='mmdet.LoadAnnotations', with_bbox=True, with_mask=True),
272
+ dict(type='mmdet.Resize', scale=image_size),
273
+ dict(type='mmdet.RandomFlip', prob=0.5),
274
+ dict(type='mmdet.PackDetInputs')
275
+ ]
276
+
277
+ test_pipeline = [
278
+ dict(type='mmdet.LoadImageFromFile', backend_args=backend_args),
279
+ dict(type='mmdet.Resize', scale=image_size),
280
+ # If you don't have a gt annotation, delete the pipeline
281
+ dict(type='mmdet.LoadAnnotations', with_bbox=True, with_mask=True),
282
+ dict(
283
+ type='mmdet.PackDetInputs',
284
+ meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
285
+ 'scale_factor'))
286
+ ]
287
+
288
+
289
+ train_batch_size_per_gpu = 8
290
+ train_num_workers = 4
291
+ test_batch_size_per_gpu = 8
292
+ test_num_workers = 4
293
+ persistent_workers = True
294
+
295
+ data_parent = '/mnt/search01/dataset/cky_data/WHU'
296
+ train_data_prefix = 'train/'
297
+ val_data_prefix = 'test/'
298
+
299
+ dataset_type = 'WHUInsSegDataset'
300
+
301
+ val_loader = dict(
302
+ batch_size=test_batch_size_per_gpu,
303
+ num_workers=test_num_workers,
304
+ persistent_workers=persistent_workers,
305
+ pin_memory=True,
306
+ dataset=dict(
307
+ type=dataset_type,
308
+ data_root=data_parent,
309
+ ann_file='annotations/WHU_building_test.json',
310
+ data_prefix=dict(img_path=val_data_prefix + '/image', seg_path=val_data_prefix + '/label'),
311
+ test_mode=True,
312
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
313
+ pipeline=test_pipeline,
314
+ backend_args=backend_args))
315
+
316
+ datamodule_cfg = dict(
317
+ type='PLDataModule',
318
+ train_loader=dict(
319
+ batch_size=train_batch_size_per_gpu,
320
+ num_workers=train_num_workers,
321
+ persistent_workers=persistent_workers,
322
+ pin_memory=True,
323
+ dataset=dict(
324
+ type=dataset_type,
325
+ data_root=data_parent,
326
+ ann_file='annotations/WHU_building_train.json',
327
+ data_prefix=dict(img_path=train_data_prefix + '/image', seg_path=train_data_prefix + '/label'),
328
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
329
+ pipeline=train_pipeline,
330
+ backend_args=backend_args)
331
+ ),
332
+ val_loader=val_loader,
333
+ test_loader=val_loader,
334
+ predict_loader=val_loader
335
+ )
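(A minimal usage sketch, not part of the uploaded file: assuming the repo follows the usual mmengine config workflow, the file above can be parsed and inspected like this before it is handed to the training entry point.)

# Hypothetical sketch: parse the config file above with mmengine and read a few fields.
from mmengine.config import Config

cfg = Config.fromfile('configs/rsprompter/mask2former_whu_config.py')
print(cfg.max_epochs)              # 400
print(cfg.model['type'])           # 'mmdet.Mask2Former'
print(cfg.datamodule_cfg['type'])  # 'PLDataModule'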
configs/rsprompter/maskrcnn_nwpu_config.py ADDED
@@ -0,0 +1,339 @@
1
+ custom_imports = dict(imports=['mmseg.datasets', 'mmseg.models', 'mmdet.models'], allow_failed_imports=False)
2
+ max_epochs = 500
3
+
4
+ optimizer = dict(
5
+ type='AdamW',
6
+ lr=0.0005,
7
+ weight_decay=1e-4
8
+ )
9
+
10
+ param_scheduler = [
11
+ # warm up learning rate scheduler
12
+ dict(
13
+ type='LinearLR',
14
+ start_factor=1e-4,
15
+ by_epoch=True,
16
+ begin=0,
17
+ end=1,
18
+ # update by iter
19
+ convert_to_iter_based=True),
20
+ # main learning rate scheduler
21
+ dict(
22
+ type='CosineAnnealingLR',
23
+ T_max=max_epochs,
24
+ by_epoch=True,
25
+ begin=1,
26
+ end=max_epochs,
27
+ )
28
+ ]
29
+
30
+ param_scheduler_callback = dict(
31
+ type='ParamSchedulerHook'
32
+ )
33
+
34
+
35
+ evaluator_ = dict(
36
+ type='CocoPLMetric',
37
+ metric=['bbox', 'segm'],
38
+ proposal_nums=[1, 10, 100]
39
+ )
40
+
41
+ evaluator = dict(
42
+ val_evaluator=evaluator_,
43
+ test_evaluator=evaluator_
44
+ )
45
+
46
+
47
+ image_size = (1024, 1024)
48
+ data_preprocessor = dict(
49
+ type='mmdet.DetDataPreprocessor',
50
+ mean=[123.675, 116.28, 103.53],
51
+ std=[58.395, 57.12, 57.375],
52
+ bgr_to_rgb=True,
53
+ pad_mask=True,
54
+ mask_pad_value=0,
55
+ pad_size_divisor=32
56
+ )
57
+
58
+ num_things_classes = 10
59
+ num_stuff_classes = 0
60
+ num_classes = num_things_classes + num_stuff_classes
61
+
62
+ # model settings
63
+ model = dict(
64
+ type='mmdet.MaskRCNN',
65
+ data_preprocessor=data_preprocessor,
66
+ backbone=dict(
67
+ type='mmdet.ResNet',
68
+ depth=50,
69
+ num_stages=4,
70
+ out_indices=(0, 1, 2, 3),
71
+ frozen_stages=1,
72
+ norm_cfg=dict(type='BN', requires_grad=True),
73
+ norm_eval=True,
74
+ style='pytorch',
75
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')
76
+ ),
77
+ neck=dict(
78
+ type='mmdet.FPN',
79
+ in_channels=[256, 512, 1024, 2048],
80
+ out_channels=256,
81
+ num_outs=5),
82
+ rpn_head=dict(
83
+ type='mmdet.RPNHead',
84
+ in_channels=256,
85
+ feat_channels=256,
86
+ anchor_generator=dict(
87
+ type='mmdet.AnchorGenerator',
88
+ scales=[8],
89
+ ratios=[0.5, 1.0, 2.0],
90
+ strides=[4, 8, 16, 32, 64]),
91
+ bbox_coder=dict(
92
+ type='mmdet.DeltaXYWHBBoxCoder',
93
+ target_means=[.0, .0, .0, .0],
94
+ target_stds=[1.0, 1.0, 1.0, 1.0]),
95
+ loss_cls=dict(
96
+ type='mmdet.CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
97
+ loss_bbox=dict(type='mmdet.L1Loss', loss_weight=1.0)),
98
+ roi_head=dict(
99
+ type='mmdet.StandardRoIHead',
100
+ bbox_roi_extractor=dict(
101
+ type='mmdet.SingleRoIExtractor',
102
+ roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
103
+ out_channels=256,
104
+ featmap_strides=[4, 8, 16, 32]),
105
+ bbox_head=dict(
106
+ type='mmdet.Shared2FCBBoxHead',
107
+ in_channels=256,
108
+ fc_out_channels=1024,
109
+ roi_feat_size=7,
110
+ num_classes=num_classes,
111
+ bbox_coder=dict(
112
+ type='mmdet.DeltaXYWHBBoxCoder',
113
+ target_means=[0., 0., 0., 0.],
114
+ target_stds=[0.1, 0.1, 0.2, 0.2]),
115
+ reg_class_agnostic=False,
116
+ loss_cls=dict(
117
+ type='mmdet.CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
118
+ loss_bbox=dict(type='mmdet.L1Loss', loss_weight=1.0)),
119
+ mask_roi_extractor=dict(
120
+ type='mmdet.SingleRoIExtractor',
121
+ roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),
122
+ out_channels=256,
123
+ featmap_strides=[4, 8, 16, 32]),
124
+ mask_head=dict(
125
+ type='mmdet.FCNMaskHead',
126
+ num_convs=4,
127
+ in_channels=256,
128
+ conv_out_channels=256,
129
+ num_classes=num_classes,
130
+ loss_mask=dict(
131
+ type='mmdet.CrossEntropyLoss', use_mask=True, loss_weight=1.0))),
132
+ # model training and testing settings
133
+ train_cfg=dict(
134
+ rpn=dict(
135
+ assigner=dict(
136
+ type='mmdet.MaxIoUAssigner',
137
+ pos_iou_thr=0.7,
138
+ neg_iou_thr=0.3,
139
+ min_pos_iou=0.3,
140
+ match_low_quality=True,
141
+ ignore_iof_thr=-1),
142
+ sampler=dict(
143
+ type='mmdet.RandomSampler',
144
+ num=256,
145
+ pos_fraction=0.5,
146
+ neg_pos_ub=-1,
147
+ add_gt_as_proposals=False),
148
+ allowed_border=-1,
149
+ pos_weight=-1,
150
+ debug=False),
151
+ rpn_proposal=dict(
152
+ nms_pre=2000,
153
+ max_per_img=1000,
154
+ nms=dict(type='nms', iou_threshold=0.7),
155
+ min_bbox_size=0),
156
+ rcnn=dict(
157
+ assigner=dict(
158
+ type='mmdet.MaxIoUAssigner',
159
+ pos_iou_thr=0.5,
160
+ neg_iou_thr=0.5,
161
+ min_pos_iou=0.5,
162
+ match_low_quality=True,
163
+ ignore_iof_thr=-1),
164
+ sampler=dict(
165
+ type='mmdet.RandomSampler',
166
+ num=512,
167
+ pos_fraction=0.25,
168
+ neg_pos_ub=-1,
169
+ add_gt_as_proposals=True),
170
+ mask_size=28,
171
+ pos_weight=-1,
172
+ debug=False)),
173
+ test_cfg=dict(
174
+ rpn=dict(
175
+ nms_pre=1000,
176
+ max_per_img=1000,
177
+ nms=dict(type='nms', iou_threshold=0.7),
178
+ min_bbox_size=0),
179
+ rcnn=dict(
180
+ score_thr=0.05,
181
+ nms=dict(type='nms', iou_threshold=0.5),
182
+ max_per_img=100,
183
+ mask_thr_binary=0.5)
184
+ )
185
+ )
186
+
187
+
188
+ model_cfg = dict(
189
+ type='MMDetPLer',
190
+ hyperparameters=dict(
191
+ optimizer=optimizer,
192
+ param_scheduler=param_scheduler,
193
+ evaluator=evaluator,
194
+ ),
195
+ whole_model=model,
196
+ )
197
+
198
+ task_name = 'nwpu_ins'
199
+ exp_name = 'E20230520_0'
200
+ logger = dict(
201
+ type='WandbLogger',
202
+ project=task_name,
203
+ group='maskrcnn',
204
+ name=exp_name
205
+ )
206
+ # logger = None
207
+
208
+
209
+ callbacks = [
210
+ param_scheduler_callback,
211
+ dict(
212
+ type='ModelCheckpoint',
213
+ dirpath=f'results/{task_name}/{exp_name}/checkpoints',
214
+ save_last=True,
215
+ mode='max',
216
+ monitor='valmap_0',
217
+ save_top_k=2,
218
+ filename='epoch_{epoch}-map_{valmap_0:.4f}'
219
+ ),
220
+ dict(
221
+ type='LearningRateMonitor',
222
+ logging_interval='step'
223
+ )
224
+ ]
225
+
226
+
227
+ trainer_cfg = dict(
228
+ compiled_model=False,
229
+ accelerator="cpu",
230
+ strategy="auto",
231
+ # strategy="ddp",
232
+ # strategy='ddp_find_unused_parameters_true',
233
+ # precision='32',
234
+ # precision='16-mixed',
235
+ devices=1,
236
+ default_root_dir=f'results/{task_name}/{exp_name}',
237
+ # default_root_dir='results/tmp',
238
+ max_epochs=max_epochs,
239
+ logger=logger,
240
+ callbacks=callbacks,
241
+ log_every_n_steps=3,
242
+ check_val_every_n_epoch=5,
243
+ benchmark=True,
244
+ # sync_batchnorm=True,
245
+ # fast_dev_run=True,
246
+
247
+ # limit_train_batches=1,
248
+ # limit_val_batches=0,
249
+ # limit_test_batches=None,
250
+ # limit_predict_batches=None,
251
+ # overfit_batches=0.0,
252
+
253
+ # val_check_interval=None,
254
+ # num_sanity_val_steps=0,
255
+ # enable_checkpointing=None,
256
+ # enable_progress_bar=None,
257
+ # enable_model_summary=None,
258
+ # accumulate_grad_batches=32,
259
+ # gradient_clip_val=15,
260
+ # gradient_clip_algorithm='norm',
261
+ # deterministic=None,
262
+ # inference_mode: bool=True,
263
+ use_distributed_sampler=True,
264
+ # profiler="simple",
265
+ # detect_anomaly=False,
266
+ # barebones=False,
267
+ # plugins=None,
268
+ # reload_dataloaders_every_n_epochs=0,
269
+ )
270
+
271
+
272
+ backend_args = None
273
+ train_pipeline = [
274
+ dict(type='mmdet.LoadImageFromFile'),
275
+ dict(type='mmdet.LoadAnnotations', with_bbox=True, with_mask=True),
276
+ dict(type='mmdet.Resize', scale=image_size),
277
+ dict(type='mmdet.RandomFlip', prob=0.5),
278
+ dict(type='mmdet.PackDetInputs')
279
+ ]
280
+
281
+ test_pipeline = [
282
+ dict(type='mmdet.LoadImageFromFile', backend_args=backend_args),
283
+ dict(type='mmdet.Resize', scale=image_size),
284
+ # If you don't have a gt annotation, delete the pipeline
285
+ dict(type='mmdet.LoadAnnotations', with_bbox=True, with_mask=True),
286
+ dict(
287
+ type='mmdet.PackDetInputs',
288
+ meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
289
+ 'scale_factor'))
290
+ ]
291
+
292
+
293
+ train_batch_size_per_gpu = 2
294
+ train_num_workers = 4
295
+ test_batch_size_per_gpu = 2
296
+ test_num_workers = 4
297
+ persistent_workers = True
298
+
299
+ data_parent = '/Users/kyanchen/datasets/seg/VHR-10_dataset_coco/NWPUVHR-10_dataset/'
300
+ train_data_prefix = ''
301
+ val_data_prefix = ''
302
+
303
+ dataset_type = 'NWPUInsSegDataset'
304
+
305
+ val_loader = dict(
306
+ batch_size=test_batch_size_per_gpu,
307
+ num_workers=test_num_workers,
308
+ persistent_workers=persistent_workers,
309
+ pin_memory=True,
310
+ dataset=dict(
311
+ type=dataset_type,
312
+ data_root=data_parent,
313
+ ann_file='NWPU_instances_val.json',
314
+ data_prefix=dict(img_path='positive image set'),
315
+ test_mode=True,
316
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
317
+ pipeline=test_pipeline,
318
+ backend_args=backend_args))
319
+
320
+ datamodule_cfg = dict(
321
+ type='PLDataModule',
322
+ train_loader=dict(
323
+ batch_size=train_batch_size_per_gpu,
324
+ num_workers=train_num_workers,
325
+ persistent_workers=persistent_workers,
326
+ pin_memory=True,
327
+ dataset=dict(
328
+ type=dataset_type,
329
+ data_root=data_parent,
330
+ ann_file='NWPU_instances_train.json',
331
+ data_prefix=dict(img_path='positive image set'),
332
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
333
+ pipeline=train_pipeline,
334
+ backend_args=backend_args)
335
+ ),
336
+ val_loader=val_loader,
337
+ test_loader=val_loader,
338
+ predict_loader=val_loader
339
+ )
configs/rsprompter/maskrcnn_ssdd_config.py ADDED
@@ -0,0 +1,345 @@
1
+ custom_imports = dict(imports=['mmseg.datasets', 'mmseg.models', 'mmdet.models'], allow_failed_imports=False)
2
+
3
+ max_epochs = 500
4
+
5
+ optimizer = dict(
6
+ type='AdamW',
7
+ lr=0.0005,
8
+ weight_decay=1e-4
9
+ )
10
+
11
+ param_scheduler = [
12
+ # warm up learning rate scheduler
13
+ dict(
14
+ type='LinearLR',
15
+ start_factor=1e-4,
16
+ by_epoch=True,
17
+ begin=0,
18
+ end=1,
19
+ # update by iter
20
+ convert_to_iter_based=True),
21
+ # main learning rate scheduler
22
+ dict(
23
+ type='CosineAnnealingLR',
24
+ T_max=max_epochs,
25
+ by_epoch=True,
26
+ begin=1,
27
+ end=max_epochs,
28
+ )
29
+ ]
30
+
31
+ param_scheduler_callback = dict(
32
+ type='ParamSchedulerHook'
33
+ )
34
+
35
+
36
+ evaluator_ = dict(
37
+ type='CocoPLMetric',
38
+ metric=['bbox', 'segm'],
39
+ proposal_nums=[1, 10, 100]
40
+ )
41
+
42
+ evaluator = dict(
43
+ val_evaluator=evaluator_,
44
+ )
45
+
46
+
47
+ image_size = (512, 512)
48
+ data_preprocessor = dict(
49
+ type='mmdet.DetDataPreprocessor',
50
+ mean=[123.675, 116.28, 103.53],
51
+ std=[58.395, 57.12, 57.375],
52
+ bgr_to_rgb=True,
53
+ pad_mask=True,
54
+ mask_pad_value=0,
55
+ pad_size_divisor=32
56
+ )
57
+
58
+ num_things_classes = 1
59
+ num_stuff_classes = 0
60
+ num_classes = num_things_classes + num_stuff_classes
61
+ num_queries = 100
62
+
63
+ # model settings
64
+ model = dict(
65
+ type='mmdet.MaskRCNN',
66
+ data_preprocessor=data_preprocessor,
67
+ backbone=dict(
68
+ type='mmdet.ResNet',
69
+ depth=50,
70
+ num_stages=4,
71
+ out_indices=(0, 1, 2, 3),
72
+ frozen_stages=-1,
73
+ norm_cfg=dict(type='BN', requires_grad=True),
74
+ norm_eval=True,
75
+ style='pytorch',
76
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')
77
+ ),
78
+ neck=dict(
79
+ type='mmdet.FPN',
80
+ in_channels=[256, 512, 1024, 2048],
81
+ out_channels=256,
82
+ num_outs=5),
83
+ rpn_head=dict(
84
+ type='mmdet.RPNHead',
85
+ in_channels=256,
86
+ feat_channels=256,
87
+ anchor_generator=dict(
88
+ type='mmdet.AnchorGenerator',
89
+ scales=[8],
90
+ ratios=[0.5, 1.0, 2.0],
91
+ strides=[4, 8, 16, 32, 64]),
92
+ bbox_coder=dict(
93
+ type='mmdet.DeltaXYWHBBoxCoder',
94
+ target_means=[.0, .0, .0, .0],
95
+ target_stds=[1.0, 1.0, 1.0, 1.0]),
96
+ loss_cls=dict(
97
+ type='mmdet.CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
98
+ loss_bbox=dict(type='mmdet.L1Loss', loss_weight=1.0)),
99
+ roi_head=dict(
100
+ type='mmdet.StandardRoIHead',
101
+ bbox_roi_extractor=dict(
102
+ type='mmdet.SingleRoIExtractor',
103
+ roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
104
+ out_channels=256,
105
+ featmap_strides=[4, 8, 16, 32]),
106
+ bbox_head=dict(
107
+ type='mmdet.Shared2FCBBoxHead',
108
+ in_channels=256,
109
+ fc_out_channels=1024,
110
+ roi_feat_size=7,
111
+ num_classes=num_classes,
112
+ bbox_coder=dict(
113
+ type='mmdet.DeltaXYWHBBoxCoder',
114
+ target_means=[0., 0., 0., 0.],
115
+ target_stds=[0.1, 0.1, 0.2, 0.2]),
116
+ reg_class_agnostic=False,
117
+ loss_cls=dict(
118
+ type='mmdet.CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
119
+ loss_bbox=dict(type='mmdet.L1Loss', loss_weight=1.0)),
120
+ mask_roi_extractor=dict(
121
+ type='mmdet.SingleRoIExtractor',
122
+ roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),
123
+ out_channels=256,
124
+ featmap_strides=[4, 8, 16, 32]),
125
+ mask_head=dict(
126
+ type='mmdet.FCNMaskHead',
127
+ num_convs=4,
128
+ in_channels=256,
129
+ conv_out_channels=256,
130
+ num_classes=num_classes,
131
+ loss_mask=dict(
132
+ type='mmdet.CrossEntropyLoss', use_mask=True, loss_weight=1.0))),
133
+ # model training and testing settings
134
+ train_cfg=dict(
135
+ rpn=dict(
136
+ assigner=dict(
137
+ type='mmdet.MaxIoUAssigner',
138
+ pos_iou_thr=0.7,
139
+ neg_iou_thr=0.3,
140
+ min_pos_iou=0.3,
141
+ match_low_quality=True,
142
+ ignore_iof_thr=-1),
143
+ sampler=dict(
144
+ type='mmdet.RandomSampler',
145
+ num=256,
146
+ pos_fraction=0.5,
147
+ neg_pos_ub=-1,
148
+ add_gt_as_proposals=False),
149
+ allowed_border=-1,
150
+ pos_weight=-1,
151
+ debug=False),
152
+ rpn_proposal=dict(
153
+ nms_pre=2000,
154
+ max_per_img=1000,
155
+ nms=dict(type='nms', iou_threshold=0.7),
156
+ min_bbox_size=0),
157
+ rcnn=dict(
158
+ assigner=dict(
159
+ type='mmdet.MaxIoUAssigner',
160
+ pos_iou_thr=0.5,
161
+ neg_iou_thr=0.5,
162
+ min_pos_iou=0.5,
163
+ match_low_quality=True,
164
+ ignore_iof_thr=-1),
165
+ sampler=dict(
166
+ type='mmdet.RandomSampler',
167
+ num=512,
168
+ pos_fraction=0.25,
169
+ neg_pos_ub=-1,
170
+ add_gt_as_proposals=True),
171
+ mask_size=28,
172
+ pos_weight=-1,
173
+ debug=False)),
174
+ test_cfg=dict(
175
+ rpn=dict(
176
+ nms_pre=1000,
177
+ max_per_img=1000,
178
+ nms=dict(type='nms', iou_threshold=0.7),
179
+ min_bbox_size=0),
180
+ rcnn=dict(
181
+ score_thr=0.05,
182
+ nms=dict(type='nms', iou_threshold=0.5),
183
+ max_per_img=100,
184
+ mask_thr_binary=0.5)
185
+ )
186
+ )
187
+
188
+
189
+ model_cfg = dict(
190
+ type='MMDetPLer',
191
+ hyperparameters=dict(
192
+ optimizer=optimizer,
193
+ param_scheduler=param_scheduler,
194
+ evaluator=evaluator,
195
+ ),
196
+ whole_model=model,
197
+ )
198
+
199
+ task_name = 'ssdd_ins'
200
+ exp_name = 'E20230526_0'
201
+ logger = dict(
202
+ type='WandbLogger',
203
+ project=task_name,
204
+ group='maskrcnn',
205
+ name=exp_name
206
+ )
207
+ # logger = None
208
+
209
+
210
+ callbacks = [
211
+ param_scheduler_callback,
212
+ dict(
213
+ type='ModelCheckpoint',
214
+ dirpath=f'results/{task_name}/{exp_name}/checkpoints',
215
+ save_last=True,
216
+ mode='max',
217
+ monitor='valmap_0',
218
+ save_top_k=2,
219
+ filename='epoch_{epoch}-map_{valmap_0:.4f}'
220
+ # mode='min',
221
+ # monitor='train_loss',
222
+ # save_top_k=2,
223
+ # filename='epoch_{epoch}-trainloss_{train_loss:.4f}'
224
+ ),
225
+ dict(
226
+ type='LearningRateMonitor',
227
+ logging_interval='step'
228
+ )
229
+ ]
230
+
231
+
232
+ trainer_cfg = dict(
233
+ compiled_model=False,
234
+ accelerator="auto",
235
+ strategy="auto",
236
+ # strategy="ddp",
237
+ # strategy='ddp_find_unused_parameters_true',
238
+ # precision='32',
239
+ # precision='16-mixed',
240
+ devices=4,
241
+ default_root_dir=f'results/{task_name}/{exp_name}',
242
+ # default_root_dir='results/tmp',
243
+ max_epochs=max_epochs,
244
+ logger=logger,
245
+ callbacks=callbacks,
246
+ log_every_n_steps=10,
247
+ check_val_every_n_epoch=10,
248
+ benchmark=True,
249
+ # sync_batchnorm=True,
250
+ # fast_dev_run=True,
251
+
252
+ # limit_train_batches=1,
253
+ # limit_val_batches=0,
254
+ # limit_test_batches=None,
255
+ # limit_predict_batches=None,
256
+ # overfit_batches=0.0,
257
+
258
+ # val_check_interval=None,
259
+ # num_sanity_val_steps=1,
260
+ # enable_checkpointing=None,
261
+ # enable_progress_bar=None,
262
+ # enable_model_summary=None,
263
+ # accumulate_grad_batches=32,
264
+ # gradient_clip_val=15,
265
+ # gradient_clip_algorithm='norm',
266
+ # deterministic=None,
267
+ # inference_mode: bool=True,
268
+ use_distributed_sampler=True,
269
+ # profiler="simple",
270
+ # detect_anomaly=False,
271
+ # barebones=False,
272
+ # plugins=None,
273
+ # reload_dataloaders_every_n_epochs=0,
274
+ )
275
+
276
+
277
+ backend_args = None
278
+ train_pipeline = [
279
+ dict(type='mmdet.LoadImageFromFile'),
280
+ dict(type='mmdet.LoadAnnotations', with_bbox=True, with_mask=True),
281
+ dict(type='mmdet.Resize', scale=image_size),
282
+ dict(type='mmdet.RandomFlip', prob=0.5),
283
+ dict(type='mmdet.PackDetInputs')
284
+ ]
285
+
286
+ test_pipeline = [
287
+ dict(type='mmdet.LoadImageFromFile', backend_args=backend_args),
288
+ dict(type='mmdet.Resize', scale=image_size),
289
+ # If you don't have a gt annotation, delete the pipeline
290
+ dict(type='mmdet.LoadAnnotations', with_bbox=True, with_mask=True),
291
+ dict(
292
+ type='mmdet.PackDetInputs',
293
+ meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
294
+ 'scale_factor'))
295
+ ]
296
+
297
+
298
+ train_batch_size_per_gpu = 8
299
+ train_num_workers = 4
300
+ test_batch_size_per_gpu = 8
301
+ test_num_workers = 4
302
+ persistent_workers = True
303
+
304
+ data_parent = '/Users/kyanchen/datasets/seg/SSDD'
305
+ # data_parent = '/mnt/search01/dataset/cky_data/SSDD'
306
+
307
+ dataset_type = 'SSDDInsSegDataset'
308
+
309
+ val_loader = dict(
310
+ batch_size=test_batch_size_per_gpu,
311
+ num_workers=test_num_workers,
312
+ persistent_workers=persistent_workers,
313
+ pin_memory=True,
314
+ dataset=dict(
315
+ type=dataset_type,
316
+ data_root=data_parent,
317
+ ann_file='annotations/SSDD_instances_val.json',
318
+ data_prefix=dict(img_path='imgs'),
319
+ test_mode=True,
320
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
321
+ pipeline=test_pipeline,
322
+ backend_args=backend_args
323
+ )
324
+ )
325
+
326
+ datamodule_cfg = dict(
327
+ type='PLDataModule',
328
+ train_loader=dict(
329
+ batch_size=train_batch_size_per_gpu,
330
+ num_workers=train_num_workers,
331
+ persistent_workers=persistent_workers,
332
+ pin_memory=True,
333
+ dataset=dict(
334
+ type=dataset_type,
335
+ data_root=data_parent,
336
+ ann_file='annotations/SSDD_instances_train.json',
337
+ data_prefix=dict(img_path='imgs'),
338
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
339
+ pipeline=train_pipeline,
340
+ backend_args=backend_args)
341
+ ),
342
+ val_loader=val_loader,
343
+ test_loader=val_loader,
344
+ predict_loader=val_loader
345
+ )
configs/rsprompter/maskrcnn_whu_config.py ADDED
@@ -0,0 +1,349 @@
1
+ custom_imports = dict(imports=['mmseg.datasets', 'mmseg.models', 'mmdet.models'], allow_failed_imports=False)
2
+
3
+ max_epochs = 150
4
+
5
+ optimizer = dict(
6
+ type='AdamW',
7
+ lr=0.0005,
8
+ weight_decay=1e-4
9
+ )
10
+
11
+ param_scheduler = [
12
+ # warm up learning rate scheduler
13
+ dict(
14
+ type='LinearLR',
15
+ start_factor=1e-4,
16
+ by_epoch=True,
17
+ begin=0,
18
+ end=1,
19
+ # update by iter
20
+ convert_to_iter_based=True),
21
+ # main learning rate scheduler
22
+ dict(
23
+ type='CosineAnnealingLR',
24
+ T_max=max_epochs,
25
+ by_epoch=True,
26
+ begin=1,
27
+ end=max_epochs,
28
+ )
29
+ ]
30
+
31
+ param_scheduler_callback = dict(
32
+ type='ParamSchedulerHook'
33
+ )
34
+
35
+ evaluator_ = dict(
36
+ type='MeanAveragePrecision',
37
+ # iou_type='segm',
38
+ iou_type='bbox',
39
+ # dist_sync_on_step=True,
40
+ # compute_on_cpu=True,
41
+ )
42
+
43
+ evaluator_ = dict(
44
+ type='CocoPLMetric',
45
+ metric=['bbox', 'segm'],
46
+ proposal_nums=[1, 10, 100]
47
+ )
48
+
49
+ evaluator = dict(
50
+ val_evaluator=evaluator_,
51
+ )
52
+
53
+
54
+ image_size = (512, 512)
55
+ data_preprocessor = dict(
56
+ type='mmdet.DetDataPreprocessor',
57
+ mean=[123.675, 116.28, 103.53],
58
+ std=[58.395, 57.12, 57.375],
59
+ bgr_to_rgb=True,
60
+ pad_mask=True,
61
+ mask_pad_value=0,
62
+ pad_size_divisor=32
63
+ )
64
+
65
+ num_things_classes = 1
66
+ num_stuff_classes = 0
67
+ num_classes = num_things_classes + num_stuff_classes
68
+ num_queries = 90
69
+
70
+ # model settings
71
+ model = dict(
72
+ type='mmdet.MaskRCNN',
73
+ data_preprocessor=data_preprocessor,
74
+ backbone=dict(
75
+ type='mmdet.ResNet',
76
+ depth=50,
77
+ num_stages=4,
78
+ out_indices=(0, 1, 2, 3),
79
+ frozen_stages=-1,
80
+ norm_cfg=dict(type='BN', requires_grad=True),
81
+ norm_eval=True,
82
+ style='pytorch',
83
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')
84
+ ),
85
+ neck=dict(
86
+ type='mmdet.FPN',
87
+ in_channels=[256, 512, 1024, 2048],
88
+ out_channels=256,
89
+ num_outs=5),
90
+ rpn_head=dict(
91
+ type='mmdet.RPNHead',
92
+ in_channels=256,
93
+ feat_channels=256,
94
+ anchor_generator=dict(
95
+ type='mmdet.AnchorGenerator',
96
+ scales=[8],
97
+ ratios=[0.5, 1.0, 2.0],
98
+ strides=[4, 8, 16, 32, 64]),
99
+ bbox_coder=dict(
100
+ type='mmdet.DeltaXYWHBBoxCoder',
101
+ target_means=[.0, .0, .0, .0],
102
+ target_stds=[1.0, 1.0, 1.0, 1.0]),
103
+ loss_cls=dict(
104
+ type='mmdet.CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
105
+ loss_bbox=dict(type='mmdet.L1Loss', loss_weight=1.0)),
106
+ roi_head=dict(
107
+ type='mmdet.StandardRoIHead',
108
+ bbox_roi_extractor=dict(
109
+ type='mmdet.SingleRoIExtractor',
110
+ roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
111
+ out_channels=256,
112
+ featmap_strides=[4, 8, 16, 32]),
113
+ bbox_head=dict(
114
+ type='mmdet.Shared2FCBBoxHead',
115
+ in_channels=256,
116
+ fc_out_channels=1024,
117
+ roi_feat_size=7,
118
+ num_classes=num_classes,
119
+ bbox_coder=dict(
120
+ type='mmdet.DeltaXYWHBBoxCoder',
121
+ target_means=[0., 0., 0., 0.],
122
+ target_stds=[0.1, 0.1, 0.2, 0.2]),
123
+ reg_class_agnostic=False,
124
+ loss_cls=dict(
125
+ type='mmdet.CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
126
+ loss_bbox=dict(type='mmdet.L1Loss', loss_weight=1.0)),
127
+ mask_roi_extractor=dict(
128
+ type='mmdet.SingleRoIExtractor',
129
+ roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),
130
+ out_channels=256,
131
+ featmap_strides=[4, 8, 16, 32]),
132
+ mask_head=dict(
133
+ type='mmdet.FCNMaskHead',
134
+ num_convs=4,
135
+ in_channels=256,
136
+ conv_out_channels=256,
137
+ num_classes=num_classes,
138
+ loss_mask=dict(
139
+ type='mmdet.CrossEntropyLoss', use_mask=True, loss_weight=1.0))),
140
+ # model training and testing settings
141
+ train_cfg=dict(
142
+ rpn=dict(
143
+ assigner=dict(
144
+ type='mmdet.MaxIoUAssigner',
145
+ pos_iou_thr=0.7,
146
+ neg_iou_thr=0.3,
147
+ min_pos_iou=0.3,
148
+ match_low_quality=True,
149
+ ignore_iof_thr=-1),
150
+ sampler=dict(
151
+ type='mmdet.RandomSampler',
152
+ num=256,
153
+ pos_fraction=0.5,
154
+ neg_pos_ub=-1,
155
+ add_gt_as_proposals=False),
156
+ allowed_border=-1,
157
+ pos_weight=-1,
158
+ debug=False),
159
+ rpn_proposal=dict(
160
+ nms_pre=2000,
161
+ max_per_img=1000,
162
+ nms=dict(type='nms', iou_threshold=0.7),
163
+ min_bbox_size=0),
164
+ rcnn=dict(
165
+ assigner=dict(
166
+ type='mmdet.MaxIoUAssigner',
167
+ pos_iou_thr=0.5,
168
+ neg_iou_thr=0.5,
169
+ min_pos_iou=0.5,
170
+ match_low_quality=True,
171
+ ignore_iof_thr=-1),
172
+ sampler=dict(
173
+ type='mmdet.RandomSampler',
174
+ num=512,
175
+ pos_fraction=0.25,
176
+ neg_pos_ub=-1,
177
+ add_gt_as_proposals=True),
178
+ mask_size=28,
179
+ pos_weight=-1,
180
+ debug=False)),
181
+ test_cfg=dict(
182
+ rpn=dict(
183
+ nms_pre=1000,
184
+ max_per_img=1000,
185
+ nms=dict(type='nms', iou_threshold=0.7),
186
+ min_bbox_size=0),
187
+ rcnn=dict(
188
+ score_thr=0.05,
189
+ nms=dict(type='nms', iou_threshold=0.5),
190
+ max_per_img=100,
191
+ mask_thr_binary=0.5)
192
+ )
193
+ )
194
+
195
+
196
+ model_cfg = dict(
197
+ type='MMDetPLer',
198
+ hyperparameters=dict(
199
+ optimizer=optimizer,
200
+ param_scheduler=param_scheduler,
201
+ evaluator=evaluator,
202
+ ),
203
+ whole_model=model,
204
+ )
205
+
206
+ task_name = 'whu_ins'
207
+ exp_name = 'E20230525_0'
208
+ logger = dict(
209
+ type='WandbLogger',
210
+ project=task_name,
211
+ group='maskrcnn',
212
+ name=exp_name
213
+ )
214
+ # logger = None
215
+
216
+
217
+ callbacks = [
218
+ param_scheduler_callback,
219
+ dict(
220
+ type='ModelCheckpoint',
221
+ dirpath=f'results/{task_name}/{exp_name}/checkpoints',
222
+ save_last=True,
223
+ mode='max',
224
+ monitor='valmap_0',
225
+ save_top_k=2,
226
+ filename='epoch_{epoch}-map_{valmap_0:.4f}'
227
+ ),
228
+ dict(
229
+ type='LearningRateMonitor',
230
+ logging_interval='step'
231
+ )
232
+ ]
233
+
234
+
235
+ trainer_cfg = dict(
236
+ compiled_model=False,
237
+ accelerator="auto",
238
+ strategy="auto",
239
+ # strategy="ddp",
240
+ # strategy='ddp_find_unused_parameters_true',
241
+ # precision='32',
242
+ # precision='16-mixed',
243
+ devices=4,
244
+ default_root_dir=f'results/{task_name}/{exp_name}',
245
+ # default_root_dir='results/tmp',
246
+ max_epochs=max_epochs,
247
+ logger=logger,
248
+ callbacks=callbacks,
249
+ log_every_n_steps=20,
250
+ check_val_every_n_epoch=10,
251
+ benchmark=True,
252
+ # sync_batchnorm=True,
253
+ # fast_dev_run=True,
254
+
255
+ # limit_train_batches=1,
256
+ # limit_val_batches=0,
257
+ # limit_test_batches=None,
258
+ # limit_predict_batches=None,
259
+ # overfit_batches=0.0,
260
+
261
+ # val_check_interval=None,
262
+ # num_sanity_val_steps=1,
263
+ # enable_checkpointing=None,
264
+ # enable_progress_bar=None,
265
+ # enable_model_summary=None,
266
+ # accumulate_grad_batches=32,
267
+ # gradient_clip_val=15,
268
+ # gradient_clip_algorithm='norm',
269
+ # deterministic=None,
270
+ # inference_mode: bool=True,
271
+ use_distributed_sampler=True,
272
+ # profiler="simple",
273
+ # detect_anomaly=False,
274
+ # barebones=False,
275
+ # plugins=None,
276
+ # reload_dataloaders_every_n_epochs=0,
277
+ )
278
+
279
+
280
+ backend_args = None
281
+ train_pipeline = [
282
+ dict(type='mmdet.LoadImageFromFile'),
283
+ dict(type='mmdet.LoadAnnotations', with_bbox=True, with_mask=True),
284
+ dict(type='mmdet.Resize', scale=image_size),
285
+ dict(type='mmdet.RandomFlip', prob=0.5),
286
+ dict(type='mmdet.PackDetInputs')
287
+ ]
288
+
289
+ test_pipeline = [
290
+ dict(type='mmdet.LoadImageFromFile', backend_args=backend_args),
291
+ dict(type='mmdet.Resize', scale=image_size),
292
+ # If you don't have a gt annotation, delete the pipeline
293
+ dict(type='mmdet.LoadAnnotations', with_bbox=True, with_mask=True),
294
+ dict(
295
+ type='mmdet.PackDetInputs',
296
+ meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
297
+ 'scale_factor'))
298
+ ]
299
+
300
+
301
+ train_batch_size_per_gpu = 8
302
+ train_num_workers = 4
303
+ test_batch_size_per_gpu = 8
304
+ test_num_workers = 4
305
+ persistent_workers = True
306
+
307
+ data_parent = '/Users/kyanchen/datasets/Building/WHU'
308
+ train_data_prefix = 'train/'
309
+ val_data_prefix = 'test/'
310
+
311
+ dataset_type = 'WHUInsSegDataset'
312
+
313
+ val_loader = dict(
314
+ batch_size=test_batch_size_per_gpu,
315
+ num_workers=test_num_workers,
316
+ persistent_workers=persistent_workers,
317
+ pin_memory=True,
318
+ dataset=dict(
319
+ type=dataset_type,
320
+ data_root=data_parent,
321
+ ann_file='annotations/WHU_building_test.json',
322
+ data_prefix=dict(img_path=val_data_prefix+'/image', seg_path=val_data_prefix+'/label'),
323
+ test_mode=True,
324
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
325
+ pipeline=test_pipeline,
326
+ backend_args=backend_args,
327
+ )
328
+ )
329
+
330
+ datamodule_cfg = dict(
331
+ type='PLDataModule',
332
+ train_loader=dict(
333
+ batch_size=train_batch_size_per_gpu,
334
+ num_workers=train_num_workers,
335
+ persistent_workers=persistent_workers,
336
+ pin_memory=True,
337
+ dataset=dict(
338
+ type=dataset_type,
339
+ data_root=data_parent,
340
+ ann_file='annotations/WHU_building_train.json',
341
+ data_prefix=dict(img_path=train_data_prefix+'/image', seg_path=train_data_prefix+'/label'),
342
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
343
+ pipeline=train_pipeline,
344
+ backend_args=backend_args)
345
+ ),
346
+ val_loader=val_loader,
347
+ test_loader=val_loader,
348
+ predict_loader=val_loader
349
+ )
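(Back-of-envelope sketch, not part of the uploaded file: it only illustrates the learning-rate shape implied by the LinearLR warmup and CosineAnnealingLR entries above, assuming epoch-level granularity; the exact values come from the schedulers themselves.)

# Rough LR curve implied by the config: 1-epoch linear warmup, then cosine decay.
from math import cos, pi

base_lr, start_factor, max_epochs = 0.0005, 1e-4, 150

def approx_lr(epoch: float) -> float:
    if epoch < 1:  # LinearLR: ramp from base_lr * start_factor up to base_lr
        return base_lr * (start_factor + (1 - start_factor) * epoch)
    # CosineAnnealingLR over epochs 1..max_epochs, decaying toward ~0
    t = (epoch - 1) / (max_epochs - 1)
    return base_lr * 0.5 * (1 + cos(pi * t))

print(approx_lr(0.0), approx_lr(1.0), approx_lr(75), approx_lr(150))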
configs/rsprompter/predict_rsprompter_anchor_nwpu.py ADDED
@@ -0,0 +1,277 @@
1
+ custom_imports = dict(
2
+ imports=['mmseg.datasets', 'mmseg.models', 'mmdet.models'],
3
+ allow_failed_imports=False)
4
+
5
+ sub_model_train = [
6
+ 'panoptic_head',
7
+ 'data_preprocessor'
8
+ ]
9
+
10
+ sub_model_optim = {
11
+ 'panoptic_head': {'lr_mult': 1},
12
+ }
13
+
14
+
15
+ max_epochs = 1200
16
+ optimizer = dict(type='AdamW', lr=0.0005, weight_decay=0.0001)
17
+ param_scheduler = [
18
+ dict(
19
+ type='LinearLR',
20
+ start_factor=0.0005,
21
+ by_epoch=True,
22
+ begin=0,
23
+ end=1,
24
+ convert_to_iter_based=True),
25
+ dict(type='CosineAnnealingLR', T_max=120, by_epoch=True, begin=1, end=120)
26
+ ]
27
+
28
+ param_scheduler_callback = dict(type='ParamSchedulerHook')
29
+ evaluator_ = dict(type='MeanAveragePrecision', iou_type='segm')
30
+ evaluator = dict(
31
+ val_evaluator=dict(type='MeanAveragePrecision', iou_type='segm'))
32
+
33
+ image_size = (1024, 1024)
34
+
35
+ data_preprocessor = dict(
36
+ type='mmdet.DetDataPreprocessor',
37
+ mean=[123.675, 116.28, 103.53],
38
+ std=[58.395, 57.12, 57.375],
39
+ bgr_to_rgb=True,
40
+ pad_size_divisor=32,
41
+ pad_mask=True,
42
+ mask_pad_value=0,
43
+ )
44
+
45
+ num_things_classes = 10
46
+ num_stuff_classes = 0
47
+ num_classes = num_things_classes + num_stuff_classes
48
+ prompt_shape = (60, 4)
49
+
50
+
51
+ model_cfg = dict(
52
+ type='SegSAMAnchorPLer',
53
+ hyperparameters=dict(
54
+ optimizer=optimizer,
55
+ param_scheduler=param_scheduler,
56
+ evaluator=evaluator,
57
+ ),
58
+ need_train_names=sub_model_train,
59
+ data_preprocessor=data_preprocessor,
60
+ backbone=dict(
61
+ type='vit_h',
62
+ checkpoint='pretrain/sam/sam_vit_h_4b8939.pth',
63
+ # type='vit_b',
64
+ # checkpoint='pretrain/sam/sam_vit_b_01ec64.pth',
65
+ ),
66
+ panoptic_head=dict(
67
+ type='SAMAnchorInstanceHead',
68
+ neck=dict(
69
+ type='SAMAggregatorNeck',
70
+ in_channels=[1280] * 32,
71
+ # in_channels=[768] * 12,
72
+ inner_channels=32,
73
+ selected_channels=range(4, 32, 2),
74
+ # selected_channels=range(4, 12, 2),
75
+ out_channels=256,
76
+ up_sample_scale=4,
77
+ ),
78
+ rpn_head=dict(
79
+ type='mmdet.RPNHead',
80
+ in_channels=256,
81
+ feat_channels=256,
82
+ anchor_generator=dict(
83
+ type='mmdet.AnchorGenerator',
84
+ scales=[2, 4, 8, 16, 32, 64],
85
+ ratios=[0.5, 1.0, 2.0],
86
+ strides=[8, 16, 32]),
87
+ bbox_coder=dict(
88
+ type='mmdet.DeltaXYWHBBoxCoder',
89
+ target_means=[.0, .0, .0, .0],
90
+ target_stds=[1.0, 1.0, 1.0, 1.0]),
91
+ loss_cls=dict(
92
+ type='mmdet.CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
93
+ loss_bbox=dict(type='mmdet.SmoothL1Loss', loss_weight=1.0)),
94
+ roi_head=dict(
95
+ type='SAMAnchorPromptRoIHead',
96
+ bbox_roi_extractor=dict(
97
+ type='mmdet.SingleRoIExtractor',
98
+ roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
99
+ out_channels=256,
100
+ featmap_strides=[8, 16, 32]),
101
+ bbox_head=dict(
102
+ type='mmdet.Shared2FCBBoxHead',
103
+ in_channels=256,
104
+ fc_out_channels=1024,
105
+ roi_feat_size=7,
106
+ num_classes=num_classes,
107
+ bbox_coder=dict(
108
+ type='mmdet.DeltaXYWHBBoxCoder',
109
+ target_means=[0., 0., 0., 0.],
110
+ target_stds=[0.1, 0.1, 0.2, 0.2]),
111
+ reg_class_agnostic=False,
112
+ loss_cls=dict(
113
+ type='mmdet.CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
114
+ loss_bbox=dict(type='mmdet.SmoothL1Loss', loss_weight=1.0)),
115
+ mask_roi_extractor=dict(
116
+ type='mmdet.SingleRoIExtractor',
117
+ roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),
118
+ out_channels=256,
119
+ featmap_strides=[8, 16, 32]),
120
+ mask_head=dict(
121
+ type='SAMPromptMaskHead',
122
+ per_query_point=prompt_shape[1],
123
+ with_sincos=True,
124
+ class_agnostic=True,
125
+ loss_mask=dict(
126
+ type='mmdet.CrossEntropyLoss', use_mask=True, loss_weight=1.0))),
127
+ # model training and testing settings
128
+ train_cfg=dict(
129
+ rpn=dict(
130
+ assigner=dict(
131
+ type='mmdet.MaxIoUAssigner',
132
+ pos_iou_thr=0.7,
133
+ neg_iou_thr=0.3,
134
+ min_pos_iou=0.3,
135
+ match_low_quality=True,
136
+ ignore_iof_thr=-1),
137
+ sampler=dict(
138
+ type='mmdet.RandomSampler',
139
+ num=512,
140
+ pos_fraction=0.5,
141
+ neg_pos_ub=-1,
142
+ add_gt_as_proposals=False),
143
+ allowed_border=-1,
144
+ pos_weight=-1,
145
+ debug=False),
146
+ rpn_proposal=dict(
147
+ nms_pre=2000,
148
+ max_per_img=1000,
149
+ nms=dict(type='nms', iou_threshold=0.7),
150
+ min_bbox_size=0),
151
+ rcnn=dict(
152
+ assigner=dict(
153
+ type='mmdet.MaxIoUAssigner',
154
+ pos_iou_thr=0.5,
155
+ neg_iou_thr=0.5,
156
+ min_pos_iou=0.5,
157
+ match_low_quality=True,
158
+ ignore_iof_thr=-1),
159
+ sampler=dict(
160
+ type='mmdet.RandomSampler',
161
+ num=256,
162
+ pos_fraction=0.25,
163
+ neg_pos_ub=-1,
164
+ add_gt_as_proposals=True),
165
+ mask_size=1024,
166
+ pos_weight=-1,
167
+ debug=False)),
168
+ test_cfg=dict(
169
+ rpn=dict(
170
+ nms_pre=1000,
171
+ max_per_img=1000,
172
+ nms=dict(type='nms', iou_threshold=0.7),
173
+ min_bbox_size=0),
174
+ rcnn=dict(
175
+ score_thr=0.05,
176
+ nms=dict(type='nms', iou_threshold=0.5),
177
+ max_per_img=100,
178
+ mask_thr_binary=0.5)
179
+ )
180
+ )
181
+ )
182
+
183
+
184
+ task_name = 'nwpu_ins'
185
+ exp_name = 'rsprompter_anchor_E20230601_0'
186
+ callbacks = [
187
+ dict(
188
+ type='DetVisualizationHook',
189
+ draw=True,
190
+ interval=1,
191
+ score_thr=0.1,
192
+ show=False,
193
+ wait_time=1.,
194
+ test_out_dir='visualization',
195
+ )
196
+ ]
197
+
198
+
199
+ vis_backends = [dict(type='mmdet.LocalVisBackend')]
200
+ visualizer = dict(
201
+ type='mmdet.DetLocalVisualizer',
202
+ vis_backends=vis_backends,
203
+ name='visualizer',
204
+ fig_save_cfg=dict(
205
+ frameon=False,
206
+ figsize=(40, 20),
207
+ # dpi=300,
208
+ ),
209
+ line_width=2,
210
+ alpha=0.8
211
+ )
212
+
213
+
214
+ trainer_cfg = dict(
215
+ compiled_model=False,
216
+ accelerator='auto',
217
+ strategy='auto',
218
+ devices=[0],
219
+ default_root_dir=f'results/{task_name}/{exp_name}',
220
+ max_epochs=120,
221
+ logger=None,
222
+ callbacks=callbacks,
223
+ log_every_n_steps=20,
224
+ check_val_every_n_epoch=10,
225
+ benchmark=True,
226
+ use_distributed_sampler=True)
227
+
228
+ backend_args = None
229
+ train_pipeline = [
230
+ dict(type='mmdet.LoadImageFromFile'),
231
+ dict(type='mmdet.LoadAnnotations', with_bbox=True, with_mask=True),
232
+ dict(type='mmdet.Resize', scale=image_size),
233
+ dict(type='mmdet.RandomFlip', prob=0.5),
234
+ dict(type='mmdet.PackDetInputs')
235
+ ]
236
+
237
+ test_pipeline = [
238
+ dict(type='mmdet.LoadImageFromFile', backend_args=backend_args),
239
+ dict(type='mmdet.Resize', scale=image_size),
240
+ # If you don't have a gt annotation, delete the pipeline
241
+ dict(type='mmdet.LoadAnnotations', with_bbox=True, with_mask=True),
242
+ dict(
243
+ type='mmdet.PackDetInputs',
244
+ meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
245
+ 'scale_factor'))
246
+ ]
247
+
248
+ train_batch_size_per_gpu = 8
249
+ train_num_workers = 4
250
+ test_batch_size_per_gpu = 2
251
+ test_num_workers = 0
252
+ persistent_workers = False
253
+
254
+ data_parent = '/mnt/search01/dataset/cky_data/NWPU10'
255
+ train_data_prefix = ''
256
+ val_data_prefix = ''
257
+
258
+ dataset_type = 'NWPUInsSegDataset'
259
+ val_loader = dict(
260
+ batch_size=test_batch_size_per_gpu,
261
+ num_workers=test_num_workers,
262
+ persistent_workers=persistent_workers,
263
+ pin_memory=True,
264
+ dataset=dict(
265
+ type=dataset_type,
266
+ data_root=data_parent,
267
+ ann_file='NWPU_instances_val.json',
268
+ data_prefix=dict(img_path='positive image set'),
269
+ test_mode=True,
270
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
271
+ pipeline=test_pipeline,
272
+ backend_args=backend_args))
273
+
274
+ datamodule_cfg = dict(
275
+ type='PLDataModule',
276
+ predict_loader=val_loader,
277
+ )
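(Illustrative-only snippet, assuming prompt_shape is read as (max instances per image, points per query) — which matches per_query_point=prompt_shape[1] in the mask head — and that selected_channels picks which of the 32 ViT-H block outputs feed the SAMAggregatorNeck.)

# Quick sanity check on the neck/prompt settings used in the config above.
selected_channels = range(4, 32, 2)
prompt_shape = (60, 4)

print(len(list(selected_channels)))       # 14 intermediate ViT feature maps are aggregated
print(prompt_shape[0] * prompt_shape[1])  # up to 240 prompt points per image in total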
configs/rsprompter/rsprompter_anchor_nwpu_config.py ADDED
@@ -0,0 +1,345 @@
1
+ custom_imports = dict(imports=['mmseg.datasets', 'mmseg.models'], allow_failed_imports=False)
2
+
3
+ sub_model_train = [
4
+ 'panoptic_head',
5
+ 'data_preprocessor'
6
+ ]
7
+
8
+ sub_model_optim = {
9
+ 'panoptic_head': {'lr_mult': 1},
10
+ }
11
+
12
+ max_epochs = 1200
13
+
14
+ optimizer = dict(
15
+ type='AdamW',
16
+ sub_model=sub_model_optim,
17
+ lr=0.0005,
18
+ weight_decay=1e-3
19
+ )
20
+
21
+ param_scheduler = [
22
+ # warm up learning rate scheduler
23
+ dict(
24
+ type='LinearLR',
25
+ start_factor=1e-4,
26
+ by_epoch=True,
27
+ begin=0,
28
+ end=1,
29
+ # update by iter
30
+ convert_to_iter_based=True),
31
+ # main learning rate scheduler
32
+ dict(
33
+ type='CosineAnnealingLR',
34
+ T_max=max_epochs,
35
+ by_epoch=True,
36
+ begin=1,
37
+ end=max_epochs,
38
+ ),
39
+ ]
40
+
41
+ param_scheduler_callback = dict(
42
+ type='ParamSchedulerHook'
43
+ )
44
+
45
+ evaluator_ = dict(
46
+ type='CocoPLMetric',
47
+ metric=['bbox', 'segm'],
48
+ proposal_nums=[1, 10, 100]
49
+ )
50
+
51
+ evaluator = dict(
52
+ val_evaluator=evaluator_,
53
+ )
54
+
55
+
56
+ image_size = (1024, 1024)
57
+
58
+ data_preprocessor = dict(
59
+ type='mmdet.DetDataPreprocessor',
60
+ mean=[123.675, 116.28, 103.53],
61
+ std=[58.395, 57.12, 57.375],
62
+ bgr_to_rgb=True,
63
+ pad_size_divisor=32,
64
+ pad_mask=True,
65
+ mask_pad_value=0,
66
+ )
67
+
68
+ num_things_classes = 10
69
+ num_stuff_classes = 0
70
+ num_classes = num_things_classes + num_stuff_classes
71
+ prompt_shape = (60, 4)
72
+
73
+ model_cfg = dict(
74
+ type='SegSAMAnchorPLer',
75
+ hyperparameters=dict(
76
+ optimizer=optimizer,
77
+ param_scheduler=param_scheduler,
78
+ evaluator=evaluator,
79
+ ),
80
+ need_train_names=sub_model_train,
81
+ data_preprocessor=data_preprocessor,
82
+ backbone=dict(
83
+ type='vit_h',
84
+ checkpoint='pretrain/sam/sam_vit_h_4b8939.pth',
85
+ # type='vit_b',
86
+ # checkpoint='pretrain/sam/sam_vit_b_01ec64.pth',
87
+ ),
88
+ panoptic_head=dict(
89
+ type='SAMAnchorInstanceHead',
90
+ neck=dict(
91
+ type='SAMAggregatorNeck',
92
+ in_channels=[1280] * 32,
93
+ # in_channels=[768] * 12,
94
+ inner_channels=32,
95
+ selected_channels=range(8, 32, 2),
96
+ # selected_channels=range(4, 12, 2),
97
+ out_channels=256,
98
+ up_sample_scale=4,
99
+ ),
100
+ rpn_head=dict(
101
+ type='mmdet.RPNHead',
102
+ in_channels=256,
103
+ feat_channels=256,
104
+ anchor_generator=dict(
105
+ type='mmdet.AnchorGenerator',
106
+ scales=[2, 4, 8, 16, 32, 64],
107
+ ratios=[0.5, 1.0, 2.0],
108
+ strides=[8, 16, 32]),
109
+ bbox_coder=dict(
110
+ type='mmdet.DeltaXYWHBBoxCoder',
111
+ target_means=[.0, .0, .0, .0],
112
+ target_stds=[1.0, 1.0, 1.0, 1.0]),
113
+ loss_cls=dict(
114
+ type='mmdet.CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
115
+ loss_bbox=dict(type='mmdet.SmoothL1Loss', loss_weight=1.0)),
116
+ roi_head=dict(
117
+ type='SAMAnchorPromptRoIHead',
118
+ bbox_roi_extractor=dict(
119
+ type='mmdet.SingleRoIExtractor',
120
+ roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
121
+ out_channels=256,
122
+ featmap_strides=[8, 16, 32]),
123
+ bbox_head=dict(
124
+ type='mmdet.Shared2FCBBoxHead',
125
+ in_channels=256,
126
+ fc_out_channels=1024,
127
+ roi_feat_size=7,
128
+ num_classes=num_classes,
129
+ bbox_coder=dict(
130
+ type='mmdet.DeltaXYWHBBoxCoder',
131
+ target_means=[0., 0., 0., 0.],
132
+ target_stds=[0.1, 0.1, 0.2, 0.2]),
133
+ reg_class_agnostic=False,
134
+ loss_cls=dict(
135
+ type='mmdet.CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
136
+ loss_bbox=dict(type='mmdet.SmoothL1Loss', loss_weight=1.0)),
137
+ mask_roi_extractor=dict(
138
+ type='mmdet.SingleRoIExtractor',
139
+ roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),
140
+ out_channels=256,
141
+ featmap_strides=[8, 16, 32]),
142
+ mask_head=dict(
143
+ type='SAMPromptMaskHead',
144
+ per_query_point=prompt_shape[1],
145
+ with_sincos=True,
146
+ class_agnostic=True,
147
+ loss_mask=dict(
148
+ type='mmdet.CrossEntropyLoss', use_mask=True, loss_weight=1.0))),
149
+ # model training and testing settings
150
+ train_cfg=dict(
151
+ rpn=dict(
152
+ assigner=dict(
153
+ type='mmdet.MaxIoUAssigner',
154
+ pos_iou_thr=0.7,
155
+ neg_iou_thr=0.3,
156
+ min_pos_iou=0.3,
157
+ match_low_quality=True,
158
+ ignore_iof_thr=-1),
159
+ sampler=dict(
160
+ type='mmdet.RandomSampler',
161
+ num=512,
162
+ pos_fraction=0.5,
163
+ neg_pos_ub=-1,
164
+ add_gt_as_proposals=False),
165
+ allowed_border=-1,
166
+ pos_weight=-1,
167
+ debug=False),
168
+ rpn_proposal=dict(
169
+ nms_pre=2000,
170
+ max_per_img=1000,
171
+ nms=dict(type='nms', iou_threshold=0.7),
172
+ min_bbox_size=0),
173
+ rcnn=dict(
174
+ assigner=dict(
175
+ type='mmdet.MaxIoUAssigner',
176
+ pos_iou_thr=0.5,
177
+ neg_iou_thr=0.5,
178
+ min_pos_iou=0.5,
179
+ match_low_quality=True,
180
+ ignore_iof_thr=-1),
181
+ sampler=dict(
182
+ type='mmdet.RandomSampler',
183
+ num=256,
184
+ pos_fraction=0.25,
185
+ neg_pos_ub=-1,
186
+ add_gt_as_proposals=True),
187
+ mask_size=1024,
188
+ pos_weight=-1,
189
+ debug=False)),
190
+ test_cfg=dict(
191
+ rpn=dict(
192
+ nms_pre=1000,
193
+ max_per_img=1000,
194
+ nms=dict(type='nms', iou_threshold=0.7),
195
+ min_bbox_size=0),
196
+ rcnn=dict(
197
+ score_thr=0.05,
198
+ nms=dict(type='nms', iou_threshold=0.5),
199
+ max_per_img=100,
200
+ mask_thr_binary=0.5)
201
+ )
202
+ )
203
+ )
204
+
205
+
206
+ task_name = 'nwpu_ins'
207
+ exp_name = 'E20230629_1'
208
+ logger = dict(
209
+ type='WandbLogger',
210
+ project=task_name,
211
+ group='sam-anchor',
212
+ name=exp_name
213
+ )
214
+
215
+
216
+ callbacks = [
217
+ param_scheduler_callback,
218
+ dict(
219
+ type='ModelCheckpoint',
220
+ dirpath=f'results/{task_name}/{exp_name}/checkpoints',
221
+ save_last=True,
222
+ mode='max',
223
+ monitor='valsegm_map_0',
224
+ save_top_k=3,
225
+ filename='epoch_{epoch}-map_{valsegm_map_0:.4f}'
226
+ ),
227
+ dict(
228
+ type='LearningRateMonitor',
229
+ logging_interval='step'
230
+ )
231
+ ]
232
+
233
+
234
+ trainer_cfg = dict(
235
+ compiled_model=False,
236
+ accelerator="auto",
237
+ strategy="auto",
238
+ # strategy="ddp",
239
+ # strategy='ddp_find_unused_parameters_true',
240
+ # precision='32',
241
+ # precision='16-mixed',
242
+ devices=8,
243
+ default_root_dir=f'results/{task_name}/{exp_name}',
244
+ # default_root_dir='results/tmp',
245
+ max_epochs=max_epochs,
246
+ logger=logger,
247
+ callbacks=callbacks,
248
+ log_every_n_steps=5,
249
+ check_val_every_n_epoch=5,
250
+ benchmark=True,
251
+ # sync_batchnorm=True,
252
+ # fast_dev_run=True,
253
+
254
+ # limit_train_batches=1,
255
+ # limit_val_batches=0,
256
+ # limit_test_batches=None,
257
+ # limit_predict_batches=None,
258
+ # overfit_batches=0.0,
259
+
260
+ # val_check_interval=None,
261
+ # num_sanity_val_steps=0,
262
+ # enable_checkpointing=None,
263
+ # enable_progress_bar=None,
264
+ # enable_model_summary=None,
265
+ # accumulate_grad_batches=32,
266
+ # gradient_clip_val=15,
267
+ # gradient_clip_algorithm='norm',
268
+ # deterministic=None,
269
+ # inference_mode: bool=True,
270
+ use_distributed_sampler=True,
271
+ # profiler="simple",
272
+ # detect_anomaly=False,
273
+ # barebones=False,
274
+ # plugins=None,
275
+ # reload_dataloaders_every_n_epochs=0,
276
+ )
277
+
278
+
279
+ backend_args = None
280
+ train_pipeline = [
281
+ dict(type='mmdet.LoadImageFromFile'),
282
+ dict(type='mmdet.LoadAnnotations', with_bbox=True, with_mask=True),
283
+ dict(type='mmdet.Resize', scale=image_size),
284
+ dict(type='mmdet.RandomFlip', prob=0.5),
285
+ dict(type='mmdet.PackDetInputs')
286
+ ]
287
+
288
+ test_pipeline = [
289
+ dict(type='mmdet.LoadImageFromFile', backend_args=backend_args),
290
+ dict(type='mmdet.Resize', scale=image_size),
291
+ # If you don't have a gt annotation, delete the pipeline
292
+ dict(type='mmdet.LoadAnnotations', with_bbox=True, with_mask=True),
293
+ dict(
294
+ type='mmdet.PackDetInputs',
295
+ meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
296
+ 'scale_factor'))
297
+ ]
298
+
299
+
300
+ train_batch_size_per_gpu = 2
301
+ train_num_workers = 2
302
+ test_batch_size_per_gpu = 2
303
+ test_num_workers = 2
304
+ persistent_workers = True
305
+
306
+ data_parent = '/mnt/search01/dataset/cky_data/NWPU10'
307
+ train_data_prefix = ''
308
+ val_data_prefix = ''
309
+ dataset_type = 'NWPUInsSegDataset'
310
+
311
+ val_loader = dict(
312
+ batch_size=test_batch_size_per_gpu,
313
+ num_workers=test_num_workers,
314
+ persistent_workers=persistent_workers,
315
+ pin_memory=True,
316
+ dataset=dict(
317
+ type=dataset_type,
318
+ data_root=data_parent,
319
+ ann_file='NWPU_instances_val.json',
320
+ data_prefix=dict(img_path='positive image set'),
321
+ test_mode=True,
322
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
323
+ pipeline=test_pipeline,
324
+ backend_args=backend_args))
325
+
326
+ datamodule_cfg = dict(
327
+ type='PLDataModule',
328
+ train_loader=dict(
329
+ batch_size=train_batch_size_per_gpu,
330
+ num_workers=train_num_workers,
331
+ persistent_workers=persistent_workers,
332
+ pin_memory=True,
333
+ dataset=dict(
334
+ type=dataset_type,
335
+ data_root=data_parent,
336
+ ann_file='NWPU_instances_train.json',
337
+ data_prefix=dict(img_path='positive image set'),
338
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
339
+ pipeline=train_pipeline,
340
+ backend_args=backend_args)
341
+ ),
342
+ val_loader=val_loader,
343
+ # test_loader=val_loader
344
+ predict_loader=val_loader
345
+ )
configs/rsprompter/rsprompter_anchor_ssdd_config.py ADDED
@@ -0,0 +1,347 @@
1
+ custom_imports = dict(imports=['mmseg.datasets', 'mmseg.models'], allow_failed_imports=False)
2
+
3
+ sub_model_train = [
4
+ 'panoptic_head',
5
+ 'data_preprocessor'
6
+ ]
7
+
8
+ sub_model_optim = {
9
+ 'panoptic_head': {'lr_mult': 1},
10
+ }
11
+
12
+ max_epochs = 1000
13
+
14
+ optimizer = dict(
15
+ type='AdamW',
16
+ sub_model=sub_model_optim,
17
+ lr=0.0005,
18
+ weight_decay=1e-3
19
+ )
20
+
21
+ param_scheduler = [
22
+ # warm up learning rate scheduler
23
+ dict(
24
+ type='LinearLR',
25
+ start_factor=1e-4,
26
+ by_epoch=True,
27
+ begin=0,
28
+ end=1,
29
+ # update by iter
30
+ convert_to_iter_based=True),
31
+ # main learning rate scheduler
32
+ dict(
33
+ type='CosineAnnealingLR',
34
+ T_max=max_epochs,
35
+ by_epoch=True,
36
+ begin=1,
37
+ end=max_epochs,
38
+ ),
39
+ ]
40
+
41
+ param_scheduler_callback = dict(
42
+ type='ParamSchedulerHook'
43
+ )
44
+
45
+ evaluator_ = dict(
46
+ type='CocoPLMetric',
47
+ metric=['bbox', 'segm'],
48
+ proposal_nums=[1, 10, 100]
49
+ )
50
+
51
+ evaluator = dict(
52
+ val_evaluator=evaluator_,
53
+ )
54
+
55
+
56
+ image_size = (1024, 1024)
57
+
58
+ data_preprocessor = dict(
59
+ type='mmdet.DetDataPreprocessor',
60
+ mean=[123.675, 116.28, 103.53],
61
+ std=[58.395, 57.12, 57.375],
62
+ bgr_to_rgb=True,
63
+ pad_size_divisor=32,
64
+ pad_mask=True,
65
+ mask_pad_value=0,
66
+ )
67
+
68
+ num_things_classes = 1
69
+ num_stuff_classes = 0
70
+ num_classes = num_things_classes + num_stuff_classes
71
+ prompt_shape = (30, 4)
72
+
73
+ model_cfg = dict(
74
+ type='SegSAMAnchorPLer',
75
+ hyperparameters=dict(
76
+ optimizer=optimizer,
77
+ param_scheduler=param_scheduler,
78
+ evaluator=evaluator,
79
+ ),
80
+ need_train_names=sub_model_train,
81
+ data_preprocessor=data_preprocessor,
82
+ backbone=dict(
83
+ type='vit_h',
84
+ checkpoint='pretrain/sam/sam_vit_h_4b8939.pth',
85
+ # type='vit_b',
86
+ # checkpoint='pretrain/sam/sam_vit_b_01ec64.pth',
87
+ ),
88
+ panoptic_head=dict(
89
+ type='SAMAnchorInstanceHead',
90
+ neck=dict(
91
+ type='SAMAggregatorNeck',
92
+ in_channels=[1280] * 32,
93
+ # in_channels=[768] * 12,
94
+ inner_channels=32,
95
+ selected_channels=range(8, 32, 2),
96
+ # selected_channels=range(4, 12, 2),
97
+ out_channels=256,
98
+ up_sample_scale=4,
99
+ ),
100
+ rpn_head=dict(
101
+ type='mmdet.RPNHead',
102
+ in_channels=256,
103
+ feat_channels=256,
104
+ anchor_generator=dict(
105
+ type='mmdet.AnchorGenerator',
106
+ scales=[2, 4, 8, 16, 32, 64],
107
+ ratios=[0.5, 1.0, 2.0],
108
+ strides=[8, 16, 32]),
109
+ bbox_coder=dict(
110
+ type='mmdet.DeltaXYWHBBoxCoder',
111
+ target_means=[.0, .0, .0, .0],
112
+ target_stds=[1.0, 1.0, 1.0, 1.0]),
113
+ loss_cls=dict(
114
+ type='mmdet.CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
115
+ loss_bbox=dict(type='mmdet.SmoothL1Loss', loss_weight=1.0)),
116
+ roi_head=dict(
117
+ type='SAMAnchorPromptRoIHead',
118
+ bbox_roi_extractor=dict(
119
+ type='mmdet.SingleRoIExtractor',
120
+ roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
121
+ out_channels=256,
122
+ featmap_strides=[8, 16, 32]),
123
+ bbox_head=dict(
124
+ type='mmdet.Shared2FCBBoxHead',
125
+ in_channels=256,
126
+ fc_out_channels=1024,
127
+ roi_feat_size=7,
128
+ num_classes=num_classes,
129
+ bbox_coder=dict(
130
+ type='mmdet.DeltaXYWHBBoxCoder',
131
+ target_means=[0., 0., 0., 0.],
132
+ target_stds=[0.1, 0.1, 0.2, 0.2]),
133
+ reg_class_agnostic=False,
134
+ loss_cls=dict(
135
+ type='mmdet.CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
136
+ loss_bbox=dict(type='mmdet.SmoothL1Loss', loss_weight=1.0)),
137
+ mask_roi_extractor=dict(
138
+ type='mmdet.SingleRoIExtractor',
139
+ roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),
140
+ out_channels=256,
141
+ featmap_strides=[8, 16, 32]),
142
+ mask_head=dict(
143
+ type='SAMPromptMaskHead',
144
+ per_query_point=prompt_shape[1],
145
+ with_sincos=True,
146
+ class_agnostic=True,
147
+ loss_mask=dict(
148
+ type='mmdet.CrossEntropyLoss', use_mask=True, loss_weight=1.0))),
149
+ # model training and testing settings
150
+ train_cfg=dict(
151
+ rpn=dict(
152
+ assigner=dict(
153
+ type='mmdet.MaxIoUAssigner',
154
+ pos_iou_thr=0.7,
155
+ neg_iou_thr=0.3,
156
+ min_pos_iou=0.3,
157
+ match_low_quality=True,
158
+ ignore_iof_thr=-1),
159
+ sampler=dict(
160
+ type='mmdet.RandomSampler',
161
+ num=512,
162
+ pos_fraction=0.5,
163
+ neg_pos_ub=-1,
164
+ add_gt_as_proposals=False),
165
+ allowed_border=-1,
166
+ pos_weight=-1,
167
+ debug=False),
168
+ rpn_proposal=dict(
169
+ nms_pre=2000,
170
+ max_per_img=1000,
171
+ nms=dict(type='nms', iou_threshold=0.7),
172
+ min_bbox_size=0),
173
+ rcnn=dict(
174
+ assigner=dict(
175
+ type='mmdet.MaxIoUAssigner',
176
+ pos_iou_thr=0.5,
177
+ neg_iou_thr=0.5,
178
+ min_pos_iou=0.5,
179
+ match_low_quality=True,
180
+ ignore_iof_thr=-1),
181
+ sampler=dict(
182
+ type='mmdet.RandomSampler',
183
+ num=256,
184
+ pos_fraction=0.25,
185
+ neg_pos_ub=-1,
186
+ add_gt_as_proposals=True),
187
+ mask_size=1024,
188
+ pos_weight=-1,
189
+ debug=False)),
190
+ test_cfg=dict(
191
+ rpn=dict(
192
+ nms_pre=1000,
193
+ max_per_img=1000,
194
+ nms=dict(type='nms', iou_threshold=0.7),
195
+ min_bbox_size=0),
196
+ rcnn=dict(
197
+ score_thr=0.05,
198
+ nms=dict(type='nms', iou_threshold=0.5),
199
+ max_per_img=100,
200
+ mask_thr_binary=0.5)
201
+ )
202
+ )
203
+ )
204
+
205
+ task_name = 'ssdd_ins'
206
+ exp_name = 'E20230629_2'
207
+ logger = dict(
208
+ type='WandbLogger',
209
+ project=task_name,
210
+ group='sam-anchor',
211
+ name=exp_name
212
+ )
213
+
214
+
215
+ callbacks = [
216
+ param_scheduler_callback,
217
+ dict(
218
+ type='ModelCheckpoint',
219
+ dirpath=f'results/{task_name}/{exp_name}/checkpoints',
220
+ save_last=True,
221
+ mode='max',
222
+ monitor='valsegm_map_0',
223
+ save_top_k=3,
224
+ filename='epoch_{epoch}-map_{valsegm_map_0:.4f}'
225
+ ),
226
+ dict(
227
+ type='LearningRateMonitor',
228
+ logging_interval='step'
229
+ )
230
+ ]
231
+
232
+
233
+ trainer_cfg = dict(
234
+ compiled_model=False,
235
+ accelerator="auto",
236
+ strategy="auto",
237
+ # strategy="ddp",
238
+ # strategy='ddp_find_unused_parameters_true',
239
+ # precision='32',
240
+ # precision='16-mixed',
241
+ devices=8,
242
+ default_root_dir=f'results/{task_name}/{exp_name}',
243
+ # default_root_dir='results/tmp',
244
+ max_epochs=max_epochs,
245
+ logger=logger,
246
+ callbacks=callbacks,
247
+ log_every_n_steps=5,
248
+ check_val_every_n_epoch=5,
249
+ benchmark=True,
250
+ # sync_batchnorm=True,
251
+ # fast_dev_run=True,
252
+
253
+ # limit_train_batches=1,
254
+ # limit_val_batches=0,
255
+ # limit_test_batches=None,
256
+ # limit_predict_batches=None,
257
+ # overfit_batches=0.0,
258
+
259
+ # val_check_interval=None,
260
+ # num_sanity_val_steps=0,
261
+ # enable_checkpointing=None,
262
+ # enable_progress_bar=None,
263
+ # enable_model_summary=None,
264
+ # accumulate_grad_batches=32,
265
+ # gradient_clip_val=15,
266
+ # gradient_clip_algorithm='norm',
267
+ # deterministic=None,
268
+ # inference_mode: bool=True,
269
+ use_distributed_sampler=True,
270
+ # profiler="simple",
271
+ # detect_anomaly=False,
272
+ # barebones=False,
273
+ # plugins=None,
274
+ # reload_dataloaders_every_n_epochs=0,
275
+ )
276
+
277
+
278
+ backend_args = None
279
+ train_pipeline = [
280
+ dict(type='mmdet.LoadImageFromFile'),
281
+ dict(type='mmdet.LoadAnnotations', with_bbox=True, with_mask=True),
282
+ dict(type='mmdet.Resize', scale=image_size),
283
+ dict(type='mmdet.RandomFlip', prob=0.5),
284
+ dict(type='mmdet.PackDetInputs')
285
+ ]
286
+
287
+ test_pipeline = [
288
+ dict(type='mmdet.LoadImageFromFile', backend_args=backend_args),
289
+ dict(type='mmdet.Resize', scale=image_size),
290
+ # If there is no gt annotation, remove this transform from the pipeline
291
+ dict(type='mmdet.LoadAnnotations', with_bbox=True, with_mask=True),
292
+ dict(
293
+ type='mmdet.PackDetInputs',
294
+ meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
295
+ 'scale_factor'))
296
+ ]
297
+
298
+
299
+ train_batch_size_per_gpu = 2
300
+ train_num_workers = 2
301
+ test_batch_size_per_gpu = 2
302
+ test_num_workers = 2
303
+ persistent_workers = True
304
+
305
+ data_parent = '/mnt/search01/dataset/cky_data/SSDD'
306
+ dataset_type = 'SSDDInsSegDataset'
307
+
308
+
309
+ val_loader = dict(
310
+ batch_size=test_batch_size_per_gpu,
311
+ num_workers=test_num_workers,
312
+ persistent_workers=persistent_workers,
313
+ pin_memory=True,
314
+ dataset=dict(
315
+ type=dataset_type,
316
+ data_root=data_parent,
317
+ # ann_file='NWPU_instances_val.json',
318
+ # data_prefix=dict(img_path='positive image set'),
319
+ ann_file='annotations/SSDD_instances_val.json',
320
+ data_prefix=dict(img_path='imgs'),
321
+ test_mode=True,
322
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
323
+ pipeline=test_pipeline,
324
+ backend_args=backend_args))
325
+
326
+ datamodule_cfg = dict(
327
+ type='PLDataModule',
328
+ train_loader=dict(
329
+ batch_size=train_batch_size_per_gpu,
330
+ num_workers=train_num_workers,
331
+ persistent_workers=persistent_workers,
332
+ pin_memory=True,
333
+ dataset=dict(
334
+ type=dataset_type,
335
+ data_root=data_parent,
336
+ # ann_file='NWPU_instances_train.json',
337
+ # data_prefix=dict(img_path='positive image set'),
338
+ ann_file='annotations/SSDD_instances_train.json',
339
+ data_prefix=dict(img_path='imgs'),
340
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
341
+ pipeline=train_pipeline,
342
+ backend_args=backend_args)
343
+ ),
344
+ val_loader=val_loader,
345
+ # test_loader=val_loader
346
+ predict_loader=val_loader
347
+ )
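
Configs written in this style are plain Python modules and can be loaded with mmengine for a quick sanity check before launching training. A minimal sketch, assuming the repository layout shown in this commit and that mmengine is installed:

# Load the SSDD anchor config above and inspect a few of its fields.
from mmengine.config import Config

cfg = Config.fromfile('configs/rsprompter/rsprompter_anchor_ssdd_config.py')
print(cfg.num_classes)                                    # 1
print(cfg.prompt_shape)                                   # (30, 4)
print(cfg.trainer_cfg['devices'])                         # 8
print(cfg.datamodule_cfg['train_loader']['batch_size'])   # 2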
configs/rsprompter/rsprompter_anchor_whu_config.py ADDED
@@ -0,0 +1,355 @@
1
+ custom_imports = dict(imports=['mmseg.datasets', 'mmseg.models'], allow_failed_imports=False)
2
+
3
+ sub_model_train = [
4
+ 'panoptic_head',
5
+ 'data_preprocessor'
6
+ ]
7
+
8
+ sub_model_optim = {
9
+ 'panoptic_head': {'lr_mult': 1},
10
+ }
11
+
12
+ max_epochs = 2000
13
+
14
+ optimizer = dict(
15
+ type='AdamW',
16
+ sub_model=sub_model_optim,
17
+ lr=0.0005,
18
+ weight_decay=1e-3
19
+ )
20
+
21
+ param_scheduler = [
22
+ # warm up learning rate scheduler
23
+ dict(
24
+ type='LinearLR',
25
+ start_factor=1e-4,
26
+ by_epoch=True,
27
+ begin=0,
28
+ end=1,
29
+ # update by iter
30
+ convert_to_iter_based=True),
31
+ # main learning rate scheduler
32
+ dict(
33
+ type='CosineAnnealingLR',
34
+ T_max=max_epochs,
35
+ by_epoch=True,
36
+ begin=1,
37
+ end=max_epochs,
38
+ ),
39
+ ]
40
+
41
+ param_scheduler_callback = dict(
42
+ type='ParamSchedulerHook'
43
+ )
44
+
45
+ evaluator_ = dict(
46
+ type='CocoPLMetric',
47
+ metric=['bbox', 'segm'],
48
+ proposal_nums=[1, 10, 100]
49
+ )
50
+
51
+ evaluator = dict(
52
+ val_evaluator=evaluator_,
53
+ )
54
+
55
+
56
+ image_size = (1024, 1024)
57
+
58
+ data_preprocessor = dict(
59
+ type='mmdet.DetDataPreprocessor',
60
+ mean=[123.675, 116.28, 103.53],
61
+ std=[58.395, 57.12, 57.375],
62
+ bgr_to_rgb=True,
63
+ pad_size_divisor=32,
64
+ pad_mask=True,
65
+ mask_pad_value=0,
66
+ )
67
+
68
+ num_things_classes = 1
69
+ num_stuff_classes = 0
70
+ num_classes = num_things_classes + num_stuff_classes
71
+ prompt_shape = (90, 4)
72
+
73
+
74
+ model_cfg = dict(
75
+ type='SegSAMAnchorPLer',
76
+ hyperparameters=dict(
77
+ optimizer=optimizer,
78
+ param_scheduler=param_scheduler,
79
+ evaluator=evaluator,
80
+ ),
81
+ need_train_names=sub_model_train,
82
+ data_preprocessor=data_preprocessor,
83
+ backbone=dict(
84
+ type='vit_h',
85
+ checkpoint='pretrain/sam/sam_vit_h_4b8939.pth',
86
+ # type='vit_b',
87
+ # checkpoint='pretrain/sam/sam_vit_b_01ec64.pth',
88
+ ),
89
+ panoptic_head=dict(
90
+ type='SAMAnchorInstanceHead',
91
+ neck=dict(
92
+ type='SAMAggregatorNeck',
93
+ in_channels=[1280] * 32,
94
+ # in_channels=[768] * 12,
95
+ inner_channels=32,
96
+ selected_channels=range(4, 32, 2),
97
+ # selected_channels=range(4, 12, 2),
98
+ out_channels=256,
99
+ up_sample_scale=4,
100
+ ),
101
+ rpn_head=dict(
102
+ type='mmdet.RPNHead',
103
+ in_channels=256,
104
+ feat_channels=256,
105
+ anchor_generator=dict(
106
+ type='mmdet.AnchorGenerator',
107
+ scales=[2, 4, 8, 16, 32, 64],
108
+ ratios=[0.5, 1.0, 2.0],
109
+ strides=[8, 16, 32]),
110
+ bbox_coder=dict(
111
+ type='mmdet.DeltaXYWHBBoxCoder',
112
+ target_means=[.0, .0, .0, .0],
113
+ target_stds=[1.0, 1.0, 1.0, 1.0]),
114
+ loss_cls=dict(
115
+ type='mmdet.CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
116
+ loss_bbox=dict(type='mmdet.SmoothL1Loss', loss_weight=1.0)),
117
+ roi_head=dict(
118
+ type='SAMAnchorPromptRoIHead',
119
+ bbox_roi_extractor=dict(
120
+ type='mmdet.SingleRoIExtractor',
121
+ roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
122
+ out_channels=256,
123
+ featmap_strides=[8, 16, 32]),
124
+ bbox_head=dict(
125
+ type='mmdet.Shared2FCBBoxHead',
126
+ in_channels=256,
127
+ fc_out_channels=1024,
128
+ roi_feat_size=7,
129
+ num_classes=num_classes,
130
+ bbox_coder=dict(
131
+ type='mmdet.DeltaXYWHBBoxCoder',
132
+ target_means=[0., 0., 0., 0.],
133
+ target_stds=[0.1, 0.1, 0.2, 0.2]),
134
+ reg_class_agnostic=False,
135
+ loss_cls=dict(
136
+ type='mmdet.CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
137
+ loss_bbox=dict(type='mmdet.SmoothL1Loss', loss_weight=1.0)),
138
+ mask_roi_extractor=dict(
139
+ type='mmdet.SingleRoIExtractor',
140
+ roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),
141
+ out_channels=256,
142
+ featmap_strides=[8, 16, 32]),
143
+ mask_head=dict(
144
+ type='SAMPromptMaskHead',
145
+ per_query_point=prompt_shape[1],
146
+ with_sincos=True,
147
+ class_agnostic=True,
148
+ loss_mask=dict(
149
+ type='mmdet.CrossEntropyLoss', use_mask=True, loss_weight=1.0))),
150
+ # model training and testing settings
151
+ train_cfg=dict(
152
+ rpn=dict(
153
+ assigner=dict(
154
+ type='mmdet.MaxIoUAssigner',
155
+ pos_iou_thr=0.7,
156
+ neg_iou_thr=0.3,
157
+ min_pos_iou=0.3,
158
+ match_low_quality=True,
159
+ ignore_iof_thr=-1),
160
+ sampler=dict(
161
+ type='mmdet.RandomSampler',
162
+ num=512,
163
+ pos_fraction=0.5,
164
+ neg_pos_ub=-1,
165
+ add_gt_as_proposals=False),
166
+ allowed_border=-1,
167
+ pos_weight=-1,
168
+ debug=False),
169
+ rpn_proposal=dict(
170
+ nms_pre=2000,
171
+ max_per_img=1000,
172
+ nms=dict(type='nms', iou_threshold=0.7),
173
+ min_bbox_size=0),
174
+ rcnn=dict(
175
+ assigner=dict(
176
+ type='mmdet.MaxIoUAssigner',
177
+ pos_iou_thr=0.5,
178
+ neg_iou_thr=0.5,
179
+ min_pos_iou=0.5,
180
+ match_low_quality=True,
181
+ ignore_iof_thr=-1),
182
+ sampler=dict(
183
+ type='mmdet.RandomSampler',
184
+ num=256,
185
+ pos_fraction=0.25,
186
+ neg_pos_ub=-1,
187
+ add_gt_as_proposals=True),
188
+ mask_size=1024,
189
+ pos_weight=-1,
190
+ debug=False)),
191
+ test_cfg=dict(
192
+ rpn=dict(
193
+ nms_pre=1000,
194
+ max_per_img=1000,
195
+ nms=dict(type='nms', iou_threshold=0.7),
196
+ min_bbox_size=0),
197
+ rcnn=dict(
198
+ score_thr=0.05,
199
+ nms=dict(type='nms', iou_threshold=0.5),
200
+ max_per_img=100,
201
+ mask_thr_binary=0.5)
202
+ )
203
+ )
204
+ )
205
+
206
+ task_name = 'whu_ins'
207
+ exp_name = 'E20230629_0'
208
+ logger = dict(
209
+ type='WandbLogger',
210
+ project=task_name,
211
+ group='sam-anchor',
212
+ name=exp_name
213
+ )
214
+
215
+
216
+ callbacks = [
217
+ param_scheduler_callback,
218
+ dict(
219
+ type='ModelCheckpoint',
220
+ dirpath=f'results/{task_name}/{exp_name}/checkpoints',
221
+ save_last=True,
222
+ mode='max',
223
+ monitor='valsegm_map_0',
224
+ save_top_k=3,
225
+ filename='epoch_{epoch}-map_{valsegm_map_0:.4f}'
226
+ ),
227
+ dict(
228
+ type='LearningRateMonitor',
229
+ logging_interval='step'
230
+ )
231
+ ]
232
+
233
+
234
+ trainer_cfg = dict(
235
+ compiled_model=False,
236
+ accelerator="auto",
237
+ strategy="auto",
238
+ # strategy="ddp",
239
+ # strategy='ddp_find_unused_parameters_true',
240
+ # precision='32',
241
+ # precision='16-mixed',
242
+ devices=8,
243
+ default_root_dir=f'results/{task_name}/{exp_name}',
244
+ # default_root_dir='results/tmp',
245
+ max_epochs=max_epochs,
246
+ logger=logger,
247
+ callbacks=callbacks,
248
+ log_every_n_steps=10,
249
+ check_val_every_n_epoch=5,
250
+ benchmark=True,
251
+ # sync_batchnorm=True,
252
+ # fast_dev_run=True,
253
+
254
+ # limit_train_batches=1,
255
+ # limit_val_batches=0,
256
+ # limit_test_batches=None,
257
+ # limit_predict_batches=None,
258
+ # overfit_batches=0.0,
259
+
260
+ # val_check_interval=None,
261
+ # num_sanity_val_steps=0,
262
+ # enable_checkpointing=None,
263
+ # enable_progress_bar=None,
264
+ # enable_model_summary=None,
265
+ # accumulate_grad_batches=32,
266
+ # gradient_clip_val=15,
267
+ # gradient_clip_algorithm='norm',
268
+ # deterministic=None,
269
+ # inference_mode: bool=True,
270
+ use_distributed_sampler=True,
271
+ # profiler="simple",
272
+ # detect_anomaly=False,
273
+ # barebones=False,
274
+ # plugins=None,
275
+ # reload_dataloaders_every_n_epochs=0,
276
+ )
277
+
278
+
279
+ backend_args = None
280
+ train_pipeline = [
281
+ dict(type='mmdet.LoadImageFromFile'),
282
+ dict(type='mmdet.LoadAnnotations', with_bbox=True, with_mask=True),
283
+ dict(type='mmdet.Resize', scale=image_size),
284
+ dict(type='mmdet.RandomFlip', prob=0.5),
285
+ dict(type='mmdet.PackDetInputs')
286
+ ]
287
+
288
+ test_pipeline = [
289
+ dict(type='mmdet.LoadImageFromFile', backend_args=backend_args),
290
+ dict(type='mmdet.Resize', scale=image_size),
291
+ # If there is no gt annotation, remove this transform from the pipeline
292
+ dict(type='mmdet.LoadAnnotations', with_bbox=True, with_mask=True),
293
+ dict(
294
+ type='mmdet.PackDetInputs',
295
+ meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
296
+ 'scale_factor'))
297
+ ]
298
+
299
+
300
+ train_batch_size_per_gpu = 2
301
+ train_num_workers = 2
302
+ test_batch_size_per_gpu = 2
303
+ test_num_workers = 2
304
+ persistent_workers = True
305
+
306
+
307
+ data_parent = '/mnt/search01/dataset/cky_data/WHU'
308
+ train_data_prefix = 'train/'
309
+ val_data_prefix = 'test/'
310
+ dataset_type = 'WHUInsSegDataset'
311
+
312
+
313
+ val_loader = dict(
314
+ batch_size=test_batch_size_per_gpu,
315
+ num_workers=test_num_workers,
316
+ persistent_workers=persistent_workers,
317
+ pin_memory=True,
318
+ dataset=dict(
319
+ type=dataset_type,
320
+ data_root=data_parent,
321
+ # ann_file='NWPU_instances_val.json',
322
+ # data_prefix=dict(img_path='positive image set'),
323
+ # ann_file='annotations/SSDD_instances_val.json',
324
+ # data_prefix=dict(img_path='imgs'),
325
+ ann_file='annotations/WHU_building_test.json',
326
+ data_prefix=dict(img_path=val_data_prefix + '/image'),
327
+ test_mode=True,
328
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
329
+ pipeline=test_pipeline,
330
+ backend_args=backend_args))
331
+
332
+ datamodule_cfg = dict(
333
+ type='PLDataModule',
334
+ train_loader=dict(
335
+ batch_size=train_batch_size_per_gpu,
336
+ num_workers=train_num_workers,
337
+ persistent_workers=persistent_workers,
338
+ pin_memory=True,
339
+ dataset=dict(
340
+ type=dataset_type,
341
+ data_root=data_parent,
342
+ # ann_file='NWPU_instances_train.json',
343
+ # data_prefix=dict(img_path='positive image set'),
344
+ # ann_file='annotations/SSDD_instances_train.json',
345
+ # data_prefix=dict(img_path='imgs'),
346
+ ann_file='annotations/WHU_building_train.json',
347
+ data_prefix=dict(img_path=train_data_prefix + '/image'),
348
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
349
+ pipeline=train_pipeline,
350
+ backend_args=backend_args)
351
+ ),
352
+ val_loader=val_loader,
353
+ # test_loader=val_loader
354
+ predict_loader=val_loader
355
+ )
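
The callbacks and trainer_cfg blocks mirror PyTorch Lightning constructor arguments; the repository builds them through its own registry, so the following is only a hand-written sketch of what the WHU dicts above correspond to (assuming the lightning 2.x namespace; older releases import pytorch_lightning instead).

# Hand-written equivalent of the WHU callbacks/trainer dicts (illustrative).
import lightning.pytorch as pl
from lightning.pytorch.callbacks import ModelCheckpoint, LearningRateMonitor

checkpoint = ModelCheckpoint(
    dirpath='results/whu_ins/E20230629_0/checkpoints',
    save_last=True, mode='max', monitor='valsegm_map_0', save_top_k=3,
    filename='epoch_{epoch}-map_{valsegm_map_0:.4f}')
lr_monitor = LearningRateMonitor(logging_interval='step')

# devices=8 requires 8 visible accelerators at run time.
trainer = pl.Trainer(
    accelerator='auto', strategy='auto', devices=8, max_epochs=2000,
    default_root_dir='results/whu_ins/E20230629_0',
    callbacks=[checkpoint, lr_monitor],
    log_every_n_steps=10, check_val_every_n_epoch=5,
    benchmark=True, use_distributed_sampler=True)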
configs/rsprompter/rsprompter_query_nwpu_config.py ADDED
@@ -0,0 +1,300 @@
1
+ custom_imports = dict(imports=['mmseg.datasets', 'mmseg.models'], allow_failed_imports=False)
2
+
3
+ sub_model_train = [
4
+ 'panoptic_head',
5
+ 'panoptic_fusion_head',
6
+ 'data_preprocessor'
7
+ ]
8
+
9
+ sub_model_optim = {
10
+ 'panoptic_head': {'lr_mult': 1},
11
+ 'panoptic_fusion_head': {'lr_mult': 1},
12
+ }
13
+
14
+ max_epochs = 5000
15
+
16
+ optimizer = dict(
17
+ type='AdamW',
18
+ sub_model=sub_model_optim,
19
+ lr=0.0005,
20
+ weight_decay=1e-3
21
+ )
22
+
23
+ param_scheduler = [
24
+ # warm up learning rate scheduler
25
+ dict(
26
+ type='LinearLR',
27
+ start_factor=1e-4,
28
+ by_epoch=True,
29
+ begin=0,
30
+ end=1,
31
+ # update by iter
32
+ convert_to_iter_based=True),
33
+ # main learning rate scheduler
34
+ dict(
35
+ type='CosineAnnealingLR',
36
+ T_max=max_epochs,
37
+ by_epoch=True,
38
+ begin=1,
39
+ end=max_epochs,
40
+ ),
41
+ ]
42
+
43
+ param_scheduler_callback = dict(
44
+ type='ParamSchedulerHook'
45
+ )
46
+
47
+ evaluator_ = dict(
48
+ type='CocoPLMetric',
49
+ metric=['bbox', 'segm'],
50
+ proposal_nums=[1, 10, 100]
51
+ )
52
+
53
+ evaluator = dict(
54
+ val_evaluator=evaluator_,
55
+ )
56
+
57
+
58
+ image_size = (1024, 1024)
59
+
60
+ data_preprocessor = dict(
61
+ type='mmdet.DetDataPreprocessor',
62
+ mean=[123.675, 116.28, 103.53],
63
+ std=[58.395, 57.12, 57.375],
64
+ bgr_to_rgb=True,
65
+ pad_size_divisor=32,
66
+ pad_mask=True,
67
+ mask_pad_value=0,
68
+ )
69
+
70
+ num_things_classes = 10
71
+ num_stuff_classes = 0
72
+ num_classes = num_things_classes + num_stuff_classes
73
+ prompt_shape = (60, 4)
74
+
75
+
76
+ model_cfg = dict(
77
+ type='SegSAMPLer',
78
+ hyperparameters=dict(
79
+ optimizer=optimizer,
80
+ param_scheduler=param_scheduler,
81
+ evaluator=evaluator,
82
+ ),
83
+ need_train_names=sub_model_train,
84
+ data_preprocessor=data_preprocessor,
85
+ backbone=dict(
86
+ type='vit_h',
87
+ checkpoint='pretrain/sam/sam_vit_h_4b8939.pth',
88
+ # type='vit_b',
89
+ # checkpoint='pretrain/sam/sam_vit_b_01ec64.pth',
90
+ ),
91
+ panoptic_head=dict(
92
+ type='SAMInstanceHead',
93
+ num_things_classes=num_things_classes,
94
+ num_stuff_classes=num_stuff_classes,
95
+ with_multiscale=True,
96
+ with_sincos=True,
97
+ prompt_neck=dict(
98
+ type='SAMTransformerEDPromptGenNeck',
99
+ prompt_shape=prompt_shape,
100
+ in_channels=[1280] * 32,
101
+ inner_channels=32,
102
+ selected_channels=range(4, 32, 2),
103
+ # in_channels=[768] * 8,
104
+ num_encoders=1,
105
+ num_decoders=4,
106
+ out_channels=256
107
+ ),
108
+ loss_cls=dict(
109
+ type='mmdet.CrossEntropyLoss',
110
+ use_sigmoid=False,
111
+ loss_weight=2.0,
112
+ reduction='mean',
113
+ class_weight=[1.0] * num_classes + [0.1]),
114
+ loss_mask=dict(
115
+ type='mmdet.CrossEntropyLoss',
116
+ use_sigmoid=True,
117
+ reduction='mean',
118
+ loss_weight=5.0),
119
+ loss_dice=dict(
120
+ type='mmdet.DiceLoss',
121
+ use_sigmoid=True,
122
+ activate=True,
123
+ reduction='mean',
124
+ naive_dice=True,
125
+ eps=1.0,
126
+ loss_weight=5.0)),
127
+ panoptic_fusion_head=dict(
128
+ type='mmdet.MaskFormerFusionHead',
129
+ num_things_classes=num_things_classes,
130
+ num_stuff_classes=num_stuff_classes,
131
+ loss_panoptic=None,
132
+ init_cfg=None),
133
+ train_cfg=dict(
134
+ num_points=12544,
135
+ oversample_ratio=3.0,
136
+ importance_sample_ratio=0.75,
137
+ assigner=dict(
138
+ type='mmdet.HungarianAssigner',
139
+ match_costs=[
140
+ dict(type='mmdet.ClassificationCost', weight=2.0),
141
+ dict(
142
+ type='mmdet.CrossEntropyLossCost', weight=5.0, use_sigmoid=True),
143
+ dict(type='mmdet.DiceCost', weight=5.0, pred_act=True, eps=1.0)
144
+ ]),
145
+ sampler=dict(type='mmdet.MaskPseudoSampler')),
146
+ test_cfg=dict(
147
+ panoptic_on=False,
148
+ # For now, the dataset does not support
149
+ # evaluating semantic segmentation metric.
150
+ semantic_on=False,
151
+ instance_on=True,
152
+ # max_per_image is for instance segmentation.
153
+ max_per_image=prompt_shape[0],
154
+ iou_thr=0.8,
155
+ # In Mask2Former's panoptic postprocessing,
156
+ # it will filter mask area where score is less than 0.5 .
157
+ filter_low_score=True),
158
+ )
159
+
160
+ task_name = 'nwpu_ins'
161
+ exp_name = 'E20230623_1'
162
+ logger = dict(
163
+ type='WandbLogger',
164
+ project=task_name,
165
+ group='sam-query',
166
+ name=exp_name
167
+ )
168
+
169
+
170
+ callbacks = [
171
+ param_scheduler_callback,
172
+ dict(
173
+ type='ModelCheckpoint',
174
+ dirpath=f'results/{task_name}/{exp_name}/checkpoints',
175
+ save_last=True,
176
+ mode='max',
177
+ monitor='valsegm_map_0',
178
+ save_top_k=3,
179
+ filename='epoch_{epoch}-map_{valsegm_map_0:.4f}'
180
+ ),
181
+ dict(
182
+ type='LearningRateMonitor',
183
+ logging_interval='step'
184
+ )
185
+ ]
186
+
187
+
188
+ trainer_cfg = dict(
189
+ compiled_model=False,
190
+ accelerator="auto",
191
+ strategy="auto",
192
+ # strategy="ddp",
193
+ # strategy='ddp_find_unused_parameters_true',
194
+ # precision='32',
195
+ # precision='16-mixed',
196
+ devices=8,
197
+ default_root_dir=f'results/{task_name}/{exp_name}',
198
+ # default_root_dir='results/tmp',
199
+ max_epochs=max_epochs,
200
+ logger=logger,
201
+ callbacks=callbacks,
202
+ log_every_n_steps=5,
203
+ check_val_every_n_epoch=5,
204
+ benchmark=True,
205
+ # sync_batchnorm=True,
206
+ # fast_dev_run=True,
207
+
208
+ # limit_train_batches=1,
209
+ # limit_val_batches=0,
210
+ # limit_test_batches=None,
211
+ # limit_predict_batches=None,
212
+ # overfit_batches=0.0,
213
+
214
+ # val_check_interval=None,
215
+ # num_sanity_val_steps=0,
216
+ # enable_checkpointing=None,
217
+ # enable_progress_bar=None,
218
+ # enable_model_summary=None,
219
+ # accumulate_grad_batches=32,
220
+ # gradient_clip_val=15,
221
+ # gradient_clip_algorithm='norm',
222
+ # deterministic=None,
223
+ # inference_mode: bool=True,
224
+ use_distributed_sampler=True,
225
+ # profiler="simple",
226
+ # detect_anomaly=False,
227
+ # barebones=False,
228
+ # plugins=None,
229
+ # reload_dataloaders_every_n_epochs=0,
230
+ )
231
+
232
+
233
+ backend_args = None
234
+ train_pipeline = [
235
+ dict(type='mmdet.LoadImageFromFile'),
236
+ dict(type='mmdet.LoadAnnotations', with_bbox=True, with_mask=True),
237
+ dict(type='mmdet.Resize', scale=image_size),
238
+ dict(type='mmdet.RandomFlip', prob=0.5),
239
+ dict(type='mmdet.PackDetInputs')
240
+ ]
241
+
242
+ test_pipeline = [
243
+ dict(type='mmdet.LoadImageFromFile', backend_args=backend_args),
244
+ dict(type='mmdet.Resize', scale=image_size),
245
+ # If there is no gt annotation, remove this transform from the pipeline
246
+ dict(type='mmdet.LoadAnnotations', with_bbox=True, with_mask=True),
247
+ dict(
248
+ type='mmdet.PackDetInputs',
249
+ meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
250
+ 'scale_factor'))
251
+ ]
252
+
253
+
254
+ train_batch_size_per_gpu = 3
255
+ train_num_workers = 2
256
+ test_batch_size_per_gpu = 3
257
+ test_num_workers = 2
258
+ persistent_workers = True
259
+
260
+ data_parent = '/mnt/search01/dataset/cky_data/NWPU10'
261
+ train_data_prefix = ''
262
+ val_data_prefix = ''
263
+
264
+ dataset_type = 'NWPUInsSegDataset'
265
+
266
+ val_loader = dict(
267
+ batch_size=test_batch_size_per_gpu,
268
+ num_workers=test_num_workers,
269
+ persistent_workers=persistent_workers,
270
+ pin_memory=True,
271
+ dataset=dict(
272
+ type=dataset_type,
273
+ data_root=data_parent,
274
+ ann_file='NWPU_instances_val.json',
275
+ data_prefix=dict(img_path='positive image set'),
276
+ test_mode=True,
277
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
278
+ pipeline=test_pipeline,
279
+ backend_args=backend_args))
280
+
281
+ datamodule_cfg = dict(
282
+ type='PLDataModule',
283
+ train_loader=dict(
284
+ batch_size=train_batch_size_per_gpu,
285
+ num_workers=train_num_workers,
286
+ persistent_workers=persistent_workers,
287
+ pin_memory=True,
288
+ dataset=dict(
289
+ type=dataset_type,
290
+ data_root=data_parent,
291
+ ann_file='NWPU_instances_train.json',
292
+ data_prefix=dict(img_path='positive image set'),
293
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
294
+ pipeline=train_pipeline,
295
+ backend_args=backend_args)
296
+ ),
297
+ val_loader=val_loader,
298
+ # test_loader=val_loader
299
+ predict_loader=val_loader
300
+ )
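
All of these configs pair one epoch of LinearLR warm-up with CosineAnnealingLR over the remaining epochs. The helper below is a plain-Python approximation of that schedule (it ignores the per-iteration granularity of the warm-up phase); base_lr, start_factor and max_epochs follow the query-NWPU config above.

# Approximate learning rate per epoch: linear warm-up, then cosine annealing.
import math

def lr_at_epoch(epoch, base_lr=5e-4, start_factor=1e-4, max_epochs=5000):
    if epoch < 1:
        # first epoch: linear ramp from base_lr * start_factor up to base_lr
        return base_lr * (start_factor + (1 - start_factor) * epoch)
    t = epoch - 1                       # cosine phase begins after the first epoch
    return 0.5 * base_lr * (1 + math.cos(math.pi * t / max_epochs))

print(lr_at_epoch(0))      # 5e-08    (start of warm-up)
print(lr_at_epoch(1))      # 0.0005   (warm-up done, full base LR)
print(lr_at_epoch(2500))   # ~0.00025 (roughly halfway down the cosine)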
configs/rsprompter/rsprompter_query_ssdd_config.py ADDED
@@ -0,0 +1,298 @@
1
+ custom_imports = dict(imports=['mmseg.datasets', 'mmseg.models'], allow_failed_imports=False)
2
+
3
+ sub_model_train = [
4
+ 'panoptic_head',
5
+ 'panoptic_fusion_head',
6
+ 'data_preprocessor'
7
+ ]
8
+
9
+ sub_model_optim = {
10
+ 'panoptic_head': {'lr_mult': 1},
11
+ 'panoptic_fusion_head': {'lr_mult': 1},
12
+ }
13
+
14
+ max_epochs = 5000
15
+
16
+ optimizer = dict(
17
+ type='AdamW',
18
+ sub_model=sub_model_optim,
19
+ lr=0.0005,
20
+ weight_decay=1e-3
21
+ )
22
+
23
+ param_scheduler = [
24
+ # warm up learning rate scheduler
25
+ dict(
26
+ type='LinearLR',
27
+ start_factor=1e-4,
28
+ by_epoch=True,
29
+ begin=0,
30
+ end=1,
31
+ # update by iter
32
+ convert_to_iter_based=True),
33
+ # main learning rate scheduler
34
+ dict(
35
+ type='CosineAnnealingLR',
36
+ T_max=max_epochs,
37
+ by_epoch=True,
38
+ begin=1,
39
+ end=max_epochs,
40
+ ),
41
+ ]
42
+
43
+ param_scheduler_callback = dict(
44
+ type='ParamSchedulerHook'
45
+ )
46
+
47
+ evaluator_ = dict(
48
+ type='CocoPLMetric',
49
+ metric=['bbox', 'segm'],
50
+ proposal_nums=[1, 10, 100]
51
+ )
52
+
53
+ evaluator = dict(
54
+ val_evaluator=evaluator_,
55
+ )
56
+
57
+
58
+ image_size = (1024, 1024)
59
+
60
+ data_preprocessor = dict(
61
+ type='mmdet.DetDataPreprocessor',
62
+ mean=[123.675, 116.28, 103.53],
63
+ std=[58.395, 57.12, 57.375],
64
+ bgr_to_rgb=True,
65
+ pad_size_divisor=32,
66
+ pad_mask=True,
67
+ mask_pad_value=0,
68
+ )
69
+
70
+ num_things_classes = 1
71
+ num_stuff_classes = 0
72
+ num_classes = num_things_classes + num_stuff_classes
73
+ prompt_shape = (30, 4)
74
+
75
+
76
+ model_cfg = dict(
77
+ type='SegSAMPLer',
78
+ hyperparameters=dict(
79
+ optimizer=optimizer,
80
+ param_scheduler=param_scheduler,
81
+ evaluator=evaluator,
82
+ ),
83
+ need_train_names=sub_model_train,
84
+ data_preprocessor=data_preprocessor,
85
+ backbone=dict(
86
+ type='vit_h',
87
+ checkpoint='pretrain/sam/sam_vit_h_4b8939.pth',
88
+ # type='vit_b',
89
+ # checkpoint='pretrain/sam/sam_vit_b_01ec64.pth',
90
+ ),
91
+ panoptic_head=dict(
92
+ type='SAMInstanceHead',
93
+ num_things_classes=num_things_classes,
94
+ num_stuff_classes=num_stuff_classes,
95
+ with_multiscale=True,
96
+ with_sincos=True,
97
+ prompt_neck=dict(
98
+ type='SAMTransformerEDPromptGenNeck',
99
+ prompt_shape=prompt_shape,
100
+ in_channels=[1280] * 32,
101
+ inner_channels=32,
102
+ selected_channels=range(4, 32, 2),
103
+ # in_channels=[768] * 8,
104
+ num_encoders=1,
105
+ num_decoders=4,
106
+ out_channels=256
107
+ ),
108
+ loss_cls=dict(
109
+ type='mmdet.CrossEntropyLoss',
110
+ use_sigmoid=False,
111
+ loss_weight=2.0,
112
+ reduction='mean',
113
+ class_weight=[1.0] * num_classes + [0.1]),
114
+ loss_mask=dict(
115
+ type='mmdet.CrossEntropyLoss',
116
+ use_sigmoid=True,
117
+ reduction='mean',
118
+ loss_weight=5.0),
119
+ loss_dice=dict(
120
+ type='mmdet.DiceLoss',
121
+ use_sigmoid=True,
122
+ activate=True,
123
+ reduction='mean',
124
+ naive_dice=True,
125
+ eps=1.0,
126
+ loss_weight=5.0)),
127
+ panoptic_fusion_head=dict(
128
+ type='mmdet.MaskFormerFusionHead',
129
+ num_things_classes=num_things_classes,
130
+ num_stuff_classes=num_stuff_classes,
131
+ loss_panoptic=None,
132
+ init_cfg=None),
133
+ train_cfg=dict(
134
+ num_points=12544,
135
+ oversample_ratio=3.0,
136
+ importance_sample_ratio=0.75,
137
+ assigner=dict(
138
+ type='mmdet.HungarianAssigner',
139
+ match_costs=[
140
+ dict(type='mmdet.ClassificationCost', weight=2.0),
141
+ dict(
142
+ type='mmdet.CrossEntropyLossCost', weight=5.0, use_sigmoid=True),
143
+ dict(type='mmdet.DiceCost', weight=5.0, pred_act=True, eps=1.0)
144
+ ]),
145
+ sampler=dict(type='mmdet.MaskPseudoSampler')),
146
+ test_cfg=dict(
147
+ panoptic_on=False,
148
+ # For now, the dataset does not support
149
+ # evaluating semantic segmentation metric.
150
+ semantic_on=False,
151
+ instance_on=True,
152
+ # max_per_image is for instance segmentation.
153
+ max_per_image=prompt_shape[0],
154
+ iou_thr=0.8,
155
+ # In Mask2Former's panoptic postprocessing,
156
+ # it will filter mask area where score is less than 0.5 .
157
+ filter_low_score=True),
158
+ )
159
+
160
+ task_name = 'ssdd_ins'
161
+ exp_name = 'E20230527_1'
162
+ logger = dict(
163
+ type='WandbLogger',
164
+ project=task_name,
165
+ group='sam',
166
+ name=exp_name
167
+ )
168
+ # logger = None
169
+
170
+
171
+ callbacks = [
172
+ param_scheduler_callback,
173
+ dict(
174
+ type='ModelCheckpoint',
175
+ dirpath=f'results/{task_name}/{exp_name}/checkpoints',
176
+ save_last=True,
177
+ mode='max',
178
+ monitor='valsegm_map_0',
179
+ save_top_k=2,
180
+ filename='epoch_{epoch}-map_{valsegm_map_0:.4f}'
181
+ ),
182
+ dict(
183
+ type='LearningRateMonitor',
184
+ logging_interval='step'
185
+ )
186
+ ]
187
+
188
+
189
+ trainer_cfg = dict(
190
+ compiled_model=False,
191
+ accelerator="auto",
192
+ strategy="auto",
193
+ # strategy="ddp",
194
+ # strategy='ddp_find_unused_parameters_true',
195
+ # precision='32',
196
+ # precision='16-mixed',
197
+ devices=8,
198
+ default_root_dir=f'results/{task_name}/{exp_name}',
199
+ # default_root_dir='results/tmp',
200
+ max_epochs=max_epochs,
201
+ logger=logger,
202
+ callbacks=callbacks,
203
+ log_every_n_steps=10,
204
+ check_val_every_n_epoch=5,
205
+ benchmark=True,
206
+ # sync_batchnorm=True,
207
+ # fast_dev_run=True,
208
+
209
+ # limit_train_batches=1,
210
+ # limit_val_batches=0,
211
+ # limit_test_batches=None,
212
+ # limit_predict_batches=None,
213
+ # overfit_batches=0.0,
214
+
215
+ # val_check_interval=None,
216
+ # num_sanity_val_steps=0,
217
+ # enable_checkpointing=None,
218
+ # enable_progress_bar=None,
219
+ # enable_model_summary=None,
220
+ # accumulate_grad_batches=32,
221
+ # gradient_clip_val=15,
222
+ # gradient_clip_algorithm='norm',
223
+ # deterministic=None,
224
+ # inference_mode: bool=True,
225
+ use_distributed_sampler=True,
226
+ # profiler="simple",
227
+ # detect_anomaly=False,
228
+ # barebones=False,
229
+ # plugins=None,
230
+ # reload_dataloaders_every_n_epochs=0,
231
+ )
232
+
233
+
234
+ backend_args = None
235
+ train_pipeline = [
236
+ dict(type='mmdet.LoadImageFromFile'),
237
+ dict(type='mmdet.LoadAnnotations', with_bbox=True, with_mask=True),
238
+ dict(type='mmdet.Resize', scale=image_size),
239
+ dict(type='mmdet.RandomFlip', prob=0.5),
240
+ dict(type='mmdet.PackDetInputs')
241
+ ]
242
+
243
+ test_pipeline = [
244
+ dict(type='mmdet.LoadImageFromFile', backend_args=backend_args),
245
+ dict(type='mmdet.Resize', scale=image_size),
246
+ # If there is no gt annotation, remove this transform from the pipeline
247
+ dict(type='mmdet.LoadAnnotations', with_bbox=True, with_mask=True),
248
+ dict(
249
+ type='mmdet.PackDetInputs',
250
+ meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
251
+ 'scale_factor'))
252
+ ]
253
+
254
+
255
+ train_batch_size_per_gpu = 4
256
+ train_num_workers = 2
257
+ test_batch_size_per_gpu = 4
258
+ test_num_workers = 2
259
+ persistent_workers = True
260
+
261
+ data_parent = '/mnt/search01/dataset/cky_data/SSDD'
262
+ dataset_type = 'SSDDInsSegDataset'
263
+
264
+ val_loader = dict(
265
+ batch_size=test_batch_size_per_gpu,
266
+ num_workers=test_num_workers,
267
+ persistent_workers=persistent_workers,
268
+ pin_memory=True,
269
+ dataset=dict(
270
+ type=dataset_type,
271
+ data_root=data_parent,
272
+ ann_file='annotations/SSDD_instances_val.json',
273
+ data_prefix=dict(img_path='imgs'),
274
+ test_mode=True,
275
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
276
+ pipeline=test_pipeline,
277
+ backend_args=backend_args))
278
+
279
+ datamodule_cfg = dict(
280
+ type='PLDataModule',
281
+ train_loader=dict(
282
+ batch_size=train_batch_size_per_gpu,
283
+ num_workers=train_num_workers,
284
+ persistent_workers=persistent_workers,
285
+ pin_memory=True,
286
+ dataset=dict(
287
+ type=dataset_type,
288
+ data_root=data_parent,
289
+ ann_file='annotations/SSDD_instances_train.json',
290
+ data_prefix=dict(img_path='imgs'),
291
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
292
+ pipeline=train_pipeline,
293
+ backend_args=backend_args)
294
+ ),
295
+ val_loader=val_loader,
296
+ # test_loader=val_loader
297
+ predict_loader=val_loader
298
+ )
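
The logger dict corresponds to Lightning's WandbLogger; keys that are not explicit constructor arguments (such as group) are forwarded to wandb.init. A minimal sketch, again assuming the lightning 2.x namespace; the other configs differ only in project, group, and name.

# Illustrative construction of the logger described above.
from lightning.pytorch.loggers import WandbLogger

logger = WandbLogger(project='ssdd_ins', name='E20230527_1', group='sam')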
configs/rsprompter/rsprompter_query_whu_config.py ADDED
@@ -0,0 +1,303 @@
1
+ custom_imports = dict(imports=['mmseg.datasets', 'mmseg.models'], allow_failed_imports=False)
2
+
3
+ sub_model_train = [
4
+ 'panoptic_head',
5
+ 'panoptic_fusion_head',
6
+ 'data_preprocessor'
7
+ ]
8
+
9
+ sub_model_optim = {
10
+ 'panoptic_head': {'lr_mult': 1},
11
+ 'panoptic_fusion_head': {'lr_mult': 1},
12
+ }
13
+
14
+ max_epochs = 5000
15
+
16
+ optimizer = dict(
17
+ type='AdamW',
18
+ sub_model=sub_model_optim,
19
+ lr=0.0005,
20
+ weight_decay=1e-3
21
+ )
22
+
23
+ param_scheduler = [
24
+ # warm up learning rate scheduler
25
+ dict(
26
+ type='LinearLR',
27
+ start_factor=1e-4,
28
+ by_epoch=True,
29
+ begin=0,
30
+ end=1,
31
+ # update by iter
32
+ convert_to_iter_based=True),
33
+ # main learning rate scheduler
34
+ dict(
35
+ type='CosineAnnealingLR',
36
+ T_max=max_epochs,
37
+ by_epoch=True,
38
+ begin=1,
39
+ end=max_epochs,
40
+ ),
41
+ ]
42
+
43
+ param_scheduler_callback = dict(
44
+ type='ParamSchedulerHook'
45
+ )
46
+
47
+
48
+ evaluator_ = dict(
49
+ type='CocoPLMetric',
50
+ metric=['bbox', 'segm'],
51
+ proposal_nums=[1, 10, 100]
52
+ )
53
+
54
+ evaluator = dict(
55
+ # train_evaluator=evaluator_,
56
+ val_evaluator=evaluator_,
57
+ )
58
+
59
+
60
+ image_size = (1024, 1024)
61
+
62
+ data_preprocessor = dict(
63
+ type='mmdet.DetDataPreprocessor',
64
+ mean=[123.675, 116.28, 103.53],
65
+ std=[58.395, 57.12, 57.375],
66
+ bgr_to_rgb=True,
67
+ pad_size_divisor=32,
68
+ pad_mask=True,
69
+ mask_pad_value=0,
70
+ )
71
+
72
+ num_things_classes = 1
73
+ num_stuff_classes = 0
74
+ num_classes = num_things_classes + num_stuff_classes
75
+ prompt_shape = (90, 4)
76
+
77
+
78
+ model_cfg = dict(
79
+ type='SegSAMPLer',
80
+ hyperparameters=dict(
81
+ optimizer=optimizer,
82
+ param_scheduler=param_scheduler,
83
+ evaluator=evaluator,
84
+ ),
85
+ need_train_names=sub_model_train,
86
+ data_preprocessor=data_preprocessor,
87
+ backbone=dict(
88
+ type='vit_h',
89
+ checkpoint='pretrain/sam/sam_vit_h_4b8939.pth',
90
+ # type='vit_b',
91
+ # checkpoint='pretrain/sam/sam_vit_b_01ec64.pth',
92
+ ),
93
+ panoptic_head=dict(
94
+ type='SAMInstanceHead',
95
+ num_things_classes=num_things_classes,
96
+ num_stuff_classes=num_stuff_classes,
97
+ with_multiscale=True,
98
+ with_sincos=True,
99
+ prompt_neck=dict(
100
+ type='SAMTransformerEDPromptGenNeck',
101
+ prompt_shape=prompt_shape,
102
+ in_channels=[1280] * 32,
103
+ inner_channels=64,
104
+ selected_channels=range(4, 32, 2),
105
+ # in_channels=[768] * 8,
106
+ num_encoders=1,
107
+ num_decoders=4,
108
+ out_channels=256
109
+ ),
110
+ loss_cls=dict(
111
+ type='mmdet.CrossEntropyLoss',
112
+ use_sigmoid=False,
113
+ loss_weight=2.0,
114
+ reduction='mean',
115
+ class_weight=[1.0] * num_classes + [0.1]),
116
+ loss_mask=dict(
117
+ type='mmdet.CrossEntropyLoss',
118
+ use_sigmoid=True,
119
+ reduction='mean',
120
+ loss_weight=5.0),
121
+ loss_dice=dict(
122
+ type='mmdet.DiceLoss',
123
+ use_sigmoid=True,
124
+ activate=True,
125
+ reduction='mean',
126
+ naive_dice=True,
127
+ eps=1.0,
128
+ loss_weight=5.0)),
129
+ panoptic_fusion_head=dict(
130
+ type='mmdet.MaskFormerFusionHead',
131
+ num_things_classes=num_things_classes,
132
+ num_stuff_classes=num_stuff_classes,
133
+ loss_panoptic=None,
134
+ init_cfg=None),
135
+ train_cfg=dict(
136
+ num_points=12544,
137
+ oversample_ratio=3.0,
138
+ importance_sample_ratio=0.75,
139
+ assigner=dict(
140
+ type='mmdet.HungarianAssigner',
141
+ match_costs=[
142
+ dict(type='mmdet.ClassificationCost', weight=2.0),
143
+ dict(
144
+ type='mmdet.CrossEntropyLossCost', weight=5.0, use_sigmoid=True),
145
+ dict(type='mmdet.DiceCost', weight=5.0, pred_act=True, eps=1.0)
146
+ ]),
147
+ sampler=dict(type='mmdet.MaskPseudoSampler')),
148
+ test_cfg=dict(
149
+ panoptic_on=False,
150
+ # For now, the dataset does not support
151
+ # evaluating semantic segmentation metric.
152
+ semantic_on=False,
153
+ instance_on=True,
154
+ # max_per_image is for instance segmentation.
155
+ max_per_image=80,
156
+ iou_thr=0.8,
157
+ # In Mask2Former's panoptic postprocessing,
158
+ # it will filter mask area where score is less than 0.5 .
159
+ filter_low_score=True),
160
+ )
161
+
162
+ task_name = 'whu_ins'
163
+ exp_name = 'E20230603_0'
164
+ logger = dict(
165
+ type='WandbLogger',
166
+ project=task_name,
167
+ group='sam',
168
+ name=exp_name
169
+ )
170
+ # logger = None
171
+
172
+
173
+ callbacks = [
174
+ param_scheduler_callback,
175
+ dict(
176
+ type='ModelCheckpoint',
177
+ dirpath=f'results/{task_name}/{exp_name}/checkpoints',
178
+ save_last=True,
179
+ mode='max',
180
+ monitor='valsegm_map_0',
181
+ save_top_k=2,
182
+ filename='epoch_{epoch}-map_{valsegm_map_0:.4f}'
183
+ ),
184
+ dict(
185
+ type='LearningRateMonitor',
186
+ logging_interval='step'
187
+ )
188
+ ]
189
+
190
+
191
+ trainer_cfg = dict(
192
+ compiled_model=False,
193
+ accelerator="auto",
194
+ strategy="auto",
195
+ # strategy="ddp",
196
+ # strategy='ddp_find_unused_parameters_true',
197
+ # precision='32',
198
+ # precision='16-mixed',
199
+ devices=8,
200
+ default_root_dir=f'results/{task_name}/{exp_name}',
201
+ # default_root_dir='results/tmp',
202
+ max_epochs=max_epochs,
203
+ logger=logger,
204
+ callbacks=callbacks,
205
+ log_every_n_steps=20,
206
+ check_val_every_n_epoch=5,
207
+ benchmark=True,
208
+ # sync_batchnorm=True,
209
+ # fast_dev_run=True,
210
+
211
+ # limit_train_batches=1,
212
+ # limit_val_batches=0,
213
+ # limit_test_batches=None,
214
+ # limit_predict_batches=None,
215
+ # overfit_batches=0.0,
216
+
217
+ # val_check_interval=None,
218
+ # num_sanity_val_steps=0,
219
+ # enable_checkpointing=None,
220
+ # enable_progress_bar=None,
221
+ # enable_model_summary=None,
222
+ # accumulate_grad_batches=32,
223
+ # gradient_clip_val=15,
224
+ # gradient_clip_algorithm='norm',
225
+ # deterministic=None,
226
+ # inference_mode: bool=True,
227
+ use_distributed_sampler=True,
228
+ # profiler="simple",
229
+ # detect_anomaly=False,
230
+ # barebones=False,
231
+ # plugins=None,
232
+ # reload_dataloaders_every_n_epochs=0,
233
+ )
234
+
235
+
236
+ backend_args = None
237
+ train_pipeline = [
238
+ dict(type='mmdet.LoadImageFromFile'),
239
+ dict(type='mmdet.LoadAnnotations', with_bbox=True, with_mask=True),
240
+ dict(type='mmdet.Resize', scale=image_size),
241
+ dict(type='mmdet.RandomFlip', prob=0.5),
242
+ dict(type='mmdet.PackDetInputs')
243
+ ]
244
+
245
+ test_pipeline = [
246
+ dict(type='mmdet.LoadImageFromFile', backend_args=backend_args),
247
+ dict(type='mmdet.Resize', scale=image_size),
248
+ # If there is no gt annotation, remove this transform from the pipeline
249
+ dict(type='mmdet.LoadAnnotations', with_bbox=True, with_mask=True),
250
+ dict(
251
+ type='mmdet.PackDetInputs',
252
+ meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
253
+ 'scale_factor'))
254
+ ]
255
+
256
+
257
+ train_batch_size_per_gpu = 3
258
+ train_num_workers = 2
259
+ test_batch_size_per_gpu = 3
260
+ test_num_workers = 2
261
+ persistent_workers = True
262
+
263
+ data_parent = '/mnt/search01/dataset/cky_data/WHU'
264
+ train_data_prefix = 'train/'
265
+ val_data_prefix = 'test/'
266
+
267
+ dataset_type = 'WHUInsSegDataset'
268
+
269
+ val_loader = dict(
270
+ batch_size=test_batch_size_per_gpu,
271
+ num_workers=test_num_workers,
272
+ persistent_workers=persistent_workers,
273
+ pin_memory=True,
274
+ dataset=dict(
275
+ type=dataset_type,
276
+ data_root=data_parent,
277
+ ann_file='annotations/WHU_building_test.json',
278
+ data_prefix=dict(img_path=val_data_prefix + '/image', seg_path=val_data_prefix + '/label'),
279
+ test_mode=True,
280
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
281
+ pipeline=test_pipeline,
282
+ backend_args=backend_args))
283
+
284
+ datamodule_cfg = dict(
285
+ type='PLDataModule',
286
+ train_loader=dict(
287
+ batch_size=train_batch_size_per_gpu,
288
+ num_workers=train_num_workers,
289
+ persistent_workers=persistent_workers,
290
+ pin_memory=True,
291
+ dataset=dict(
292
+ type=dataset_type,
293
+ data_root=data_parent,
294
+ ann_file='annotations/WHU_building_train.json',
295
+ data_prefix=dict(img_path=train_data_prefix + '/image', seg_path=train_data_prefix + '/label'),
296
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
297
+ pipeline=train_pipeline,
298
+ backend_args=backend_args)
299
+ ),
300
+ val_loader=val_loader,
301
+ # test_loader=val_loader
302
+ predict_loader=val_loader
303
+ )
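
The data_preprocessor block is shared by the configs shown here: ImageNet mean/std in RGB order and padding to a multiple of 32, which is a no-op for the 1024x1024 inputs used throughout. A hand-rolled sketch of the same normalisation, outside the mmdet preprocessor:

# Equivalent of the mean/std normalisation in data_preprocessor (illustrative).
import torch

mean = torch.tensor([123.675, 116.28, 103.53]).view(3, 1, 1)
std = torch.tensor([58.395, 57.12, 57.375]).view(3, 1, 1)

def normalize(img_rgb):
    # img_rgb: float tensor of shape (3, H, W) with values in [0, 255], RGB order
    return (img_rgb - mean) / std

# pad_size_divisor=32 leaves a 1024x1024 image untouched: 1024 % 32 == 0
assert 1024 % 32 == 0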
configs/rsprompter/samdet_fasterrcnn_nwpu_config.py ADDED
@@ -0,0 +1,338 @@
1
+ custom_imports = dict(imports=['mmseg.datasets', 'mmseg.models'], allow_failed_imports=False)
2
+
3
+ sub_model_train = [
4
+ 'whole_model'
5
+ ]
6
+
7
+ sub_model_optim = {
8
+ 'whole_model': {'lr_mult': 1},
9
+ }
10
+
11
+ max_epochs = 1000
12
+
13
+ optimizer = dict(
14
+ type='AdamW',
15
+ sub_model=sub_model_optim,
16
+ lr=0.0005,
17
+ weight_decay=1e-3
18
+ )
19
+
20
+ param_scheduler = [
21
+ # warm up learning rate scheduler
22
+ dict(
23
+ type='LinearLR',
24
+ start_factor=5e-4,
25
+ by_epoch=True,
26
+ begin=0,
27
+ end=1,
28
+ # update by iter
29
+ convert_to_iter_based=True),
30
+ # main learning rate scheduler
31
+ dict(
32
+ type='CosineAnnealingLR',
33
+ T_max=max_epochs,
34
+ by_epoch=True,
35
+ begin=1,
36
+ end=max_epochs,
37
+ ),
38
+ ]
39
+
40
+ param_scheduler_callback = dict(
41
+ type='ParamSchedulerHook'
42
+ )
43
+
44
+ evaluator_ = dict(
45
+ type='CocoPLMetric',
46
+ metric=['bbox', 'segm'],
47
+ proposal_nums=[1, 10, 100]
48
+ )
49
+
50
+ evaluator = dict(
51
+ # train_evaluator=evaluator_,
52
+ val_evaluator=evaluator_,
53
+ )
54
+
55
+
56
+ image_size = (1024, 1024)
57
+
58
+ data_preprocessor = dict(
59
+ type='mmdet.DetDataPreprocessor',
60
+ mean=[123.675, 116.28, 103.53],
61
+ std=[58.395, 57.12, 57.375],
62
+ bgr_to_rgb=True,
63
+ pad_size_divisor=32,
64
+ pad_mask=True,
65
+ mask_pad_value=0,
66
+ )
67
+
68
+ num_things_classes = 10
69
+ num_stuff_classes = 0
70
+ num_classes = num_things_classes + num_stuff_classes
71
+
72
+ model = dict(
73
+ type='mmdet.FasterRCNN',
74
+ data_preprocessor=data_preprocessor,
75
+ backbone=dict(
76
+ type='mmdet.ResNet',
77
+ depth=50,
78
+ num_stages=4,
79
+ out_indices=(0, 1, 2, 3),
80
+ frozen_stages=1,
81
+ norm_cfg=dict(type='BN', requires_grad=True),
82
+ norm_eval=True,
83
+ style='pytorch',
84
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
85
+ neck=dict(
86
+ type='mmdet.FPN',
87
+ in_channels=[256, 512, 1024, 2048],
88
+ out_channels=256,
89
+ num_outs=5),
90
+ rpn_head=dict(
91
+ type='mmdet.RPNHead',
92
+ in_channels=256,
93
+ feat_channels=256,
94
+ anchor_generator=dict(
95
+ type='mmdet.AnchorGenerator',
96
+ scales=[8],
97
+ ratios=[0.5, 1.0, 2.0],
98
+ strides=[4, 8, 16, 32, 64]),
99
+ bbox_coder=dict(
100
+ type='mmdet.DeltaXYWHBBoxCoder',
101
+ target_means=[.0, .0, .0, .0],
102
+ target_stds=[1.0, 1.0, 1.0, 1.0]),
103
+ loss_cls=dict(
104
+ type='mmdet.CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
105
+ loss_bbox=dict(type='mmdet.L1Loss', loss_weight=1.0)),
106
+ roi_head=dict(
107
+ type='mmdet.StandardRoIHead',
108
+ bbox_roi_extractor=dict(
109
+ type='mmdet.SingleRoIExtractor',
110
+ roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
111
+ out_channels=256,
112
+ featmap_strides=[4, 8, 16, 32]),
113
+ bbox_head=dict(
114
+ type='mmdet.Shared2FCBBoxHead',
115
+ in_channels=256,
116
+ fc_out_channels=1024,
117
+ roi_feat_size=7,
118
+ num_classes=num_classes,  # use the 10 NWPU classes defined above (80 is the COCO default)
119
+ bbox_coder=dict(
120
+ type='mmdet.DeltaXYWHBBoxCoder',
121
+ target_means=[0., 0., 0., 0.],
122
+ target_stds=[0.1, 0.1, 0.2, 0.2]),
123
+ reg_class_agnostic=False,
124
+ loss_cls=dict(
125
+ type='mmdet.CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
126
+ loss_bbox=dict(type='mmdet.L1Loss', loss_weight=1.0))),
127
+ # model training and testing settings
128
+ train_cfg=dict(
129
+ rpn=dict(
130
+ assigner=dict(
131
+ type='mmdet.MaxIoUAssigner',
132
+ pos_iou_thr=0.7,
133
+ neg_iou_thr=0.3,
134
+ min_pos_iou=0.3,
135
+ match_low_quality=True,
136
+ ignore_iof_thr=-1),
137
+ sampler=dict(
138
+ type='mmdet.RandomSampler',
139
+ num=256,
140
+ pos_fraction=0.5,
141
+ neg_pos_ub=-1,
142
+ add_gt_as_proposals=False),
143
+ allowed_border=-1,
144
+ pos_weight=-1,
145
+ debug=False),
146
+ rpn_proposal=dict(
147
+ nms_pre=2000,
148
+ max_per_img=1000,
149
+ nms=dict(type='nms', iou_threshold=0.7),
150
+ min_bbox_size=0),
151
+ rcnn=dict(
152
+ assigner=dict(
153
+ type='mmdet.MaxIoUAssigner',
154
+ pos_iou_thr=0.5,
155
+ neg_iou_thr=0.5,
156
+ min_pos_iou=0.5,
157
+ match_low_quality=False,
158
+ ignore_iof_thr=-1),
159
+ sampler=dict(
160
+ type='mmdet.RandomSampler',
161
+ num=512,
162
+ pos_fraction=0.25,
163
+ neg_pos_ub=-1,
164
+ add_gt_as_proposals=True),
165
+ pos_weight=-1,
166
+ debug=False)),
167
+ test_cfg=dict(
168
+ rpn=dict(
169
+ nms_pre=1000,
170
+ max_per_img=1000,
171
+ nms=dict(type='nms', iou_threshold=0.7),
172
+ min_bbox_size=0),
173
+ rcnn=dict(
174
+ score_thr=0.05,
175
+ nms=dict(type='nms', iou_threshold=0.5),
176
+ max_per_img=100)
177
+ # soft-nms is also supported for rcnn testing
178
+ # e.g., nms=dict(type='soft_nms', iou_threshold=0.5, min_score=0.05)
179
+ ))
180
+
181
+ model_cfg = dict(
182
+ type='SegSAMDetPLer',
183
+ hyperparameters=dict(
184
+ optimizer=optimizer,
185
+ param_scheduler=param_scheduler,
186
+ evaluator=evaluator,
187
+ ),
188
+ need_train_names=sub_model_train,
189
+ whole_model=model,
190
+ backbone=dict(
191
+ type='vit_h',
192
+ checkpoint='pretrain/sam/sam_vit_h_4b8939.pth',
193
+ # type='vit_b',
194
+ # checkpoint='pretrain/sam/sam_vit_b_01ec64.pth',
195
+ )
196
+ )
197
+
198
+ task_name = 'nwpu_ins'
199
+ exp_name = 'E20230531_9'
200
+ logger = dict(
201
+ type='WandbLogger',
202
+ project=task_name,
203
+ group='samdet',
204
+ name=exp_name
205
+ )
206
+ # logger = None
207
+
208
+ callbacks = [
209
+ param_scheduler_callback,
210
+ dict(
211
+ type='ModelCheckpoint',
212
+ dirpath=f'results/{task_name}/{exp_name}/checkpoints',
213
+ save_last=True,
214
+ mode='max',
215
+ monitor='valsegm_map_0',
216
+ save_top_k=2,
217
+ filename='epoch_{epoch}-map_{valsegm_map_0:.4f}'
218
+ ),
219
+ dict(
220
+ type='LearningRateMonitor',
221
+ logging_interval='step'
222
+ )
223
+ ]
224
+
225
+
226
+ trainer_cfg = dict(
227
+ compiled_model=False,
228
+ accelerator="auto",
229
+ # strategy="auto",
230
+ # strategy="ddp",
231
+ strategy='ddp_find_unused_parameters_true',
232
+ # precision='32',
233
+ # precision='16-mixed',
234
+ devices=8,
235
+ default_root_dir=f'results/{task_name}/{exp_name}',
236
+ # default_root_dir='results/tmp',
237
+ max_epochs=max_epochs,
238
+ logger=logger,
239
+ callbacks=callbacks,
240
+ log_every_n_steps=5,
241
+ check_val_every_n_epoch=5,
242
+ benchmark=True,
243
+ # sync_batchnorm=True,
244
+ # fast_dev_run=True,
245
+
246
+ # limit_train_batches=1,
247
+ # limit_val_batches=0,
248
+ # limit_test_batches=None,
249
+ # limit_predict_batches=None,
250
+ # overfit_batches=0.0,
251
+
252
+ # val_check_interval=None,
253
+ # num_sanity_val_steps=0,
254
+ # enable_checkpointing=None,
255
+ # enable_progress_bar=None,
256
+ # enable_model_summary=None,
257
+ # accumulate_grad_batches=32,
258
+ # gradient_clip_val=15,
259
+ # gradient_clip_algorithm='norm',
260
+ # deterministic=None,
261
+ # inference_mode: bool=True,
262
+ use_distributed_sampler=True,
263
+ # profiler="simple",
264
+ # detect_anomaly=False,
265
+ # barebones=False,
266
+ # plugins=None,
267
+ # reload_dataloaders_every_n_epochs=0,
268
+ )
269
+
270
+
271
+ backend_args = None
272
+ train_pipeline = [
273
+ dict(type='mmdet.LoadImageFromFile'),
274
+ dict(type='mmdet.LoadAnnotations', with_bbox=True, with_mask=True),
275
+ dict(type='mmdet.Resize', scale=image_size),
276
+ dict(type='mmdet.RandomFlip', prob=0.5),
277
+ dict(type='mmdet.PackDetInputs')
278
+ ]
279
+
280
+ test_pipeline = [
281
+ dict(type='mmdet.LoadImageFromFile', backend_args=backend_args),
282
+ dict(type='mmdet.Resize', scale=image_size),
283
+ # If there is no gt annotation, remove this transform from the pipeline
284
+ dict(type='mmdet.LoadAnnotations', with_bbox=True, with_mask=True),
285
+ dict(
286
+ type='mmdet.PackDetInputs',
287
+ meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
288
+ 'scale_factor'))
289
+ ]
290
+
291
+
292
+ train_batch_size_per_gpu = 4
293
+ train_num_workers = 4
294
+ test_batch_size_per_gpu = 4
295
+ test_num_workers = 4
296
+ persistent_workers = True
297
+
298
+ data_parent = '/mnt/search01/dataset/cky_data/NWPU10'
299
+ train_data_prefix = ''
300
+ val_data_prefix = ''
301
+
302
+ dataset_type = 'NWPUInsSegDataset'
303
+
304
+ val_loader = dict(
305
+ batch_size=test_batch_size_per_gpu,
306
+ num_workers=test_num_workers,
307
+ persistent_workers=persistent_workers,
308
+ pin_memory=True,
309
+ dataset=dict(
310
+ type=dataset_type,
311
+ data_root=data_parent,
312
+ ann_file='NWPU_instances_val.json',
313
+ data_prefix=dict(img_path='positive image set'),
314
+ test_mode=True,
315
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
316
+ pipeline=test_pipeline,
317
+ backend_args=backend_args))
318
+
319
+ datamodule_cfg = dict(
320
+ type='PLDataModule',
321
+ train_loader=dict(
322
+ batch_size=train_batch_size_per_gpu,
323
+ num_workers=train_num_workers,
324
+ persistent_workers=persistent_workers,
325
+ pin_memory=True,
326
+ dataset=dict(
327
+ type=dataset_type,
328
+ data_root=data_parent,
329
+ ann_file='NWPU_instances_train.json',
330
+ data_prefix=dict(img_path='positive image set'),
331
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
332
+ pipeline=train_pipeline,
333
+ backend_args=backend_args)
334
+ ),
335
+ val_loader=val_loader,
336
+ # test_loader=val_loader
337
+ predict_loader=val_loader
338
+ )
configs/rsprompter/samdet_fasterrcnn_ssdd_config.py ADDED
@@ -0,0 +1,344 @@
1
+ custom_imports = dict(imports=['mmseg.datasets', 'mmseg.models'], allow_failed_imports=False)
2
+
3
+ sub_model_train = [
4
+ 'whole_model'
5
+ ]
6
+
7
+ sub_model_optim = {
8
+ 'whole_model': {'lr_mult': 1},
9
+ }
10
+
11
+ max_epochs = 1000
12
+
13
+ optimizer = dict(
14
+ type='AdamW',
15
+ sub_model=sub_model_optim,
16
+ lr=0.0005,
17
+ weight_decay=1e-3
18
+ )
19
+
20
+ param_scheduler = [
21
+ # warm up learning rate scheduler
22
+ dict(
23
+ type='LinearLR',
24
+ start_factor=5e-4,
25
+ by_epoch=True,
26
+ begin=0,
27
+ end=1,
28
+ # update by iter
29
+ convert_to_iter_based=True),
30
+ # main learning rate scheduler
31
+ dict(
32
+ type='CosineAnnealingLR',
33
+ T_max=max_epochs,
34
+ by_epoch=True,
35
+ begin=1,
36
+ end=max_epochs,
37
+ ),
38
+ ]
39
+
40
+ param_scheduler_callback = dict(
41
+ type='ParamSchedulerHook'
42
+ )
43
+
44
+ evaluator_ = dict(
45
+ type='CocoPLMetric',
46
+ metric=['bbox', 'segm'],
47
+ proposal_nums=[1, 10, 100]
48
+ )
49
+
50
+ evaluator = dict(
51
+ # train_evaluator=evaluator_,
52
+ val_evaluator=evaluator_,
53
+ )
54
+
55
+
56
+ image_size = (1024, 1024)
57
+
58
+ data_preprocessor = dict(
59
+ type='mmdet.DetDataPreprocessor',
60
+ mean=[123.675, 116.28, 103.53],
61
+ std=[58.395, 57.12, 57.375],
62
+ bgr_to_rgb=True,
63
+ pad_size_divisor=32,
64
+ pad_mask=True,
65
+ mask_pad_value=0,
66
+ )
67
+
68
+ num_things_classes = 1
69
+ num_stuff_classes = 0
70
+ num_classes = num_things_classes + num_stuff_classes
71
+
72
+ model = dict(
73
+ type='mmdet.FasterRCNN',
74
+ data_preprocessor=data_preprocessor,
75
+ backbone=dict(
76
+ type='mmdet.ResNet',
77
+ depth=50,
78
+ num_stages=4,
79
+ out_indices=(0, 1, 2, 3),
80
+ frozen_stages=1,
81
+ norm_cfg=dict(type='BN', requires_grad=True),
82
+ norm_eval=True,
83
+ style='pytorch',
84
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
85
+ neck=dict(
86
+ type='mmdet.FPN',
87
+ in_channels=[256, 512, 1024, 2048],
88
+ out_channels=256,
89
+ num_outs=5),
90
+ rpn_head=dict(
91
+ type='mmdet.RPNHead',
92
+ in_channels=256,
93
+ feat_channels=256,
94
+ anchor_generator=dict(
95
+ type='mmdet.AnchorGenerator',
96
+ scales=[8],
97
+ ratios=[0.5, 1.0, 2.0],
98
+ strides=[4, 8, 16, 32, 64]),
99
+ bbox_coder=dict(
100
+ type='mmdet.DeltaXYWHBBoxCoder',
101
+ target_means=[.0, .0, .0, .0],
102
+ target_stds=[1.0, 1.0, 1.0, 1.0]),
103
+ loss_cls=dict(
104
+ type='mmdet.CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
105
+ loss_bbox=dict(type='mmdet.L1Loss', loss_weight=1.0)),
106
+ roi_head=dict(
107
+ type='mmdet.StandardRoIHead',
108
+ bbox_roi_extractor=dict(
109
+ type='mmdet.SingleRoIExtractor',
110
+ roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
111
+ out_channels=256,
112
+ featmap_strides=[4, 8, 16, 32]),
113
+ bbox_head=dict(
114
+ type='mmdet.Shared2FCBBoxHead',
115
+ in_channels=256,
116
+ fc_out_channels=1024,
117
+ roi_feat_size=7,
118
+ num_classes=80,
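+ # NOTE: kept at the COCO default of 80 classes, even though num_classes = 1 is defined above for SSDD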
119
+ bbox_coder=dict(
120
+ type='mmdet.DeltaXYWHBBoxCoder',
121
+ target_means=[0., 0., 0., 0.],
122
+ target_stds=[0.1, 0.1, 0.2, 0.2]),
123
+ reg_class_agnostic=False,
124
+ loss_cls=dict(
125
+ type='mmdet.CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
126
+ loss_bbox=dict(type='mmdet.L1Loss', loss_weight=1.0))),
127
+ # model training and testing settings
128
+ train_cfg=dict(
129
+ rpn=dict(
130
+ assigner=dict(
131
+ type='mmdet.MaxIoUAssigner',
132
+ pos_iou_thr=0.7,
133
+ neg_iou_thr=0.3,
134
+ min_pos_iou=0.3,
135
+ match_low_quality=True,
136
+ ignore_iof_thr=-1),
137
+ sampler=dict(
138
+ type='mmdet.RandomSampler',
139
+ num=256,
140
+ pos_fraction=0.5,
141
+ neg_pos_ub=-1,
142
+ add_gt_as_proposals=False),
143
+ allowed_border=-1,
144
+ pos_weight=-1,
145
+ debug=False),
146
+ rpn_proposal=dict(
147
+ nms_pre=2000,
148
+ max_per_img=1000,
149
+ nms=dict(type='nms', iou_threshold=0.7),
150
+ min_bbox_size=0),
151
+ rcnn=dict(
152
+ assigner=dict(
153
+ type='mmdet.MaxIoUAssigner',
154
+ pos_iou_thr=0.5,
155
+ neg_iou_thr=0.5,
156
+ min_pos_iou=0.5,
157
+ match_low_quality=False,
158
+ ignore_iof_thr=-1),
159
+ sampler=dict(
160
+ type='mmdet.RandomSampler',
161
+ num=512,
162
+ pos_fraction=0.25,
163
+ neg_pos_ub=-1,
164
+ add_gt_as_proposals=True),
165
+ pos_weight=-1,
166
+ debug=False)),
167
+ test_cfg=dict(
168
+ rpn=dict(
169
+ nms_pre=1000,
170
+ max_per_img=1000,
171
+ nms=dict(type='nms', iou_threshold=0.7),
172
+ min_bbox_size=0),
173
+ rcnn=dict(
174
+ score_thr=0.05,
175
+ nms=dict(type='nms', iou_threshold=0.5),
176
+ max_per_img=100)
177
+ # soft-nms is also supported for rcnn testing
178
+ # e.g., nms=dict(type='soft_nms', iou_threshold=0.5, min_score=0.05)
179
+ ))
180
+
181
+ model_cfg = dict(
182
+ type='SegSAMDetPLer',
183
+ hyperparameters=dict(
184
+ optimizer=optimizer,
185
+ param_scheduler=param_scheduler,
186
+ evaluator=evaluator,
187
+ ),
188
+ need_train_names=sub_model_train,
189
+ whole_model=model,
190
+ backbone=dict(
191
+ type='vit_h',
192
+ checkpoint='pretrain/sam/sam_vit_h_4b8939.pth',
193
+ # type='vit_b',
194
+ # checkpoint='pretrain/sam/sam_vit_b_01ec64.pth',
195
+ )
196
+ )
197
+
198
+ task_name = 'ssdd_ins'
199
+ exp_name = 'E20230531_8'
200
+ logger = dict(
201
+ type='WandbLogger',
202
+ project=task_name,
203
+ group='samdet',
204
+ name=exp_name
205
+ )
206
+ # logger = None
207
+
208
+ callbacks = [
209
+ param_scheduler_callback,
210
+ dict(
211
+ type='ModelCheckpoint',
212
+ dirpath=f'results/{task_name}/{exp_name}/checkpoints',
213
+ save_last=True,
214
+ mode='max',
215
+ monitor='valsegm_map_0',
216
+ save_top_k=2,
217
+ filename='epoch_{epoch}-map_{valsegm_map_0:.4f}'
218
+ ),
219
+ dict(
220
+ type='LearningRateMonitor',
221
+ logging_interval='step'
222
+ )
223
+ ]
224
+
225
+
226
+ trainer_cfg = dict(
227
+ compiled_model=False,
228
+ accelerator="auto",
229
+ # strategy="auto",
230
+ # strategy="ddp",
231
+ strategy='ddp_find_unused_parameters_true',
232
+ # precision='32',
233
+ # precision='16-mixed',
234
+ devices=8,
235
+ default_root_dir=f'results/{task_name}/{exp_name}',
236
+ # default_root_dir='results/tmp',
237
+ max_epochs=max_epochs,
238
+ logger=logger,
239
+ callbacks=callbacks,
240
+ log_every_n_steps=5,
241
+ check_val_every_n_epoch=5,
242
+ benchmark=True,
243
+ # sync_batchnorm=True,
244
+ # fast_dev_run=True,
245
+
246
+ # limit_train_batches=1,
247
+ # limit_val_batches=0,
248
+ # limit_test_batches=None,
249
+ # limit_predict_batches=None,
250
+ # overfit_batches=0.0,
251
+
252
+ # val_check_interval=None,
253
+ # num_sanity_val_steps=0,
254
+ # enable_checkpointing=None,
255
+ # enable_progress_bar=None,
256
+ # enable_model_summary=None,
257
+ # accumulate_grad_batches=32,
258
+ # gradient_clip_val=15,
259
+ # gradient_clip_algorithm='norm',
260
+ # deterministic=None,
261
+ # inference_mode: bool=True,
262
+ use_distributed_sampler=True,
263
+ # profiler="simple",
264
+ # detect_anomaly=False,
265
+ # barebones=False,
266
+ # plugins=None,
267
+ # reload_dataloaders_every_n_epochs=0,
268
+ )
269
+
270
+
271
+ backend_args = None
272
+ train_pipeline = [
273
+ dict(type='mmdet.LoadImageFromFile'),
274
+ dict(type='mmdet.LoadAnnotations', with_bbox=True, with_mask=True),
275
+ dict(type='mmdet.Resize', scale=image_size),
276
+ dict(type='mmdet.RandomFlip', prob=0.5),
277
+ dict(type='mmdet.PackDetInputs')
278
+ ]
279
+
280
+ test_pipeline = [
281
+ dict(type='mmdet.LoadImageFromFile', backend_args=backend_args),
282
+ dict(type='mmdet.Resize', scale=image_size),
283
+ # If there are no ground-truth annotations, remove this LoadAnnotations step from the pipeline
284
+ dict(type='mmdet.LoadAnnotations', with_bbox=True, with_mask=True),
285
+ dict(
286
+ type='mmdet.PackDetInputs',
287
+ meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
288
+ 'scale_factor'))
289
+ ]
290
+
291
+
292
+ train_batch_size_per_gpu = 4
293
+ train_num_workers = 4
294
+ test_batch_size_per_gpu = 4
295
+ test_num_workers = 4
296
+ persistent_workers = True
297
+
298
+ data_parent = '/mnt/search01/dataset/cky_data/SSDD'
299
+ dataset_type = 'SSDDInsSegDataset'
300
+
301
+
302
+ val_loader = dict(
303
+ batch_size=test_batch_size_per_gpu,
304
+ num_workers=test_num_workers,
305
+ persistent_workers=persistent_workers,
306
+ pin_memory=True,
307
+ dataset=dict(
308
+ type=dataset_type,
309
+ data_root=data_parent,
310
+ # ann_file='NWPU_instances_val.json',
311
+ # data_prefix=dict(img_path='positive image set'),
312
+ ann_file='annotations/SSDD_instances_val.json',
313
+ data_prefix=dict(img_path='imgs'),
314
+ # ann_file='annotations/WHU_building_test.json',
315
+ # data_prefix=dict(img_path=val_data_prefix + '/image'),
316
+ test_mode=True,
317
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
318
+ pipeline=test_pipeline,
319
+ backend_args=backend_args))
320
+
321
+ datamodule_cfg = dict(
322
+ type='PLDataModule',
323
+ train_loader=dict(
324
+ batch_size=train_batch_size_per_gpu,
325
+ num_workers=train_num_workers,
326
+ persistent_workers=persistent_workers,
327
+ pin_memory=True,
328
+ dataset=dict(
329
+ type=dataset_type,
330
+ data_root=data_parent,
331
+ # ann_file='NWPU_instances_train.json',
332
+ # data_prefix=dict(img_path='positive image set'),
333
+ ann_file='annotations/SSDD_instances_train.json',
334
+ data_prefix=dict(img_path='imgs'),
335
+ # ann_file='NWPU_instances_train.json',
336
+ # data_prefix=dict(img_path='positive image set'),
337
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
338
+ pipeline=train_pipeline,
339
+ backend_args=backend_args)
340
+ ),
341
+ val_loader=val_loader,
342
+ # test_loader=val_loader
343
+ predict_loader=val_loader
344
+ )
configs/rsprompter/samdet_fasterrcnn_whu_config.py ADDED
@@ -0,0 +1,345 @@
1
+ custom_imports = dict(imports=['mmseg.datasets', 'mmseg.models'], allow_failed_imports=False)
2
+
3
+ sub_model_train = [
4
+ 'whole_model'
5
+ ]
6
+
7
+ sub_model_optim = {
8
+ 'whole_model': {'lr_mult': 1},
9
+ }
10
+
11
+ max_epochs = 100
12
+
13
+ optimizer = dict(
14
+ type='AdamW',
15
+ sub_model=sub_model_optim,
16
+ lr=0.0001,
17
+ weight_decay=1e-3
18
+ )
19
+
20
+ param_scheduler = [
21
+ # warm up learning rate scheduler
22
+ dict(
23
+ type='LinearLR',
24
+ start_factor=1e-4,
25
+ by_epoch=True,
26
+ begin=0,
27
+ end=1,
28
+ # update by iter
29
+ convert_to_iter_based=True),
30
+ # main learning rate scheduler
31
+ dict(
32
+ type='CosineAnnealingLR',
33
+ T_max=max_epochs,
34
+ by_epoch=True,
35
+ begin=1,
36
+ end=max_epochs,
37
+ ),
38
+ ]
39
+
40
+ param_scheduler_callback = dict(
41
+ type='ParamSchedulerHook'
42
+ )
43
+
44
+ evaluator_ = dict(
45
+ type='CocoPLMetric',
46
+ metric=['bbox', 'segm'],
47
+ proposal_nums=[1, 10, 100]
48
+ )
49
+
50
+ evaluator = dict(
51
+ # train_evaluator=evaluator_,
52
+ val_evaluator=evaluator_,
53
+ )
54
+
55
+
56
+ image_size = (1024, 1024)
57
+
58
+ data_preprocessor = dict(
59
+ type='mmdet.DetDataPreprocessor',
60
+ mean=[123.675, 116.28, 103.53],
61
+ std=[58.395, 57.12, 57.375],
62
+ bgr_to_rgb=True,
63
+ pad_size_divisor=32,
64
+ pad_mask=True,
65
+ mask_pad_value=0,
66
+ )
67
+
68
+ num_things_classes = 1
69
+ num_stuff_classes = 0
70
+ num_classes = num_things_classes + num_stuff_classes
71
+
72
+ model = dict(
73
+ type='mmdet.FasterRCNN',
74
+ data_preprocessor=data_preprocessor,
75
+ backbone=dict(
76
+ type='mmdet.ResNet',
77
+ depth=50,
78
+ num_stages=4,
79
+ out_indices=(0, 1, 2, 3),
80
+ frozen_stages=1,
81
+ norm_cfg=dict(type='BN', requires_grad=True),
82
+ norm_eval=True,
83
+ style='pytorch',
84
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
85
+ neck=dict(
86
+ type='mmdet.FPN',
87
+ in_channels=[256, 512, 1024, 2048],
88
+ out_channels=256,
89
+ num_outs=5),
90
+ rpn_head=dict(
91
+ type='mmdet.RPNHead',
92
+ in_channels=256,
93
+ feat_channels=256,
94
+ anchor_generator=dict(
95
+ type='mmdet.AnchorGenerator',
96
+ scales=[8],
97
+ ratios=[0.5, 1.0, 2.0],
98
+ strides=[4, 8, 16, 32, 64]),
99
+ bbox_coder=dict(
100
+ type='mmdet.DeltaXYWHBBoxCoder',
101
+ target_means=[.0, .0, .0, .0],
102
+ target_stds=[1.0, 1.0, 1.0, 1.0]),
103
+ loss_cls=dict(
104
+ type='mmdet.CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
105
+ loss_bbox=dict(type='mmdet.L1Loss', loss_weight=1.0)),
106
+ roi_head=dict(
107
+ type='mmdet.StandardRoIHead',
108
+ bbox_roi_extractor=dict(
109
+ type='mmdet.SingleRoIExtractor',
110
+ roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
111
+ out_channels=256,
112
+ featmap_strides=[4, 8, 16, 32]),
113
+ bbox_head=dict(
114
+ type='mmdet.Shared2FCBBoxHead',
115
+ in_channels=256,
116
+ fc_out_channels=1024,
117
+ roi_feat_size=7,
118
+ num_classes=80,
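+ # NOTE: kept at the COCO default of 80 classes, even though num_classes = 1 is defined above for WHU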
119
+ bbox_coder=dict(
120
+ type='mmdet.DeltaXYWHBBoxCoder',
121
+ target_means=[0., 0., 0., 0.],
122
+ target_stds=[0.1, 0.1, 0.2, 0.2]),
123
+ reg_class_agnostic=False,
124
+ loss_cls=dict(
125
+ type='mmdet.CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
126
+ loss_bbox=dict(type='mmdet.L1Loss', loss_weight=1.0))),
127
+ # model training and testing settings
128
+ train_cfg=dict(
129
+ rpn=dict(
130
+ assigner=dict(
131
+ type='mmdet.MaxIoUAssigner',
132
+ pos_iou_thr=0.7,
133
+ neg_iou_thr=0.3,
134
+ min_pos_iou=0.3,
135
+ match_low_quality=True,
136
+ ignore_iof_thr=-1),
137
+ sampler=dict(
138
+ type='mmdet.RandomSampler',
139
+ num=256,
140
+ pos_fraction=0.5,
141
+ neg_pos_ub=-1,
142
+ add_gt_as_proposals=False),
143
+ allowed_border=-1,
144
+ pos_weight=-1,
145
+ debug=False),
146
+ rpn_proposal=dict(
147
+ nms_pre=2000,
148
+ max_per_img=1000,
149
+ nms=dict(type='nms', iou_threshold=0.7),
150
+ min_bbox_size=0),
151
+ rcnn=dict(
152
+ assigner=dict(
153
+ type='mmdet.MaxIoUAssigner',
154
+ pos_iou_thr=0.5,
155
+ neg_iou_thr=0.5,
156
+ min_pos_iou=0.5,
157
+ match_low_quality=False,
158
+ ignore_iof_thr=-1),
159
+ sampler=dict(
160
+ type='mmdet.RandomSampler',
161
+ num=512,
162
+ pos_fraction=0.25,
163
+ neg_pos_ub=-1,
164
+ add_gt_as_proposals=True),
165
+ pos_weight=-1,
166
+ debug=False)),
167
+ test_cfg=dict(
168
+ rpn=dict(
169
+ nms_pre=1000,
170
+ max_per_img=1000,
171
+ nms=dict(type='nms', iou_threshold=0.7),
172
+ min_bbox_size=0),
173
+ rcnn=dict(
174
+ score_thr=0.05,
175
+ nms=dict(type='nms', iou_threshold=0.5),
176
+ max_per_img=100)
177
+ # soft-nms is also supported for rcnn testing
178
+ # e.g., nms=dict(type='soft_nms', iou_threshold=0.5, min_score=0.05)
179
+ ))
180
+
181
+ model_cfg = dict(
182
+ type='SegSAMDetPLer',
183
+ hyperparameters=dict(
184
+ optimizer=optimizer,
185
+ param_scheduler=param_scheduler,
186
+ evaluator=evaluator,
187
+ ),
188
+ need_train_names=sub_model_train,
189
+ whole_model=model,
190
+ backbone=dict(
191
+ type='vit_h',
192
+ checkpoint='pretrain/sam/sam_vit_h_4b8939.pth',
193
+ # type='vit_b',
194
+ # checkpoint='pretrain/sam/sam_vit_b_01ec64.pth',
195
+ )
196
+ )
197
+
198
+ task_name = 'whu_ins'
199
+ exp_name = 'E20230602_3'
200
+ logger = dict(
201
+ type='WandbLogger',
202
+ project=task_name,
203
+ group='samdet',
204
+ name=exp_name
205
+ )
206
+ # logger = None
207
+
208
+ callbacks = [
209
+ param_scheduler_callback,
210
+ dict(
211
+ type='ModelCheckpoint',
212
+ dirpath=f'results/{task_name}/{exp_name}/checkpoints',
213
+ save_last=True,
214
+ mode='max',
215
+ monitor='valsegm_map_0',
216
+ save_top_k=2,
217
+ filename='epoch_{epoch}-map_{valsegm_map_0:.4f}'
218
+ ),
219
+ dict(
220
+ type='LearningRateMonitor',
221
+ logging_interval='step'
222
+ )
223
+ ]
224
+
225
+
226
+ trainer_cfg = dict(
227
+ compiled_model=False,
228
+ accelerator="auto",
229
+ # strategy="auto",
230
+ # strategy="ddp",
231
+ strategy='ddp_find_unused_parameters_true',
232
+ # precision='32',
233
+ # precision='16-mixed',
234
+ devices=8,
235
+ default_root_dir=f'results/{task_name}/{exp_name}',
236
+ # default_root_dir='results/tmp',
237
+ max_epochs=max_epochs,
238
+ logger=logger,
239
+ callbacks=callbacks,
240
+ log_every_n_steps=20,
241
+ check_val_every_n_epoch=3,
242
+ benchmark=True,
243
+ # sync_batchnorm=True,
244
+ # fast_dev_run=True,
245
+
246
+ # limit_train_batches=1,
247
+ # limit_val_batches=0,
248
+ # limit_test_batches=None,
249
+ # limit_predict_batches=None,
250
+ # overfit_batches=0.0,
251
+
252
+ # val_check_interval=None,
253
+ # num_sanity_val_steps=0,
254
+ # enable_checkpointing=None,
255
+ # enable_progress_bar=None,
256
+ # enable_model_summary=None,
257
+ # accumulate_grad_batches=32,
258
+ # gradient_clip_val=15,
259
+ # gradient_clip_algorithm='norm',
260
+ # deterministic=None,
261
+ # inference_mode: bool=True,
262
+ use_distributed_sampler=True,
263
+ # profiler="simple",
264
+ # detect_anomaly=False,
265
+ # barebones=False,
266
+ # plugins=None,
267
+ # reload_dataloaders_every_n_epochs=0,
268
+ )
269
+
270
+
271
+ backend_args = None
272
+ train_pipeline = [
273
+ dict(type='mmdet.LoadImageFromFile'),
274
+ dict(type='mmdet.LoadAnnotations', with_bbox=True, with_mask=True),
275
+ dict(type='mmdet.Resize', scale=image_size),
276
+ dict(type='mmdet.RandomFlip', prob=0.5),
277
+ dict(type='mmdet.PackDetInputs')
278
+ ]
279
+
280
+ test_pipeline = [
281
+ dict(type='mmdet.LoadImageFromFile', backend_args=backend_args),
282
+ dict(type='mmdet.Resize', scale=image_size),
283
+ # If there are no ground-truth annotations, remove this LoadAnnotations step from the pipeline
284
+ dict(type='mmdet.LoadAnnotations', with_bbox=True, with_mask=True),
285
+ dict(
286
+ type='mmdet.PackDetInputs',
287
+ meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
288
+ 'scale_factor'))
289
+ ]
290
+
291
+
292
+ train_batch_size_per_gpu = 4
293
+ train_num_workers = 4
294
+ test_batch_size_per_gpu = 4
295
+ test_num_workers = 4
296
+ persistent_workers = True
297
+
298
+ data_parent = '/mnt/search01/dataset/cky_data/WHU'
299
+ train_data_prefix = 'train/'
300
+ val_data_prefix = 'test/'
301
+ dataset_type = 'WHUInsSegDataset'
302
+
303
+ val_loader = dict(
304
+ batch_size=test_batch_size_per_gpu,
305
+ num_workers=test_num_workers,
306
+ persistent_workers=persistent_workers,
307
+ pin_memory=True,
308
+ dataset=dict(
309
+ type=dataset_type,
310
+ data_root=data_parent,
311
+ # ann_file='NWPU_instances_val.json',
312
+ # data_prefix=dict(img_path='positive image set'),
313
+ # ann_file='annotations/SSDD_instances_val.json',
314
+ # data_prefix=dict(img_path='imgs'),
315
+ ann_file='annotations/WHU_building_test.json',
316
+ data_prefix=dict(img_path=val_data_prefix + '/image'),
317
+ test_mode=True,
318
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
319
+ pipeline=test_pipeline,
320
+ backend_args=backend_args))
321
+
322
+ datamodule_cfg = dict(
323
+ type='PLDataModule',
324
+ train_loader=dict(
325
+ batch_size=train_batch_size_per_gpu,
326
+ num_workers=train_num_workers,
327
+ persistent_workers=persistent_workers,
328
+ pin_memory=True,
329
+ dataset=dict(
330
+ type=dataset_type,
331
+ data_root=data_parent,
332
+ # ann_file='NWPU_instances_train.json',
333
+ # data_prefix=dict(img_path='positive image set'),
334
+ # ann_file='annotations/SSDD_instances_train.json',
335
+ # data_prefix=dict(img_path='imgs'),
336
+ ann_file='annotations/WHU_building_train.json',
337
+ data_prefix=dict(img_path=train_data_prefix + '/image'),
338
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
339
+ pipeline=train_pipeline,
340
+ backend_args=backend_args)
341
+ ),
342
+ val_loader=val_loader,
343
+ # test_loader=val_loader
344
+ predict_loader=val_loader
345
+ )
configs/rsprompter/samseg_mask2former_nwpu_config.py ADDED
@@ -0,0 +1,350 @@
1
+ custom_imports = dict(imports=['mmseg.datasets', 'mmseg.models'], allow_failed_imports=False)
2
+
3
+ sub_model_train = [
4
+ 'panoptic_head',
5
+ 'sam_neck',
6
+ 'data_preprocessor'
7
+ ]
8
+
9
+ sub_model_optim = {
10
+ 'sam_neck': {'lr_mult': 1},
11
+ 'panoptic_head': {'lr_mult': 1},
12
+ }
13
+
14
+ max_epochs = 500
15
+
16
+ optimizer = dict(
17
+ type='AdamW',
18
+ sub_model=sub_model_optim,
19
+ lr=0.0001,
20
+ weight_decay=1e-3
21
+ )
22
+
23
+ param_scheduler = [
24
+ # warm up learning rate scheduler
25
+ dict(
26
+ type='LinearLR',
27
+ start_factor=1e-4,
28
+ by_epoch=True,
29
+ begin=0,
30
+ end=1,
31
+ # update by iter
32
+ convert_to_iter_based=True),
33
+ # main learning rate scheduler
34
+ dict(
35
+ type='CosineAnnealingLR',
36
+ T_max=max_epochs,
37
+ by_epoch=True,
38
+ begin=1,
39
+ end=max_epochs,
40
+ ),
41
+ ]
42
+
43
+ param_scheduler_callback = dict(
44
+ type='ParamSchedulerHook'
45
+ )
46
+
47
+ evaluator_ = dict(
48
+ type='CocoPLMetric',
49
+ metric=['bbox', 'segm'],
50
+ proposal_nums=[1, 10, 100]
51
+ )
52
+
53
+ evaluator = dict(
54
+ val_evaluator=evaluator_,
55
+ )
56
+
57
+
58
+ image_size = (1024, 1024)
59
+
60
+ data_preprocessor = dict(
61
+ type='mmdet.DetDataPreprocessor',
62
+ mean=[123.675, 116.28, 103.53],
63
+ std=[58.395, 57.12, 57.375],
64
+ bgr_to_rgb=True,
65
+ pad_size_divisor=32,
66
+ pad_mask=True,
67
+ mask_pad_value=0,
68
+ )
69
+
70
+ num_things_classes = 10
71
+ num_stuff_classes = 0
72
+ num_classes = num_things_classes + num_stuff_classes
73
+ num_queries = 90
74
+
75
+ model_cfg = dict(
76
+ type='SegSAMPLer',
77
+ hyperparameters=dict(
78
+ optimizer=optimizer,
79
+ param_scheduler=param_scheduler,
80
+ evaluator=evaluator,
81
+ ),
82
+ need_train_names=sub_model_train,
83
+ data_preprocessor=data_preprocessor,
84
+ backbone=dict(
85
+ type='vit_h',
86
+ checkpoint='pretrain/sam/sam_vit_h_4b8939.pth',
87
+ # type='vit_b',
88
+ # checkpoint='pretrain/sam/sam_vit_b_01ec64.pth',
89
+ ),
90
+ sam_neck=dict(
91
+ type='SAMAggregatorNeck',
92
+ in_channels=[1280] * 32,
93
+ # in_channels=[768] * 12,
94
+ inner_channels=32,
95
+ selected_channels=range(8, 32, 3),
96
+ # selected_channels=range(4, 12, 2),
97
+ out_channels=256,
98
+ up_sample_scale=4,
99
+ ),
100
+ panoptic_head=dict(
101
+ type='mmdet.Mask2FormerHead',
102
+ in_channels=[256, 256, 256], # pass to pixel_decoder inside
103
+ strides=[8, 16, 32],
104
+ feat_channels=256,
105
+ out_channels=256,
106
+ num_things_classes=num_things_classes,
107
+ num_stuff_classes=num_stuff_classes,
108
+ num_queries=num_queries,
109
+ num_transformer_feat_level=3,
110
+ pixel_decoder=dict(
111
+ type='mmdet.MSDeformAttnPixelDecoder',
112
+ num_outs=3,
113
+ norm_cfg=dict(type='GN', num_groups=32),
114
+ act_cfg=dict(type='ReLU'),
115
+ encoder=dict( # DeformableDetrTransformerEncoder
116
+ # num_layers=6,
117
+ num_layers=2,
118
+ layer_cfg=dict( # DeformableDetrTransformerEncoderLayer
119
+ self_attn_cfg=dict( # MultiScaleDeformableAttention
120
+ embed_dims=256,
121
+ num_heads=8,
122
+ num_levels=3,
123
+ num_points=4,
124
+ dropout=0.1,
125
+ batch_first=True),
126
+ ffn_cfg=dict(
127
+ embed_dims=256,
128
+ feedforward_channels=1024,
129
+ num_fcs=2,
130
+ ffn_drop=0.1,
131
+ act_cfg=dict(type='ReLU', inplace=True)))),
132
+ positional_encoding=dict(num_feats=128, normalize=True)),
133
+ enforce_decoder_input_project=False,
134
+ positional_encoding=dict(num_feats=128, normalize=True),
135
+ transformer_decoder=dict( # Mask2FormerTransformerDecoder
136
+ return_intermediate=True,
137
+ # num_layers=9,
138
+ num_layers=3,
139
+ layer_cfg=dict( # Mask2FormerTransformerDecoderLayer
140
+ self_attn_cfg=dict( # MultiheadAttention
141
+ embed_dims=256,
142
+ num_heads=8,
143
+ dropout=0.1,
144
+ batch_first=True),
145
+ cross_attn_cfg=dict( # MultiheadAttention
146
+ embed_dims=256,
147
+ num_heads=8,
148
+ dropout=0.1,
149
+ batch_first=True),
150
+ ffn_cfg=dict(
151
+ embed_dims=256,
152
+ feedforward_channels=2048,
153
+ num_fcs=2,
154
+ ffn_drop=0.1,
155
+ act_cfg=dict(type='ReLU', inplace=True))),
156
+ init_cfg=None),
157
+ loss_cls=dict(
158
+ type='mmdet.CrossEntropyLoss',
159
+ use_sigmoid=False,
160
+ loss_weight=2.0,
161
+ reduction='mean',
162
+ class_weight=[1.0] * num_classes + [0.1]),
163
+ loss_mask=dict(
164
+ type='mmdet.CrossEntropyLoss',
165
+ use_sigmoid=True,
166
+ reduction='mean',
167
+ loss_weight=5.0),
168
+ loss_dice=dict(
169
+ type='mmdet.DiceLoss',
170
+ use_sigmoid=True,
171
+ activate=True,
172
+ reduction='mean',
173
+ naive_dice=True,
174
+ eps=1.0,
175
+ loss_weight=5.0)),
176
+ panoptic_fusion_head=dict(
177
+ type='mmdet.MaskFormerFusionHead',
178
+ num_things_classes=num_things_classes,
179
+ num_stuff_classes=num_stuff_classes,
180
+ loss_panoptic=None,
181
+ init_cfg=None),
182
+ train_cfg=dict(
183
+ num_points=12544,
184
+ oversample_ratio=3.0,
185
+ importance_sample_ratio=0.75,
186
+ assigner=dict(
187
+ type='mmdet.HungarianAssigner',
188
+ match_costs=[
189
+ dict(type='mmdet.ClassificationCost', weight=2.0),
190
+ dict(
191
+ type='mmdet.CrossEntropyLossCost', weight=5.0, use_sigmoid=True),
192
+ dict(type='mmdet.DiceCost', weight=5.0, pred_act=True, eps=1.0)
193
+ ]),
194
+ sampler=dict(type='mmdet.MaskPseudoSampler')),
195
+ test_cfg=dict(
196
+ panoptic_on=False,
197
+ # For now, the dataset does not support
198
+ # evaluating semantic segmentation metric.
199
+ semantic_on=False,
200
+ instance_on=True,
201
+ # max_per_image is for instance segmentation.
202
+ max_per_image=num_queries,
203
+ iou_thr=0.8,
204
+ # In Mask2Former's panoptic postprocessing,
205
+ # masks whose score is below 0.5 are filtered out.
206
+ filter_low_score=True),
207
+ init_cfg=None)
208
+
209
+
210
+ task_name = 'nwpu_ins'
211
+ exp_name = 'E20230604_5'
212
+ logger = dict(
213
+ type='WandbLogger',
214
+ project=task_name,
215
+ group='samseg-mask2former',
216
+ name=exp_name
217
+ )
218
+ # logger = None
219
+
220
+ callbacks = [
221
+ param_scheduler_callback,
222
+ dict(
223
+ type='ModelCheckpoint',
224
+ dirpath=f'results/{task_name}/{exp_name}/checkpoints',
225
+ save_last=True,
226
+ mode='max',
227
+ monitor='valsegm_map_0',
228
+ save_top_k=2,
229
+ filename='epoch_{epoch}-map_{valsegm_map_0:.4f}'
230
+ ),
231
+ dict(
232
+ type='LearningRateMonitor',
233
+ logging_interval='step'
234
+ )
235
+ ]
236
+
237
+
238
+ trainer_cfg = dict(
239
+ compiled_model=False,
240
+ accelerator="auto",
241
+ strategy="auto",
242
+ # strategy="ddp",
243
+ # strategy='ddp_find_unused_parameters_true',
244
+ # precision='32',
245
+ # precision='16-mixed',
246
+ devices=8,
247
+ default_root_dir=f'results/{task_name}/{exp_name}',
248
+ # default_root_dir='results/tmp',
249
+ max_epochs=max_epochs,
250
+ logger=logger,
251
+ callbacks=callbacks,
252
+ log_every_n_steps=5,
253
+ check_val_every_n_epoch=5,
254
+ benchmark=True,
255
+ # sync_batchnorm=True,
256
+ # fast_dev_run=True,
257
+
258
+ # limit_train_batches=1,
259
+ # limit_val_batches=0,
260
+ # limit_test_batches=None,
261
+ # limit_predict_batches=None,
262
+ # overfit_batches=0.0,
263
+
264
+ # val_check_interval=None,
265
+ # num_sanity_val_steps=0,
266
+ # enable_checkpointing=None,
267
+ # enable_progress_bar=None,
268
+ # enable_model_summary=None,
269
+ # accumulate_grad_batches=32,
270
+ # gradient_clip_val=15,
271
+ # gradient_clip_algorithm='norm',
272
+ # deterministic=None,
273
+ # inference_mode: bool=True,
274
+ use_distributed_sampler=True,
275
+ # profiler="simple",
276
+ # detect_anomaly=False,
277
+ # barebones=False,
278
+ # plugins=None,
279
+ # reload_dataloaders_every_n_epochs=0,
280
+ )
281
+
282
+
283
+ backend_args = None
284
+ train_pipeline = [
285
+ dict(type='mmdet.LoadImageFromFile'),
286
+ dict(type='mmdet.LoadAnnotations', with_bbox=True, with_mask=True),
287
+ dict(type='mmdet.Resize', scale=image_size),
288
+ dict(type='mmdet.RandomFlip', prob=0.5),
289
+ dict(type='mmdet.PackDetInputs')
290
+ ]
291
+
292
+ test_pipeline = [
293
+ dict(type='mmdet.LoadImageFromFile', backend_args=backend_args),
294
+ dict(type='mmdet.Resize', scale=image_size),
295
+ # If there are no ground-truth annotations, remove this LoadAnnotations step from the pipeline
296
+ dict(type='mmdet.LoadAnnotations', with_bbox=True, with_mask=True),
297
+ dict(
298
+ type='mmdet.PackDetInputs',
299
+ meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
300
+ 'scale_factor'))
301
+ ]
302
+
303
+
304
+ train_batch_size_per_gpu = 4
305
+ train_num_workers = 4
306
+ test_batch_size_per_gpu = 4
307
+ test_num_workers = 4
308
+ persistent_workers = True
309
+
310
+ data_parent = '/mnt/search01/dataset/cky_data/NWPU10'
311
+ train_data_prefix = ''
312
+ val_data_prefix = ''
313
+
314
+ dataset_type = 'NWPUInsSegDataset'
315
+
316
+ val_loader = dict(
317
+ batch_size=test_batch_size_per_gpu,
318
+ num_workers=test_num_workers,
319
+ persistent_workers=persistent_workers,
320
+ pin_memory=True,
321
+ dataset=dict(
322
+ type=dataset_type,
323
+ data_root=data_parent,
324
+ ann_file='NWPU_instances_val.json',
325
+ data_prefix=dict(img_path='positive image set'),
326
+ test_mode=True,
327
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
328
+ pipeline=test_pipeline,
329
+ backend_args=backend_args))
330
+
331
+ datamodule_cfg = dict(
332
+ type='PLDataModule',
333
+ train_loader=dict(
334
+ batch_size=train_batch_size_per_gpu,
335
+ num_workers=train_num_workers,
336
+ persistent_workers=persistent_workers,
337
+ pin_memory=True,
338
+ dataset=dict(
339
+ type=dataset_type,
340
+ data_root=data_parent,
341
+ ann_file='NWPU_instances_train.json',
342
+ data_prefix=dict(img_path='positive image set'),
343
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
344
+ pipeline=train_pipeline,
345
+ backend_args=backend_args)
346
+ ),
347
+ val_loader=val_loader,
348
+ # test_loader=val_loader
349
+ predict_loader=val_loader
350
+ )
configs/rsprompter/samseg_mask2former_ssdd_config.py ADDED
@@ -0,0 +1,346 @@
1
+ custom_imports = dict(imports=['mmseg.datasets', 'mmseg.models'], allow_failed_imports=False)
2
+
3
+ sub_model_train = [
4
+ 'panoptic_head',
5
+ 'sam_neck',
6
+ 'data_preprocessor'
7
+ ]
8
+
9
+ sub_model_optim = {
10
+ 'sam_neck': {'lr_mult': 1},
11
+ 'panoptic_head': {'lr_mult': 1},
12
+ }
13
+
14
+ max_epochs = 600
15
+
16
+ optimizer = dict(
17
+ type='AdamW',
18
+ sub_model=sub_model_optim,
19
+ lr=0.0005,
20
+ weight_decay=1e-3
21
+ )
22
+
23
+ param_scheduler = [
24
+ # warm up learning rate scheduler
25
+ dict(
26
+ type='LinearLR',
27
+ start_factor=5e-4,
28
+ by_epoch=True,
29
+ begin=0,
30
+ end=1,
31
+ # update by iter
32
+ convert_to_iter_based=True),
33
+ # main learning rate scheduler
34
+ dict(
35
+ type='CosineAnnealingLR',
36
+ T_max=max_epochs,
37
+ by_epoch=True,
38
+ begin=1,
39
+ end=max_epochs,
40
+ ),
41
+ ]
42
+
43
+ param_scheduler_callback = dict(
44
+ type='ParamSchedulerHook'
45
+ )
46
+
47
+ evaluator_ = dict(
48
+ type='CocoPLMetric',
49
+ metric=['bbox', 'segm'],
50
+ proposal_nums=[1, 10, 100]
51
+ )
52
+
53
+ evaluator = dict(
54
+ val_evaluator=evaluator_,
55
+ )
56
+
57
+
58
+ image_size = (1024, 1024)
59
+
60
+ data_preprocessor = dict(
61
+ type='mmdet.DetDataPreprocessor',
62
+ mean=[123.675, 116.28, 103.53],
63
+ std=[58.395, 57.12, 57.375],
64
+ bgr_to_rgb=True,
65
+ pad_size_divisor=32,
66
+ pad_mask=True,
67
+ mask_pad_value=0,
68
+ )
69
+
70
+ num_things_classes = 1
71
+ num_stuff_classes = 0
72
+ num_classes = num_things_classes + num_stuff_classes
73
+ num_queries = 30
74
+
75
+ model_cfg = dict(
76
+ type='SegSAMPLer',
77
+ hyperparameters=dict(
78
+ optimizer=optimizer,
79
+ param_scheduler=param_scheduler,
80
+ evaluator=evaluator,
81
+ ),
82
+ need_train_names=sub_model_train,
83
+ data_preprocessor=data_preprocessor,
84
+ backbone=dict(
85
+ type='vit_h',
86
+ checkpoint='pretrain/sam/sam_vit_h_4b8939.pth',
87
+ # type='vit_b',
88
+ # checkpoint='pretrain/sam/sam_vit_b_01ec64.pth',
89
+ ),
90
+ sam_neck=dict(
91
+ type='SAMAggregatorNeck',
92
+ in_channels=[1280] * 32,
93
+ # in_channels=[768] * 12,
94
+ inner_channels=32,
95
+ selected_channels=range(4, 32, 2),
96
+ # selected_channels=range(4, 12, 2),
97
+ out_channels=256,
98
+ up_sample_scale=4,
99
+ ),
100
+ panoptic_head=dict(
101
+ type='mmdet.Mask2FormerHead',
102
+ in_channels=[256, 256, 256], # pass to pixel_decoder inside
103
+ strides=[8, 16, 32],
104
+ feat_channels=256,
105
+ out_channels=256,
106
+ num_things_classes=num_things_classes,
107
+ num_stuff_classes=num_stuff_classes,
108
+ num_queries=num_queries,
109
+ num_transformer_feat_level=3,
110
+ pixel_decoder=dict(
111
+ type='mmdet.MSDeformAttnPixelDecoder',
112
+ num_outs=3,
113
+ norm_cfg=dict(type='GN', num_groups=32),
114
+ act_cfg=dict(type='ReLU'),
115
+ encoder=dict( # DeformableDetrTransformerEncoder
116
+ # num_layers=6,
117
+ num_layers=2,
118
+ layer_cfg=dict( # DeformableDetrTransformerEncoderLayer
119
+ self_attn_cfg=dict( # MultiScaleDeformableAttention
120
+ embed_dims=256,
121
+ num_heads=8,
122
+ num_levels=3,
123
+ num_points=4,
124
+ dropout=0.1,
125
+ batch_first=True),
126
+ ffn_cfg=dict(
127
+ embed_dims=256,
128
+ feedforward_channels=1024,
129
+ num_fcs=2,
130
+ ffn_drop=0.1,
131
+ act_cfg=dict(type='ReLU', inplace=True)))),
132
+ positional_encoding=dict(num_feats=128, normalize=True)),
133
+ enforce_decoder_input_project=False,
134
+ positional_encoding=dict(num_feats=128, normalize=True),
135
+ transformer_decoder=dict( # Mask2FormerTransformerDecoder
136
+ return_intermediate=True,
137
+ # num_layers=9,
138
+ num_layers=3,
139
+ layer_cfg=dict( # Mask2FormerTransformerDecoderLayer
140
+ self_attn_cfg=dict( # MultiheadAttention
141
+ embed_dims=256,
142
+ num_heads=8,
143
+ dropout=0.1,
144
+ batch_first=True),
145
+ cross_attn_cfg=dict( # MultiheadAttention
146
+ embed_dims=256,
147
+ num_heads=8,
148
+ dropout=0.1,
149
+ batch_first=True),
150
+ ffn_cfg=dict(
151
+ embed_dims=256,
152
+ feedforward_channels=2048,
153
+ num_fcs=2,
154
+ ffn_drop=0.1,
155
+ act_cfg=dict(type='ReLU', inplace=True))),
156
+ init_cfg=None),
157
+ loss_cls=dict(
158
+ type='mmdet.CrossEntropyLoss',
159
+ use_sigmoid=False,
160
+ loss_weight=2.0,
161
+ reduction='mean',
162
+ class_weight=[1.0] * num_classes + [0.1]),
163
+ loss_mask=dict(
164
+ type='mmdet.CrossEntropyLoss',
165
+ use_sigmoid=True,
166
+ reduction='mean',
167
+ loss_weight=5.0),
168
+ loss_dice=dict(
169
+ type='mmdet.DiceLoss',
170
+ use_sigmoid=True,
171
+ activate=True,
172
+ reduction='mean',
173
+ naive_dice=True,
174
+ eps=1.0,
175
+ loss_weight=5.0)),
176
+ panoptic_fusion_head=dict(
177
+ type='mmdet.MaskFormerFusionHead',
178
+ num_things_classes=num_things_classes,
179
+ num_stuff_classes=num_stuff_classes,
180
+ loss_panoptic=None,
181
+ init_cfg=None),
182
+ train_cfg=dict(
183
+ num_points=12544,
184
+ oversample_ratio=3.0,
185
+ importance_sample_ratio=0.75,
186
+ assigner=dict(
187
+ type='mmdet.HungarianAssigner',
188
+ match_costs=[
189
+ dict(type='mmdet.ClassificationCost', weight=2.0),
190
+ dict(
191
+ type='mmdet.CrossEntropyLossCost', weight=5.0, use_sigmoid=True),
192
+ dict(type='mmdet.DiceCost', weight=5.0, pred_act=True, eps=1.0)
193
+ ]),
194
+ sampler=dict(type='mmdet.MaskPseudoSampler')),
195
+ test_cfg=dict(
196
+ panoptic_on=False,
197
+ # For now, the dataset does not support
198
+ # evaluating semantic segmentation metric.
199
+ semantic_on=False,
200
+ instance_on=True,
201
+ # max_per_image is for instance segmentation.
202
+ max_per_image=num_queries,
203
+ iou_thr=0.8,
204
+ # In Mask2Former's panoptic postprocessing,
205
+ # masks whose score is below 0.5 are filtered out.
206
+ filter_low_score=True),
207
+ init_cfg=None)
208
+
209
+ task_name = 'ssdd_ins'
210
+ exp_name = 'E20230531_1'
211
+ logger = dict(
212
+ type='WandbLogger',
213
+ project=task_name,
214
+ group='samcls-mask2former',
215
+ name=exp_name
216
+ )
217
+ # logger = None
218
+
219
+ callbacks = [
220
+ param_scheduler_callback,
221
+ dict(
222
+ type='ModelCheckpoint',
223
+ dirpath=f'results/{task_name}/{exp_name}/checkpoints',
224
+ save_last=True,
225
+ mode='max',
226
+ monitor='valsegm_map_0',
227
+ save_top_k=2,
228
+ filename='epoch_{epoch}-map_{valsegm_map_0:.4f}'
229
+ ),
230
+ dict(
231
+ type='LearningRateMonitor',
232
+ logging_interval='step'
233
+ )
234
+ ]
235
+
236
+
237
+ trainer_cfg = dict(
238
+ compiled_model=False,
239
+ accelerator="auto",
240
+ strategy="auto",
241
+ # strategy="ddp",
242
+ # strategy='ddp_find_unused_parameters_true',
243
+ # precision='32',
244
+ # precision='16-mixed',
245
+ devices=8,
246
+ default_root_dir=f'results/{task_name}/{exp_name}',
247
+ # default_root_dir='results/tmp',
248
+ max_epochs=max_epochs,
249
+ logger=logger,
250
+ callbacks=callbacks,
251
+ log_every_n_steps=5,
252
+ check_val_every_n_epoch=5,
253
+ benchmark=True,
254
+ # sync_batchnorm=True,
255
+ # fast_dev_run=True,
256
+
257
+ # limit_train_batches=1,
258
+ # limit_val_batches=0,
259
+ # limit_test_batches=None,
260
+ # limit_predict_batches=None,
261
+ # overfit_batches=0.0,
262
+
263
+ # val_check_interval=None,
264
+ # num_sanity_val_steps=0,
265
+ # enable_checkpointing=None,
266
+ # enable_progress_bar=None,
267
+ # enable_model_summary=None,
268
+ # accumulate_grad_batches=32,
269
+ # gradient_clip_val=15,
270
+ # gradient_clip_algorithm='norm',
271
+ # deterministic=None,
272
+ # inference_mode: bool=True,
273
+ use_distributed_sampler=True,
274
+ # profiler="simple",
275
+ # detect_anomaly=False,
276
+ # barebones=False,
277
+ # plugins=None,
278
+ # reload_dataloaders_every_n_epochs=0,
279
+ )
280
+
281
+
282
+ backend_args = None
283
+ train_pipeline = [
284
+ dict(type='mmdet.LoadImageFromFile'),
285
+ dict(type='mmdet.LoadAnnotations', with_bbox=True, with_mask=True),
286
+ dict(type='mmdet.Resize', scale=image_size),
287
+ dict(type='mmdet.RandomFlip', prob=0.5),
288
+ dict(type='mmdet.PackDetInputs')
289
+ ]
290
+
291
+ test_pipeline = [
292
+ dict(type='mmdet.LoadImageFromFile', backend_args=backend_args),
293
+ dict(type='mmdet.Resize', scale=image_size),
294
+ # If there are no ground-truth annotations, remove this LoadAnnotations step from the pipeline
295
+ dict(type='mmdet.LoadAnnotations', with_bbox=True, with_mask=True),
296
+ dict(
297
+ type='mmdet.PackDetInputs',
298
+ meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
299
+ 'scale_factor'))
300
+ ]
301
+
302
+
303
+ train_batch_size_per_gpu = 6
304
+ train_num_workers = 4
305
+ test_batch_size_per_gpu = 6
306
+ test_num_workers = 4
307
+ persistent_workers = True
308
+
309
+ data_parent = '/mnt/search01/dataset/cky_data/SSDD'
310
+ dataset_type = 'SSDDInsSegDataset'
311
+
312
+ val_loader = dict(
313
+ batch_size=test_batch_size_per_gpu,
314
+ num_workers=test_num_workers,
315
+ persistent_workers=persistent_workers,
316
+ pin_memory=True,
317
+ dataset=dict(
318
+ type=dataset_type,
319
+ data_root=data_parent,
320
+ ann_file='annotations/SSDD_instances_val.json',
321
+ data_prefix=dict(img_path='imgs'),
322
+ test_mode=True,
323
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
324
+ pipeline=test_pipeline,
325
+ backend_args=backend_args))
326
+
327
+ datamodule_cfg = dict(
328
+ type='PLDataModule',
329
+ train_loader=dict(
330
+ batch_size=train_batch_size_per_gpu,
331
+ num_workers=train_num_workers,
332
+ persistent_workers=persistent_workers,
333
+ pin_memory=True,
334
+ dataset=dict(
335
+ type=dataset_type,
336
+ data_root=data_parent,
337
+ ann_file='annotations/SSDD_instances_train.json',
338
+ data_prefix=dict(img_path='imgs'),
339
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
340
+ pipeline=train_pipeline,
341
+ backend_args=backend_args)
342
+ ),
343
+ val_loader=val_loader,
344
+ # test_loader=val_loader
345
+ predict_loader=val_loader
346
+ )
configs/rsprompter/samseg_mask2former_whu_config.py ADDED
@@ -0,0 +1,349 @@
1
+ custom_imports = dict(imports=['mmseg.datasets', 'mmseg.models'], allow_failed_imports=False)
2
+
3
+ sub_model_train = [
4
+ 'panoptic_head',
5
+ 'sam_neck',
6
+ 'data_preprocessor'
7
+ ]
8
+
9
+ sub_model_optim = {
10
+ 'sam_neck': {'lr_mult': 1},
11
+ 'panoptic_head': {'lr_mult': 1},
12
+ }
13
+
14
+ max_epochs = 400
15
+
16
+ optimizer = dict(
17
+ type='AdamW',
18
+ sub_model=sub_model_optim,
19
+ lr=0.0005,
20
+ weight_decay=1e-3
21
+ )
22
+
23
+ param_scheduler = [
24
+ # warm up learning rate scheduler
25
+ dict(
26
+ type='LinearLR',
27
+ start_factor=5e-4,
28
+ by_epoch=True,
29
+ begin=0,
30
+ end=1,
31
+ # update by iter
32
+ convert_to_iter_based=True),
33
+ # main learning rate scheduler
34
+ dict(
35
+ type='CosineAnnealingLR',
36
+ T_max=max_epochs,
37
+ by_epoch=True,
38
+ begin=1,
39
+ end=max_epochs,
40
+ ),
41
+ ]
42
+
43
+ param_scheduler_callback = dict(
44
+ type='ParamSchedulerHook'
45
+ )
46
+
47
+ evaluator_ = dict(
48
+ type='CocoPLMetric',
49
+ metric=['bbox', 'segm'],
50
+ proposal_nums=[1, 10, 100]
51
+ )
52
+
53
+ evaluator = dict(
54
+ # train_evaluator=evaluator_,
55
+ val_evaluator=evaluator_,
56
+ )
57
+
58
+
59
+ image_size = (1024, 1024)
60
+
61
+ data_preprocessor = dict(
62
+ type='mmdet.DetDataPreprocessor',
63
+ mean=[123.675, 116.28, 103.53],
64
+ std=[58.395, 57.12, 57.375],
65
+ bgr_to_rgb=True,
66
+ pad_size_divisor=32,
67
+ pad_mask=True,
68
+ mask_pad_value=0,
69
+ )
70
+
71
+ num_things_classes = 1
72
+ num_stuff_classes = 0
73
+ num_classes = num_things_classes + num_stuff_classes
74
+
75
+ num_queries = 100
76
+ model_cfg = dict(
77
+ type='SegSAMPLer',
78
+ hyperparameters=dict(
79
+ optimizer=optimizer,
80
+ param_scheduler=param_scheduler,
81
+ evaluator=evaluator,
82
+ ),
83
+ need_train_names=sub_model_train,
84
+ data_preprocessor=data_preprocessor,
85
+ backbone=dict(
86
+ type='vit_h',
87
+ checkpoint='pretrain/sam/sam_vit_h_4b8939.pth',
88
+ # type='vit_b',
89
+ # checkpoint='pretrain/sam/sam_vit_b_01ec64.pth',
90
+ ),
91
+ sam_neck=dict(
92
+ type='SAMAggregatorNeck',
93
+ in_channels=[1280] * 32,
94
+ # in_channels=[768] * 12,
95
+ inner_channels=32,
96
+ selected_channels=range(4, 32, 2),
97
+ # selected_channels=range(4, 12, 2),
98
+ out_channels=256,
99
+ up_sample_scale=4,
100
+ ),
101
+ panoptic_head=dict(
102
+ type='mmdet.Mask2FormerHead',
103
+ in_channels=[256, 256, 256], # pass to pixel_decoder inside
104
+ strides=[8, 16, 32],
105
+ feat_channels=256,
106
+ out_channels=256,
107
+ num_things_classes=num_things_classes,
108
+ num_stuff_classes=num_stuff_classes,
109
+ num_queries=num_queries,
110
+ num_transformer_feat_level=3,
111
+ pixel_decoder=dict(
112
+ type='mmdet.MSDeformAttnPixelDecoder',
113
+ num_outs=3,
114
+ norm_cfg=dict(type='GN', num_groups=32),
115
+ act_cfg=dict(type='ReLU'),
116
+ encoder=dict( # DeformableDetrTransformerEncoder
117
+ # num_layers=6,
118
+ num_layers=2,
119
+ layer_cfg=dict( # DeformableDetrTransformerEncoderLayer
120
+ self_attn_cfg=dict( # MultiScaleDeformableAttention
121
+ embed_dims=256,
122
+ num_heads=8,
123
+ num_levels=3,
124
+ num_points=4,
125
+ dropout=0.1,
126
+ batch_first=True),
127
+ ffn_cfg=dict(
128
+ embed_dims=256,
129
+ feedforward_channels=1024,
130
+ num_fcs=2,
131
+ ffn_drop=0.1,
132
+ act_cfg=dict(type='ReLU', inplace=True)))),
133
+ positional_encoding=dict(num_feats=128, normalize=True)),
134
+ enforce_decoder_input_project=False,
135
+ positional_encoding=dict(num_feats=128, normalize=True),
136
+ transformer_decoder=dict( # Mask2FormerTransformerDecoder
137
+ return_intermediate=True,
138
+ # num_layers=9,
139
+ num_layers=3,
140
+ layer_cfg=dict( # Mask2FormerTransformerDecoderLayer
141
+ self_attn_cfg=dict( # MultiheadAttention
142
+ embed_dims=256,
143
+ num_heads=8,
144
+ dropout=0.1,
145
+ batch_first=True),
146
+ cross_attn_cfg=dict( # MultiheadAttention
147
+ embed_dims=256,
148
+ num_heads=8,
149
+ dropout=0.1,
150
+ batch_first=True),
151
+ ffn_cfg=dict(
152
+ embed_dims=256,
153
+ feedforward_channels=2048,
154
+ num_fcs=2,
155
+ ffn_drop=0.1,
156
+ act_cfg=dict(type='ReLU', inplace=True))),
157
+ init_cfg=None),
158
+ loss_cls=dict(
159
+ type='mmdet.CrossEntropyLoss',
160
+ use_sigmoid=False,
161
+ loss_weight=2.0,
162
+ reduction='mean',
163
+ class_weight=[1.0] * num_classes + [0.1]),
164
+ loss_mask=dict(
165
+ type='mmdet.CrossEntropyLoss',
166
+ use_sigmoid=True,
167
+ reduction='mean',
168
+ loss_weight=5.0),
169
+ loss_dice=dict(
170
+ type='mmdet.DiceLoss',
171
+ use_sigmoid=True,
172
+ activate=True,
173
+ reduction='mean',
174
+ naive_dice=True,
175
+ eps=1.0,
176
+ loss_weight=5.0)),
177
+ panoptic_fusion_head=dict(
178
+ type='mmdet.MaskFormerFusionHead',
179
+ num_things_classes=num_things_classes,
180
+ num_stuff_classes=num_stuff_classes,
181
+ loss_panoptic=None,
182
+ init_cfg=None),
183
+ train_cfg=dict(
184
+ num_points=12544,
185
+ oversample_ratio=3.0,
186
+ importance_sample_ratio=0.75,
187
+ assigner=dict(
188
+ type='mmdet.HungarianAssigner',
189
+ match_costs=[
190
+ dict(type='mmdet.ClassificationCost', weight=2.0),
191
+ dict(
192
+ type='mmdet.CrossEntropyLossCost', weight=5.0, use_sigmoid=True),
193
+ dict(type='mmdet.DiceCost', weight=5.0, pred_act=True, eps=1.0)
194
+ ]),
195
+ sampler=dict(type='mmdet.MaskPseudoSampler')),
196
+ test_cfg=dict(
197
+ panoptic_on=False,
198
+ # For now, the dataset does not support
199
+ # evaluating semantic segmentation metric.
200
+ semantic_on=False,
201
+ instance_on=True,
202
+ # max_per_image is for instance segmentation.
203
+ max_per_image=num_queries,
204
+ iou_thr=0.8,
205
+ # In Mask2Former's panoptic postprocessing,
206
+ # masks whose score is below 0.5 are filtered out.
207
+ filter_low_score=True),
208
+ init_cfg=None)
209
+
210
+ task_name = 'whu_ins'
211
+ exp_name = 'E20230531_2'
212
+ logger = dict(
213
+ type='WandbLogger',
214
+ project=task_name,
215
+ group='samcls-mask2former',
216
+ name=exp_name
217
+ )
218
+ # logger = None
219
+
220
+ callbacks = [
221
+ param_scheduler_callback,
222
+ dict(
223
+ type='ModelCheckpoint',
224
+ dirpath=f'results/{task_name}/{exp_name}/checkpoints',
225
+ save_last=True,
226
+ mode='max',
227
+ monitor='valsegm_map_0',
228
+ save_top_k=2,
229
+ filename='epoch_{epoch}-map_{valsegm_map_0:.4f}'
230
+ ),
231
+ dict(
232
+ type='LearningRateMonitor',
233
+ logging_interval='step'
234
+ )
235
+ ]
236
+
237
+
238
+ trainer_cfg = dict(
239
+ compiled_model=False,
240
+ accelerator="auto",
241
+ strategy="auto",
242
+ # strategy="ddp",
243
+ # strategy='ddp_find_unused_parameters_true',
244
+ # precision='32',
245
+ # precision='16-mixed',
246
+ devices=8,
247
+ default_root_dir=f'results/{task_name}/{exp_name}',
248
+ # default_root_dir='results/tmp',
249
+ max_epochs=max_epochs,
250
+ logger=logger,
251
+ callbacks=callbacks,
252
+ log_every_n_steps=20,
253
+ check_val_every_n_epoch=5,
254
+ benchmark=True,
255
+ # sync_batchnorm=True,
256
+ # fast_dev_run=True,
257
+
258
+ # limit_train_batches=1,
259
+ # limit_val_batches=0,
260
+ # limit_test_batches=None,
261
+ # limit_predict_batches=None,
262
+ # overfit_batches=0.0,
263
+
264
+ # val_check_interval=None,
265
+ # num_sanity_val_steps=0,
266
+ # enable_checkpointing=None,
267
+ # enable_progress_bar=None,
268
+ # enable_model_summary=None,
269
+ # accumulate_grad_batches=32,
270
+ # gradient_clip_val=15,
271
+ # gradient_clip_algorithm='norm',
272
+ # deterministic=None,
273
+ # inference_mode: bool=True,
274
+ use_distributed_sampler=True,
275
+ # profiler="simple",
276
+ # detect_anomaly=False,
277
+ # barebones=False,
278
+ # plugins=None,
279
+ # reload_dataloaders_every_n_epochs=0,
280
+ )
281
+
282
+
283
+ backend_args = None
284
+ train_pipeline = [
285
+ dict(type='mmdet.LoadImageFromFile'),
286
+ dict(type='mmdet.LoadAnnotations', with_bbox=True, with_mask=True),
287
+ dict(type='mmdet.Resize', scale=image_size),
288
+ dict(type='mmdet.RandomFlip', prob=0.5),
289
+ dict(type='mmdet.PackDetInputs')
290
+ ]
291
+
292
+ test_pipeline = [
293
+ dict(type='mmdet.LoadImageFromFile', backend_args=backend_args),
294
+ dict(type='mmdet.Resize', scale=image_size),
295
+ # If there are no ground-truth annotations, remove this LoadAnnotations step from the pipeline
296
+ dict(type='mmdet.LoadAnnotations', with_bbox=True, with_mask=True),
297
+ dict(
298
+ type='mmdet.PackDetInputs',
299
+ meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
300
+ 'scale_factor'))
301
+ ]
302
+
303
+
304
+ train_batch_size_per_gpu = 6
305
+ train_num_workers = 4
306
+ test_batch_size_per_gpu = 6
307
+ test_num_workers = 4
308
+ persistent_workers = True
309
+
310
+ data_parent = '/mnt/search01/dataset/cky_data/WHU'
311
+ train_data_prefix = 'train/'
312
+ val_data_prefix = 'test/'
313
+ dataset_type = 'WHUInsSegDataset'
314
+
315
+ val_loader = dict(
316
+ batch_size=test_batch_size_per_gpu,
317
+ num_workers=test_num_workers,
318
+ persistent_workers=persistent_workers,
319
+ pin_memory=True,
320
+ dataset=dict(
321
+ type=dataset_type,
322
+ data_root=data_parent,
323
+ ann_file='annotations/WHU_building_test.json',
324
+ data_prefix=dict(img_path=val_data_prefix + '/image', seg_path=val_data_prefix + '/label'),
325
+ test_mode=True,
326
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
327
+ pipeline=test_pipeline,
328
+ backend_args=backend_args))
329
+
330
+ datamodule_cfg = dict(
331
+ type='PLDataModule',
332
+ train_loader=dict(
333
+ batch_size=train_batch_size_per_gpu,
334
+ num_workers=train_num_workers,
335
+ persistent_workers=persistent_workers,
336
+ pin_memory=True,
337
+ dataset=dict(
338
+ type=dataset_type,
339
+ data_root=data_parent,
340
+ ann_file='annotations/WHU_building_train.json',
341
+ data_prefix=dict(img_path=train_data_prefix + '/image', seg_path=train_data_prefix + '/label'),
342
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
343
+ pipeline=train_pipeline,
344
+ backend_args=backend_args)
345
+ ),
346
+ val_loader=val_loader,
347
+ # test_loader=val_loader
348
+ predict_loader=val_loader
349
+ )
configs/rsprompter/samseg_maskrcnn_nwpu_config.py ADDED
@@ -0,0 +1,348 @@
1
+ custom_imports = dict(imports=['mmseg.datasets', 'mmseg.models'], allow_failed_imports=False)
2
+
3
+ sub_model_train = [
4
+ 'panoptic_head',
5
+ 'data_preprocessor'
6
+ ]
7
+
8
+ sub_model_optim = {
9
+ 'panoptic_head': {'lr_mult': 1},
10
+ }
11
+
12
+ max_epochs = 1000
13
+
14
+ optimizer = dict(
15
+ type='AdamW',
16
+ sub_model=sub_model_optim,
17
+ lr=0.0005,
18
+ weight_decay=1e-3
19
+ )
20
+
21
+ param_scheduler = [
22
+ # warm up learning rate scheduler
23
+ dict(
24
+ type='LinearLR',
25
+ start_factor=5e-4,
26
+ by_epoch=True,
27
+ begin=0,
28
+ end=1,
29
+ # update by iter
30
+ convert_to_iter_based=True),
31
+ # main learning rate scheduler
32
+ dict(
33
+ type='CosineAnnealingLR',
34
+ T_max=max_epochs,
35
+ by_epoch=True,
36
+ begin=1,
37
+ end=max_epochs,
38
+ ),
39
+ ]
40
+
41
+ param_scheduler_callback = dict(
42
+ type='ParamSchedulerHook'
43
+ )
44
+
45
+ evaluator_ = dict(
46
+ type='CocoPLMetric',
47
+ metric=['bbox', 'segm'],
48
+ proposal_nums=[1, 10, 100]
49
+ )
50
+
51
+ evaluator = dict(
52
+ # train_evaluator=evaluator_,
53
+ val_evaluator=evaluator_,
54
+ )
55
+
56
+
57
+ image_size = (1024, 1024)
58
+
59
+ data_preprocessor = dict(
60
+ type='mmdet.DetDataPreprocessor',
61
+ mean=[123.675, 116.28, 103.53],
62
+ std=[58.395, 57.12, 57.375],
63
+ bgr_to_rgb=True,
64
+ pad_size_divisor=32,
65
+ pad_mask=True,
66
+ mask_pad_value=0,
67
+ )
68
+
69
+ num_things_classes = 10
70
+ num_stuff_classes = 0
71
+ num_classes = num_things_classes + num_stuff_classes
72
+
73
+
74
+ model_cfg = dict(
75
+ type='SegSAMAnchorPLer',
76
+ hyperparameters=dict(
77
+ optimizer=optimizer,
78
+ param_scheduler=param_scheduler,
79
+ evaluator=evaluator,
80
+ ),
81
+ need_train_names=sub_model_train,
82
+ data_preprocessor=data_preprocessor,
83
+ backbone=dict(
84
+ type='vit_h',
85
+ checkpoint='pretrain/sam/sam_vit_h_4b8939.pth',
86
+ # type='vit_b',
87
+ # checkpoint='pretrain/sam/sam_vit_b_01ec64.pth',
88
+ ),
89
+ panoptic_head=dict(
90
+ type='SAMAnchorInstanceHead',
91
+ sam_head=False,
92
+ neck=dict(
93
+ type='SAMAggregatorNeck',
94
+ in_channels=[1280] * 32,
95
+ # in_channels=[768] * 12,
96
+ inner_channels=32,
97
+ selected_channels=range(4, 32, 2),
98
+ # selected_channels=range(4, 12, 2),
99
+ out_channels=256,
100
+ up_sample_scale=4,
101
+ ),
102
+ rpn_head=dict(
103
+ type='mmdet.RPNHead',
104
+ in_channels=256,
105
+ feat_channels=256,
106
+ anchor_generator=dict(
107
+ type='mmdet.AnchorGenerator',
108
+ scales=[2, 4, 8, 16, 32, 64],
109
+ ratios=[0.5, 1.0, 2.0],
110
+ strides=[8, 16, 32]),
111
+ bbox_coder=dict(
112
+ type='mmdet.DeltaXYWHBBoxCoder',
113
+ target_means=[.0, .0, .0, .0],
114
+ target_stds=[1.0, 1.0, 1.0, 1.0]),
115
+ loss_cls=dict(
116
+ type='mmdet.CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
117
+ loss_bbox=dict(type='mmdet.L1Loss', loss_weight=1.0)),
118
+ roi_head=dict(
119
+ type='mmdet.StandardRoIHead',
120
+ bbox_roi_extractor=dict(
121
+ type='mmdet.SingleRoIExtractor',
122
+ roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
123
+ out_channels=256,
124
+ featmap_strides=[8, 16, 32]),
125
+ bbox_head=dict(
126
+ type='mmdet.Shared2FCBBoxHead',
127
+ in_channels=256,
128
+ fc_out_channels=1024,
129
+ roi_feat_size=7,
130
+ num_classes=num_classes,
131
+ bbox_coder=dict(
132
+ type='mmdet.DeltaXYWHBBoxCoder',
133
+ target_means=[0., 0., 0., 0.],
134
+ target_stds=[0.1, 0.1, 0.2, 0.2]),
135
+ reg_class_agnostic=False,
136
+ loss_cls=dict(
137
+ type='mmdet.CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
138
+ loss_bbox=dict(type='mmdet.L1Loss', loss_weight=1.0)),
139
+ mask_roi_extractor=dict(
140
+ type='mmdet.SingleRoIExtractor',
141
+ roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),
142
+ out_channels=256,
143
+ featmap_strides=[8, 16, 32]),
144
+ mask_head=dict(
145
+ type='mmdet.FCNMaskHead',
146
+ num_convs=4,
147
+ in_channels=256,
148
+ conv_out_channels=256,
149
+ num_classes=num_classes,
150
+ loss_mask=dict(
151
+ type='mmdet.CrossEntropyLoss', use_mask=True, loss_weight=1.0))),
152
+ # model training and testing settings
153
+ train_cfg=dict(
154
+ rpn=dict(
155
+ assigner=dict(
156
+ type='mmdet.MaxIoUAssigner',
157
+ pos_iou_thr=0.7,
158
+ neg_iou_thr=0.3,
159
+ min_pos_iou=0.3,
160
+ match_low_quality=True,
161
+ ignore_iof_thr=-1),
162
+ sampler=dict(
163
+ type='mmdet.RandomSampler',
164
+ num=256,
165
+ pos_fraction=0.5,
166
+ neg_pos_ub=-1,
167
+ add_gt_as_proposals=False),
168
+ allowed_border=-1,
169
+ pos_weight=-1,
170
+ debug=False),
171
+ rpn_proposal=dict(
172
+ nms_pre=2000,
173
+ max_per_img=1000,
174
+ nms=dict(type='nms', iou_threshold=0.7),
175
+ min_bbox_size=0),
176
+ rcnn=dict(
177
+ assigner=dict(
178
+ type='mmdet.MaxIoUAssigner',
179
+ pos_iou_thr=0.5,
180
+ neg_iou_thr=0.5,
181
+ min_pos_iou=0.5,
182
+ match_low_quality=True,
183
+ ignore_iof_thr=-1),
184
+ sampler=dict(
185
+ type='mmdet.RandomSampler',
186
+ num=512,
187
+ pos_fraction=0.25,
188
+ neg_pos_ub=-1,
189
+ add_gt_as_proposals=True),
190
+ mask_size=28,
191
+ pos_weight=-1,
192
+ debug=False)),
193
+ test_cfg=dict(
194
+ rpn=dict(
195
+ nms_pre=1000,
196
+ max_per_img=1000,
197
+ nms=dict(type='nms', iou_threshold=0.7),
198
+ min_bbox_size=0),
199
+ rcnn=dict(
200
+ score_thr=0.05,
201
+ nms=dict(type='nms', iou_threshold=0.5),
202
+ max_per_img=100,
203
+ mask_thr_binary=0.5)
204
+ )
205
+ )
206
+ )
207
+
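As a sanity check on the rpn_head above: with mmdet's default AnchorGenerator behaviour the base anchor size falls back to each level's stride, and every scale/ratio pair is tiled at every location of all three feature levels. A small illustrative sketch of the resulting anchor count and the square (ratio 1.0) side lengths:

scales = [2, 4, 8, 16, 32, 64]
ratios = [0.5, 1.0, 2.0]
strides = [8, 16, 32]

# 6 scales x 3 ratios = 18 anchors per feature-map location
print("anchors per location:", len(scales) * len(ratios))

# for ratio 1.0 the anchor is square with side stride * scale
for stride in strides:
    print(f"stride {stride:2d}: square anchor sides {[stride * s for s in scales]}")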
208
+ task_name = 'nwpu_ins'
209
+ exp_name = 'E20230530_0'
210
+ logger = dict(
211
+ type='WandbLogger',
212
+ project=task_name,
213
+ group='samcls-rcnn',
214
+ name=exp_name
215
+ )
216
+ # logger = None
217
+
218
+ callbacks = [
219
+ param_scheduler_callback,
220
+ dict(
221
+ type='ModelCheckpoint',
222
+ dirpath=f'results/{task_name}/{exp_name}/checkpoints',
223
+ save_last=True,
224
+ mode='max',
225
+ monitor='valsegm_map_0',
226
+ save_top_k=2,
227
+ filename='epoch_{epoch}-map_{valsegm_map_0:.4f}'
228
+ ),
229
+ dict(
230
+ type='LearningRateMonitor',
231
+ logging_interval='step'
232
+ )
233
+ ]
234
+
235
+
236
+ trainer_cfg = dict(
237
+ compiled_model=False,
238
+ accelerator="auto",
239
+ strategy="auto",
240
+ # strategy="ddp",
241
+ # strategy='ddp_find_unused_parameters_true',
242
+ # precision='32',
243
+ # precision='16-mixed',
244
+ devices=8,
245
+ default_root_dir=f'results/{task_name}/{exp_name}',
246
+ # default_root_dir='results/tmp',
247
+ max_epochs=max_epochs,
248
+ logger=logger,
249
+ callbacks=callbacks,
250
+ log_every_n_steps=5,
251
+ check_val_every_n_epoch=5,
252
+ benchmark=True,
253
+ # sync_batchnorm=True,
254
+ # fast_dev_run=True,
255
+
256
+ # limit_train_batches=1,
257
+ # limit_val_batches=0,
258
+ # limit_test_batches=None,
259
+ # limit_predict_batches=None,
260
+ # overfit_batches=0.0,
261
+
262
+ # val_check_interval=None,
263
+ # num_sanity_val_steps=0,
264
+ # enable_checkpointing=None,
265
+ # enable_progress_bar=None,
266
+ # enable_model_summary=None,
267
+ # accumulate_grad_batches=32,
268
+ # gradient_clip_val=15,
269
+ # gradient_clip_algorithm='norm',
270
+ # deterministic=None,
271
+ # inference_mode: bool=True,
272
+ use_distributed_sampler=True,
273
+ # profiler="simple",
274
+ # detect_anomaly=False,
275
+ # barebones=False,
276
+ # plugins=None,
277
+ # reload_dataloaders_every_n_epochs=0,
278
+ )
279
+
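The trainer_cfg above mirrors the keyword arguments of a Lightning Trainer, plus the project-specific compiled_model flag. How this repository actually consumes the dict is defined by its own tools; purely as an illustration (hypothetical glue code, assuming Lightning 2.x and that the config's own variables trainer_cfg, task_name and exp_name are in scope), it could be mapped onto lightning.pytorch like this, with the logger and Lightning callbacks built as objects and the project-specific ParamSchedulerHook entry left to the runner:

from lightning.pytorch import Trainer
from lightning.pytorch.callbacks import LearningRateMonitor, ModelCheckpoint
from lightning.pytorch.loggers import WandbLogger

cfg = dict(trainer_cfg)          # copy so the config dict itself stays untouched
cfg.pop('compiled_model', None)  # project-specific flag, not a Trainer argument

# Turn the dict descriptions above into real logger/callback objects.
cfg['logger'] = WandbLogger(project=task_name, group='samcls-rcnn', name=exp_name)
cfg['callbacks'] = [
    ModelCheckpoint(dirpath=f'results/{task_name}/{exp_name}/checkpoints',
                    save_last=True, mode='max', monitor='valsegm_map_0',
                    save_top_k=2, filename='epoch_{epoch}-map_{valsegm_map_0:.4f}'),
    LearningRateMonitor(logging_interval='step'),
]

trainer = Trainer(**cfg)         # accelerator, devices, max_epochs, ... as configured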
280
+
281
+ backend_args = None
282
+ train_pipeline = [
283
+ dict(type='mmdet.LoadImageFromFile'),
284
+ dict(type='mmdet.LoadAnnotations', with_bbox=True, with_mask=True),
285
+ dict(type='mmdet.Resize', scale=image_size),
286
+ dict(type='mmdet.RandomFlip', prob=0.5),
287
+ dict(type='mmdet.PackDetInputs')
288
+ ]
289
+
290
+ test_pipeline = [
291
+ dict(type='mmdet.LoadImageFromFile', backend_args=backend_args),
292
+ dict(type='mmdet.Resize', scale=image_size),
293
+ # If there are no ground-truth annotations, remove this LoadAnnotations step from the pipeline
294
+ dict(type='mmdet.LoadAnnotations', with_bbox=True, with_mask=True),
295
+ dict(
296
+ type='mmdet.PackDetInputs',
297
+ meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
298
+ 'scale_factor'))
299
+ ]
300
+
301
+
302
+ train_batch_size_per_gpu = 6
303
+ train_num_workers = 4
304
+ test_batch_size_per_gpu = 6
305
+ test_num_workers = 4
306
+ persistent_workers = True
307
+
308
+ data_parent = '/mnt/search01/dataset/cky_data/NWPU10'
309
+ train_data_prefix = ''
310
+ val_data_prefix = ''
311
+
312
+ dataset_type = 'NWPUInsSegDataset'
313
+
314
+ val_loader = dict(
315
+ batch_size=test_batch_size_per_gpu,
316
+ num_workers=test_num_workers,
317
+ persistent_workers=persistent_workers,
318
+ pin_memory=True,
319
+ dataset=dict(
320
+ type=dataset_type,
321
+ data_root=data_parent,
322
+ ann_file='NWPU_instances_val.json',
323
+ data_prefix=dict(img_path='positive image set'),
324
+ test_mode=True,
325
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
326
+ pipeline=test_pipeline,
327
+ backend_args=backend_args))
328
+
329
+ datamodule_cfg = dict(
330
+ type='PLDataModule',
331
+ train_loader=dict(
332
+ batch_size=train_batch_size_per_gpu,
333
+ num_workers=train_num_workers,
334
+ persistent_workers=persistent_workers,
335
+ pin_memory=True,
336
+ dataset=dict(
337
+ type=dataset_type,
338
+ data_root=data_parent,
339
+ ann_file='NWPU_instances_train.json',
340
+ data_prefix=dict(img_path='positive image set'),
341
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
342
+ pipeline=train_pipeline,
343
+ backend_args=backend_args)
344
+ ),
345
+ val_loader=val_loader,
346
+ # test_loader=val_loader
347
+ predict_loader=val_loader
348
+ )
configs/rsprompter/samseg_maskrcnn_ssdd_config.py ADDED
@@ -0,0 +1,345 @@
1
+ custom_imports = dict(imports=['mmseg.datasets', 'mmseg.models'], allow_failed_imports=False)
2
+
3
+ sub_model_train = [
4
+ 'panoptic_head',
5
+ 'data_preprocessor'
6
+ ]
7
+
8
+ sub_model_optim = {
9
+ 'panoptic_head': {'lr_mult': 1},
10
+ }
11
+
12
+ max_epochs = 800
13
+
14
+ optimizer = dict(
15
+ type='AdamW',
16
+ sub_model=sub_model_optim,
17
+ lr=0.0005,
18
+ weight_decay=1e-3
19
+ )
20
+
21
+ param_scheduler = [
22
+ # warm up learning rate scheduler
23
+ dict(
24
+ type='LinearLR',
25
+ start_factor=5e-4,
26
+ by_epoch=True,
27
+ begin=0,
28
+ end=1,
29
+ # update by iter
30
+ convert_to_iter_based=True),
31
+ # main learning rate scheduler
32
+ dict(
33
+ type='CosineAnnealingLR',
34
+ T_max=max_epochs,
35
+ by_epoch=True,
36
+ begin=1,
37
+ end=max_epochs,
38
+ ),
39
+ ]
40
+
41
+ param_scheduler_callback = dict(
42
+ type='ParamSchedulerHook'
43
+ )
44
+
45
+ evaluator_ = dict(
46
+ type='CocoPLMetric',
47
+ metric=['bbox', 'segm'],
48
+ proposal_nums=[1, 10, 100]
49
+ )
50
+
51
+ evaluator = dict(
52
+ # train_evaluator=evaluator_,
53
+ val_evaluator=evaluator_,
54
+ )
55
+
56
+
57
+ image_size = (1024, 1024)
58
+
59
+ data_preprocessor = dict(
60
+ type='mmdet.DetDataPreprocessor',
61
+ mean=[123.675, 116.28, 103.53],
62
+ std=[58.395, 57.12, 57.375],
63
+ bgr_to_rgb=True,
64
+ pad_size_divisor=32,
65
+ pad_mask=True,
66
+ mask_pad_value=0,
67
+ )
68
+
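For orientation, the DetDataPreprocessor above flips loaded BGR images to RGB, normalizes each channel with the ImageNet statistics listed, and pads every batch so height and width are divisible by 32 (masks are padded with 0). A tiny numpy sketch of the per-image arithmetic (illustrative only; the real preprocessor works on batched tensors on the training device):

import numpy as np

mean = np.array([123.675, 116.28, 103.53])
std = np.array([58.395, 57.12, 57.375])

def preprocess(img_bgr: np.ndarray) -> np.ndarray:
    """img_bgr: HxWx3 uint8 image as produced by the loading pipeline."""
    img_rgb = img_bgr[..., ::-1].astype(np.float32)   # bgr_to_rgb=True
    img = (img_rgb - mean) / std                      # per-channel normalization
    h, w = img.shape[:2]
    pad_h, pad_w = -h % 32, -w % 32                   # pad_size_divisor=32
    return np.pad(img, ((0, pad_h), (0, pad_w), (0, 0)))

print(preprocess(np.zeros((1000, 1000, 3), dtype=np.uint8)).shape)   # (1024, 1024, 3)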
69
+ num_things_classes = 1
70
+ num_stuff_classes = 0
71
+ num_classes = num_things_classes + num_stuff_classes
72
+
73
+
74
+ model_cfg = dict(
75
+ type='SegSAMAnchorPLer',
76
+ hyperparameters=dict(
77
+ optimizer=optimizer,
78
+ param_scheduler=param_scheduler,
79
+ evaluator=evaluator,
80
+ ),
81
+ need_train_names=sub_model_train,
82
+ data_preprocessor=data_preprocessor,
83
+ backbone=dict(
84
+ type='vit_h',
85
+ checkpoint='pretrain/sam/sam_vit_h_4b8939.pth',
86
+ # type='vit_b',
87
+ # checkpoint='pretrain/sam/sam_vit_b_01ec64.pth',
88
+ ),
89
+ panoptic_head=dict(
90
+ type='SAMAnchorInstanceHead',
91
+ sam_head=False,
92
+ neck=dict(
93
+ type='SAMAggregatorNeck',
94
+ in_channels=[1280] * 32,
95
+ # in_channels=[768] * 12,
96
+ inner_channels=32,
97
+ selected_channels=range(4, 32, 2),
98
+ # selected_channels=range(4, 12, 2),
99
+ out_channels=256,
100
+ up_sample_scale=4,
101
+ ),
102
+ rpn_head=dict(
103
+ type='mmdet.RPNHead',
104
+ in_channels=256,
105
+ feat_channels=256,
106
+ anchor_generator=dict(
107
+ type='mmdet.AnchorGenerator',
108
+ scales=[2, 4, 8, 16, 32, 64],
109
+ ratios=[0.5, 1.0, 2.0],
110
+ strides=[8, 16, 32]),
111
+ bbox_coder=dict(
112
+ type='mmdet.DeltaXYWHBBoxCoder',
113
+ target_means=[.0, .0, .0, .0],
114
+ target_stds=[1.0, 1.0, 1.0, 1.0]),
115
+ loss_cls=dict(
116
+ type='mmdet.CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
117
+ loss_bbox=dict(type='mmdet.L1Loss', loss_weight=1.0)),
118
+ roi_head=dict(
119
+ type='mmdet.StandardRoIHead',
120
+ bbox_roi_extractor=dict(
121
+ type='mmdet.SingleRoIExtractor',
122
+ roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
123
+ out_channels=256,
124
+ featmap_strides=[8, 16, 32]),
125
+ bbox_head=dict(
126
+ type='mmdet.Shared2FCBBoxHead',
127
+ in_channels=256,
128
+ fc_out_channels=1024,
129
+ roi_feat_size=7,
130
+ num_classes=num_classes,
131
+ bbox_coder=dict(
132
+ type='mmdet.DeltaXYWHBBoxCoder',
133
+ target_means=[0., 0., 0., 0.],
134
+ target_stds=[0.1, 0.1, 0.2, 0.2]),
135
+ reg_class_agnostic=False,
136
+ loss_cls=dict(
137
+ type='mmdet.CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
138
+ loss_bbox=dict(type='mmdet.L1Loss', loss_weight=1.0)),
139
+ mask_roi_extractor=dict(
140
+ type='mmdet.SingleRoIExtractor',
141
+ roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),
142
+ out_channels=256,
143
+ featmap_strides=[8, 16, 32]),
144
+ mask_head=dict(
145
+ type='mmdet.FCNMaskHead',
146
+ num_convs=4,
147
+ in_channels=256,
148
+ conv_out_channels=256,
149
+ num_classes=num_classes,
150
+ loss_mask=dict(
151
+ type='mmdet.CrossEntropyLoss', use_mask=True, loss_weight=1.0))),
152
+ # model training and testing settings
153
+ train_cfg=dict(
154
+ rpn=dict(
155
+ assigner=dict(
156
+ type='mmdet.MaxIoUAssigner',
157
+ pos_iou_thr=0.7,
158
+ neg_iou_thr=0.3,
159
+ min_pos_iou=0.3,
160
+ match_low_quality=True,
161
+ ignore_iof_thr=-1),
162
+ sampler=dict(
163
+ type='mmdet.RandomSampler',
164
+ num=256,
165
+ pos_fraction=0.5,
166
+ neg_pos_ub=-1,
167
+ add_gt_as_proposals=False),
168
+ allowed_border=-1,
169
+ pos_weight=-1,
170
+ debug=False),
171
+ rpn_proposal=dict(
172
+ nms_pre=2000,
173
+ max_per_img=1000,
174
+ nms=dict(type='nms', iou_threshold=0.7),
175
+ min_bbox_size=0),
176
+ rcnn=dict(
177
+ assigner=dict(
178
+ type='mmdet.MaxIoUAssigner',
179
+ pos_iou_thr=0.5,
180
+ neg_iou_thr=0.5,
181
+ min_pos_iou=0.5,
182
+ match_low_quality=True,
183
+ ignore_iof_thr=-1),
184
+ sampler=dict(
185
+ type='mmdet.RandomSampler',
186
+ num=512,
187
+ pos_fraction=0.25,
188
+ neg_pos_ub=-1,
189
+ add_gt_as_proposals=True),
190
+ mask_size=28,
191
+ pos_weight=-1,
192
+ debug=False)),
193
+ test_cfg=dict(
194
+ rpn=dict(
195
+ nms_pre=1000,
196
+ max_per_img=1000,
197
+ nms=dict(type='nms', iou_threshold=0.7),
198
+ min_bbox_size=0),
199
+ rcnn=dict(
200
+ score_thr=0.05,
201
+ nms=dict(type='nms', iou_threshold=0.5),
202
+ max_per_img=100,
203
+ mask_thr_binary=0.5)
204
+ )
205
+ )
206
+ )
207
+
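The DeltaXYWHBBoxCoder entries above encode proposal-to-target offsets as (dx, dy, dw, dh) normalized by target_means and target_stds; the RPN uses unit stds, while the R-CNN bbox head uses [0.1, 0.1, 0.2, 0.2], so its regression targets are scaled up 10x for the centre offsets and 5x for the log width/height. A short sketch of the standard Faster R-CNN encoding, shown only to make those stds concrete:

import math

def encode(proposal, gt, means=(0., 0., 0., 0.), stds=(1., 1., 1., 1.)):
    """proposal/gt are (x1, y1, x2, y2); returns the regression target (dx, dy, dw, dh)."""
    px, py = (proposal[0] + proposal[2]) / 2, (proposal[1] + proposal[3]) / 2
    pw, ph = proposal[2] - proposal[0], proposal[3] - proposal[1]
    gx, gy = (gt[0] + gt[2]) / 2, (gt[1] + gt[3]) / 2
    gw, gh = gt[2] - gt[0], gt[3] - gt[1]
    deltas = ((gx - px) / pw, (gy - py) / ph, math.log(gw / pw), math.log(gh / ph))
    return [(d - m) / s for d, m, s in zip(deltas, means, stds)]

# a ground truth shifted 10 px right of its 100x100 proposal -> dx target of 1.0
print(encode((0, 0, 100, 100), (10, 0, 110, 100), stds=(0.1, 0.1, 0.2, 0.2)))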
208
+ task_name = 'ssdd_ins'
209
+ exp_name = 'E20230530_1'
210
+ logger = dict(
211
+ type='WandbLogger',
212
+ project=task_name,
213
+ group='samcls-rcnn',
214
+ name=exp_name
215
+ )
216
+ # logger = None
217
+
218
+ callbacks = [
219
+ param_scheduler_callback,
220
+ dict(
221
+ type='ModelCheckpoint',
222
+ dirpath=f'results/{task_name}/{exp_name}/checkpoints',
223
+ save_last=True,
224
+ mode='max',
225
+ monitor='valsegm_map_0',
226
+ save_top_k=2,
227
+ filename='epoch_{epoch}-map_{valsegm_map_0:.4f}'
228
+ ),
229
+ dict(
230
+ type='LearningRateMonitor',
231
+ logging_interval='step'
232
+ )
233
+ ]
234
+
235
+
236
+ trainer_cfg = dict(
237
+ compiled_model=False,
238
+ accelerator="auto",
239
+ strategy="auto",
240
+ # strategy="ddp",
241
+ # strategy='ddp_find_unused_parameters_true',
242
+ # precision='32',
243
+ # precision='16-mixed',
244
+ devices=8,
245
+ default_root_dir=f'results/{task_name}/{exp_name}',
246
+ # default_root_dir='results/tmp',
247
+ max_epochs=max_epochs,
248
+ logger=logger,
249
+ callbacks=callbacks,
250
+ log_every_n_steps=5,
251
+ check_val_every_n_epoch=5,
252
+ benchmark=True,
253
+ # sync_batchnorm=True,
254
+ # fast_dev_run=True,
255
+
256
+ # limit_train_batches=1,
257
+ # limit_val_batches=0,
258
+ # limit_test_batches=None,
259
+ # limit_predict_batches=None,
260
+ # overfit_batches=0.0,
261
+
262
+ # val_check_interval=None,
263
+ # num_sanity_val_steps=0,
264
+ # enable_checkpointing=None,
265
+ # enable_progress_bar=None,
266
+ # enable_model_summary=None,
267
+ # accumulate_grad_batches=32,
268
+ # gradient_clip_val=15,
269
+ # gradient_clip_algorithm='norm',
270
+ # deterministic=None,
271
+ # inference_mode: bool=True,
272
+ use_distributed_sampler=True,
273
+ # profiler="simple",
274
+ # detect_anomaly=False,
275
+ # barebones=False,
276
+ # plugins=None,
277
+ # reload_dataloaders_every_n_epochs=0,
278
+ )
279
+
280
+
281
+ backend_args = None
282
+ train_pipeline = [
283
+ dict(type='mmdet.LoadImageFromFile'),
284
+ dict(type='mmdet.LoadAnnotations', with_bbox=True, with_mask=True),
285
+ dict(type='mmdet.Resize', scale=image_size),
286
+ dict(type='mmdet.RandomFlip', prob=0.5),
287
+ dict(type='mmdet.PackDetInputs')
288
+ ]
289
+
290
+ test_pipeline = [
291
+ dict(type='mmdet.LoadImageFromFile', backend_args=backend_args),
292
+ dict(type='mmdet.Resize', scale=image_size),
293
+ # If there are no ground-truth annotations, remove this LoadAnnotations step from the pipeline
294
+ dict(type='mmdet.LoadAnnotations', with_bbox=True, with_mask=True),
295
+ dict(
296
+ type='mmdet.PackDetInputs',
297
+ meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
298
+ 'scale_factor'))
299
+ ]
300
+
301
+
302
+ train_batch_size_per_gpu = 6
303
+ train_num_workers = 4
304
+ test_batch_size_per_gpu = 6
305
+ test_num_workers = 4
306
+ persistent_workers = True
307
+
308
+ data_parent = '/mnt/search01/dataset/cky_data/SSDD'
309
+ dataset_type = 'SSDDInsSegDataset'
310
+
311
+ val_loader = dict(
312
+ batch_size=test_batch_size_per_gpu,
313
+ num_workers=test_num_workers,
314
+ persistent_workers=persistent_workers,
315
+ pin_memory=True,
316
+ dataset=dict(
317
+ type=dataset_type,
318
+ data_root=data_parent,
319
+ ann_file='annotations/SSDD_instances_val.json',
320
+ data_prefix=dict(img_path='imgs'),
321
+ test_mode=True,
322
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
323
+ pipeline=test_pipeline,
324
+ backend_args=backend_args))
325
+
326
+ datamodule_cfg = dict(
327
+ type='PLDataModule',
328
+ train_loader=dict(
329
+ batch_size=train_batch_size_per_gpu,
330
+ num_workers=train_num_workers,
331
+ persistent_workers=persistent_workers,
332
+ pin_memory=True,
333
+ dataset=dict(
334
+ type=dataset_type,
335
+ data_root=data_parent,
336
+ ann_file='annotations/SSDD_instances_train.json',
337
+ data_prefix=dict(img_path='imgs'),
338
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
339
+ pipeline=train_pipeline,
340
+ backend_args=backend_args)
341
+ ),
342
+ val_loader=val_loader,
343
+ # test_loader=val_loader
344
+ predict_loader=val_loader
345
+ )
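Because samseg_maskrcnn_ssdd_config.py is a plain Python config module, it can be parsed with mmengine's Config utility to inspect or override values before training (a minimal sketch, assuming mmengine is installed and the path below matches where the file lives in the repository):

from mmengine.config import Config

cfg = Config.fromfile('configs/rsprompter/samseg_maskrcnn_ssdd_config.py')
print(cfg.max_epochs)                                    # 800
print(cfg.model_cfg.panoptic_head.type)                  # 'SAMAnchorInstanceHead'
print(cfg.datamodule_cfg.train_loader.dataset.ann_file)  # 'annotations/SSDD_instances_train.json'

# values can be overridden in place before handing the config to the training tools
cfg.trainer_cfg.devices = 1
cfg.train_batch_size_per_gpu = 2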
configs/rsprompter/samseg_maskrcnn_whu_config.py ADDED
@@ -0,0 +1,346 @@
1
+ custom_imports = dict(imports=['mmseg.datasets', 'mmseg.models'], allow_failed_imports=False)
2
+
3
+ sub_model_train = [
4
+ 'panoptic_head',
5
+ 'data_preprocessor'
6
+ ]
7
+
8
+ sub_model_optim = {
9
+ 'panoptic_head': {'lr_mult': 1},
10
+ }
11
+
12
+ max_epochs = 400
13
+
14
+ optimizer = dict(
15
+ type='AdamW',
16
+ sub_model=sub_model_optim,
17
+ lr=0.0005,
18
+ weight_decay=1e-3
19
+ )
20
+
21
+ param_scheduler = [
22
+ # warm up learning rate scheduler
23
+ dict(
24
+ type='LinearLR',
25
+ start_factor=5e-4,
26
+ by_epoch=True,
27
+ begin=0,
28
+ end=1,
29
+ # update by iter
30
+ convert_to_iter_based=True),
31
+ # main learning rate scheduler
32
+ dict(
33
+ type='CosineAnnealingLR',
34
+ T_max=max_epochs,
35
+ by_epoch=True,
36
+ begin=1,
37
+ end=max_epochs,
38
+ ),
39
+ ]
40
+
41
+ param_scheduler_callback = dict(
42
+ type='ParamSchedulerHook'
43
+ )
44
+
45
+ evaluator_ = dict(
46
+ type='CocoPLMetric',
47
+ metric=['bbox', 'segm'],
48
+ proposal_nums=[1, 10, 100]
49
+ )
50
+
51
+ evaluator = dict(
52
+ val_evaluator=evaluator_,
53
+ )
54
+
55
+
56
+ image_size = (1024, 1024)
57
+
58
+ data_preprocessor = dict(
59
+ type='mmdet.DetDataPreprocessor',
60
+ mean=[123.675, 116.28, 103.53],
61
+ std=[58.395, 57.12, 57.375],
62
+ bgr_to_rgb=True,
63
+ pad_size_divisor=32,
64
+ pad_mask=True,
65
+ mask_pad_value=0,
66
+ )
67
+
68
+ num_things_classes = 1
69
+ num_stuff_classes = 0
70
+ num_classes = num_things_classes + num_stuff_classes
71
+
72
+
73
+ model_cfg = dict(
74
+ type='SegSAMAnchorPLer',
75
+ hyperparameters=dict(
76
+ optimizer=optimizer,
77
+ param_scheduler=param_scheduler,
78
+ evaluator=evaluator,
79
+ ),
80
+ need_train_names=sub_model_train,
81
+ data_preprocessor=data_preprocessor,
82
+ backbone=dict(
83
+ type='vit_h',
84
+ checkpoint='pretrain/sam/sam_vit_h_4b8939.pth',
85
+ # type='vit_b',
86
+ # checkpoint='pretrain/sam/sam_vit_b_01ec64.pth',
87
+ ),
88
+ panoptic_head=dict(
89
+ type='SAMAnchorInstanceHead',
90
+ sam_head=False,
91
+ neck=dict(
92
+ type='SAMAggregatorNeck',
93
+ in_channels=[1280] * 32,
94
+ # in_channels=[768] * 12,
95
+ inner_channels=32,
96
+ selected_channels=range(4, 32, 2),
97
+ # selected_channels=range(4, 12, 2),
98
+ out_channels=256,
99
+ up_sample_scale=4,
100
+ ),
101
+ rpn_head=dict(
102
+ type='mmdet.RPNHead',
103
+ in_channels=256,
104
+ feat_channels=256,
105
+ anchor_generator=dict(
106
+ type='mmdet.AnchorGenerator',
107
+ scales=[2, 4, 8, 16, 32, 64],
108
+ ratios=[0.5, 1.0, 2.0],
109
+ strides=[8, 16, 32]),
110
+ bbox_coder=dict(
111
+ type='mmdet.DeltaXYWHBBoxCoder',
112
+ target_means=[.0, .0, .0, .0],
113
+ target_stds=[1.0, 1.0, 1.0, 1.0]),
114
+ loss_cls=dict(
115
+ type='mmdet.CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
116
+ loss_bbox=dict(type='mmdet.L1Loss', loss_weight=1.0)),
117
+ roi_head=dict(
118
+ type='mmdet.StandardRoIHead',
119
+ bbox_roi_extractor=dict(
120
+ type='mmdet.SingleRoIExtractor',
121
+ roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
122
+ out_channels=256,
123
+ featmap_strides=[8, 16, 32]),
124
+ bbox_head=dict(
125
+ type='mmdet.Shared2FCBBoxHead',
126
+ in_channels=256,
127
+ fc_out_channels=1024,
128
+ roi_feat_size=7,
129
+ num_classes=num_classes,
130
+ bbox_coder=dict(
131
+ type='mmdet.DeltaXYWHBBoxCoder',
132
+ target_means=[0., 0., 0., 0.],
133
+ target_stds=[0.1, 0.1, 0.2, 0.2]),
134
+ reg_class_agnostic=False,
135
+ loss_cls=dict(
136
+ type='mmdet.CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
137
+ loss_bbox=dict(type='mmdet.L1Loss', loss_weight=1.0)),
138
+ mask_roi_extractor=dict(
139
+ type='mmdet.SingleRoIExtractor',
140
+ roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),
141
+ out_channels=256,
142
+ featmap_strides=[8, 16, 32]),
143
+ mask_head=dict(
144
+ type='mmdet.FCNMaskHead',
145
+ num_convs=4,
146
+ in_channels=256,
147
+ conv_out_channels=256,
148
+ num_classes=num_classes,
149
+ loss_mask=dict(
150
+ type='mmdet.CrossEntropyLoss', use_mask=True, loss_weight=1.0))),
151
+ # model training and testing settings
152
+ train_cfg=dict(
153
+ rpn=dict(
154
+ assigner=dict(
155
+ type='mmdet.MaxIoUAssigner',
156
+ pos_iou_thr=0.7,
157
+ neg_iou_thr=0.3,
158
+ min_pos_iou=0.3,
159
+ match_low_quality=True,
160
+ ignore_iof_thr=-1),
161
+ sampler=dict(
162
+ type='mmdet.RandomSampler',
163
+ num=256,
164
+ pos_fraction=0.5,
165
+ neg_pos_ub=-1,
166
+ add_gt_as_proposals=False),
167
+ allowed_border=-1,
168
+ pos_weight=-1,
169
+ debug=False),
170
+ rpn_proposal=dict(
171
+ nms_pre=2000,
172
+ max_per_img=1000,
173
+ nms=dict(type='nms', iou_threshold=0.7),
174
+ min_bbox_size=0),
175
+ rcnn=dict(
176
+ assigner=dict(
177
+ type='mmdet.MaxIoUAssigner',
178
+ pos_iou_thr=0.5,
179
+ neg_iou_thr=0.5,
180
+ min_pos_iou=0.5,
181
+ match_low_quality=True,
182
+ ignore_iof_thr=-1),
183
+ sampler=dict(
184
+ type='mmdet.RandomSampler',
185
+ num=512,
186
+ pos_fraction=0.25,
187
+ neg_pos_ub=-1,
188
+ add_gt_as_proposals=True),
189
+ mask_size=28,
190
+ pos_weight=-1,
191
+ debug=False)),
192
+ test_cfg=dict(
193
+ rpn=dict(
194
+ nms_pre=1000,
195
+ max_per_img=1000,
196
+ nms=dict(type='nms', iou_threshold=0.7),
197
+ min_bbox_size=0),
198
+ rcnn=dict(
199
+ score_thr=0.05,
200
+ nms=dict(type='nms', iou_threshold=0.5),
201
+ max_per_img=100,
202
+ mask_thr_binary=0.5)
203
+ )
204
+ )
205
+ )
206
+
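To see how the mask-branch dimensions above fit together: RoIAlign pools each sampled RoI to 14x14, the FCNMaskHead runs 4 convolutions and, with mmdet's default upsampling (not overridden here), a 2x deconvolution to reach 28x28, which matches the mask_size=28 training target; at test time the predicted masks are binarized at mask_thr_binary=0.5. A shape-only sketch under that default-upsample assumption:

num_classes = 1                  # building (this config)
roi_feat = (256, 14, 14)         # mask_roi_extractor: RoIAlign output_size=14

def conv3x3_shape(chw, c_out=256):
    """A 3x3 conv with padding 1 keeps the spatial size."""
    _, h, w = chw
    return (c_out, h, w)

x = roi_feat
for _ in range(4):               # num_convs=4
    x = conv3x3_shape(x)
c, h, w = x
x = (c, h * 2, w * 2)            # assumed default 2x deconv in FCNMaskHead
print("mask logits per RoI:", (num_classes, x[1], x[2]))   # (1, 28, 28) == mask_size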
207
+ task_name = 'whu_ins'
208
+ exp_name = 'E20230530_2'
209
+ logger = dict(
210
+ type='WandbLogger',
211
+ project=task_name,
212
+ group='samcls-rcnn',
213
+ name=exp_name
214
+ )
215
+ # logger = None
216
+
217
+ callbacks = [
218
+ param_scheduler_callback,
219
+ dict(
220
+ type='ModelCheckpoint',
221
+ dirpath=f'results/{task_name}/{exp_name}/checkpoints',
222
+ save_last=True,
223
+ mode='max',
224
+ monitor='valsegm_map_0',
225
+ save_top_k=2,
226
+ filename='epoch_{epoch}-map_{valsegm_map_0:.4f}'
227
+ ),
228
+ dict(
229
+ type='LearningRateMonitor',
230
+ logging_interval='step'
231
+ )
232
+ ]
233
+
234
+
235
+ trainer_cfg = dict(
236
+ compiled_model=False,
237
+ accelerator="auto",
238
+ strategy="auto",
239
+ # strategy="ddp",
240
+ # strategy='ddp_find_unused_parameters_true',
241
+ # precision='32',
242
+ # precision='16-mixed',
243
+ devices=8,
244
+ default_root_dir=f'results/{task_name}/{exp_name}',
245
+ # default_root_dir='results/tmp',
246
+ max_epochs=max_epochs,
247
+ logger=logger,
248
+ callbacks=callbacks,
249
+ log_every_n_steps=20,
250
+ check_val_every_n_epoch=5,
251
+ benchmark=True,
252
+ # sync_batchnorm=True,
253
+ # fast_dev_run=True,
254
+
255
+ # limit_train_batches=1,
256
+ # limit_val_batches=0,
257
+ # limit_test_batches=None,
258
+ # limit_predict_batches=None,
259
+ # overfit_batches=0.0,
260
+
261
+ # val_check_interval=None,
262
+ # num_sanity_val_steps=0,
263
+ # enable_checkpointing=None,
264
+ # enable_progress_bar=None,
265
+ # enable_model_summary=None,
266
+ # accumulate_grad_batches=32,
267
+ # gradient_clip_val=15,
268
+ # gradient_clip_algorithm='norm',
269
+ # deterministic=None,
270
+ # inference_mode: bool=True,
271
+ use_distributed_sampler=True,
272
+ # profiler="simple",
273
+ # detect_anomaly=False,
274
+ # barebones=False,
275
+ # plugins=None,
276
+ # reload_dataloaders_every_n_epochs=0,
277
+ )
278
+
279
+
280
+ backend_args = None
281
+ train_pipeline = [
282
+ dict(type='mmdet.LoadImageFromFile'),
283
+ dict(type='mmdet.LoadAnnotations', with_bbox=True, with_mask=True),
284
+ dict(type='mmdet.Resize', scale=image_size),
285
+ dict(type='mmdet.RandomFlip', prob=0.5),
286
+ dict(type='mmdet.PackDetInputs')
287
+ ]
288
+
289
+ test_pipeline = [
290
+ dict(type='mmdet.LoadImageFromFile', backend_args=backend_args),
291
+ dict(type='mmdet.Resize', scale=image_size),
292
+ # If there are no ground-truth annotations, remove this LoadAnnotations step from the pipeline
293
+ dict(type='mmdet.LoadAnnotations', with_bbox=True, with_mask=True),
294
+ dict(
295
+ type='mmdet.PackDetInputs',
296
+ meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
297
+ 'scale_factor'))
298
+ ]
299
+
300
+
301
+ train_batch_size_per_gpu = 6
302
+ train_num_workers = 4
303
+ test_batch_size_per_gpu = 6
304
+ test_num_workers = 4
305
+ persistent_workers = True
306
+
307
+ data_parent = '/mnt/search01/dataset/cky_data/WHU'
308
+ train_data_prefix = 'train/'
309
+ val_data_prefix = 'test/'
310
+ dataset_type = 'WHUInsSegDataset'
311
+
312
+ val_loader = dict(
313
+ batch_size=test_batch_size_per_gpu,
314
+ num_workers=test_num_workers,
315
+ persistent_workers=persistent_workers,
316
+ pin_memory=True,
317
+ dataset=dict(
318
+ type=dataset_type,
319
+ data_root=data_parent,
320
+ ann_file='annotations/WHU_building_test.json',
321
+ data_prefix=dict(img_path=val_data_prefix + 'image', seg_path=val_data_prefix + 'label'),
322
+ test_mode=True,
323
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
324
+ pipeline=test_pipeline,
325
+ backend_args=backend_args))
326
+
327
+ datamodule_cfg = dict(
328
+ type='PLDataModule',
329
+ train_loader=dict(
330
+ batch_size=train_batch_size_per_gpu,
331
+ num_workers=train_num_workers,
332
+ persistent_workers=persistent_workers,
333
+ pin_memory=True,
334
+ dataset=dict(
335
+ type=dataset_type,
336
+ data_root=data_parent,
337
+ ann_file='annotations/WHU_building_train.json',
338
+ data_prefix=dict(img_path=train_data_prefix + 'image', seg_path=train_data_prefix + 'label'),
339
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
340
+ pipeline=train_pipeline,
341
+ backend_args=backend_args)
342
+ ),
343
+ val_loader=val_loader,
344
+ # test_loader=val_loader
345
+ predict_loader=val_loader
346
+ )
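Putting the WHU data settings together, the loaders above resolve to the following paths under data_root (derived directly from data_parent, the train/val prefixes and the ann_file values); a quick sketch for checking the dataset layout:

import os.path as osp

data_root = '/mnt/search01/dataset/cky_data/WHU'
expected = [
    osp.join(data_root, 'annotations/WHU_building_train.json'),  # train ann_file
    osp.join(data_root, 'annotations/WHU_building_test.json'),   # val/predict ann_file
    osp.join(data_root, 'train/image'),   # train img_path
    osp.join(data_root, 'train/label'),   # train seg_path
    osp.join(data_root, 'test/image'),    # val img_path
    osp.join(data_root, 'test/label'),    # val seg_path
]
for path in expected:
    print(path, '->', 'ok' if osp.exists(path) else 'missing')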