renzhongwei committed on
Commit
efe17b4
1 Parent(s): f8c1812
cascade-rcnn_x101-32x4d_fpn_1x_ct/20240412_192400/20240412_192400.log ADDED
The diff for this file is too large to render. See raw diff
 
cascade-rcnn_x101-32x4d_fpn_1x_ct/20240412_192400/vis_data/20240412_192400.json ADDED
The diff for this file is too large to render. See raw diff
 
cascade-rcnn_x101-32x4d_fpn_1x_ct/20240412_192400/vis_data/config.py ADDED
@@ -0,0 +1,439 @@
auto_scale_lr = dict(base_batch_size=16, enable=False)
backend_args = None
data_root = '/mnt/bn/panxuran/Slice_Data/slice_dataset_maximum_0402/'
dataset_type = 'CocoCTDataset'
default_hooks = dict(
    checkpoint=dict(interval=1, type='CheckpointHook'),
    logger=dict(interval=50, type='LoggerHook'),
    param_scheduler=dict(type='ParamSchedulerHook'),
    sampler_seed=dict(type='DistSamplerSeedHook'),
    timer=dict(type='IterTimerHook'),
    visualization=dict(type='DetVisualizationHook'))
default_scope = 'mmdet'
env_cfg = dict(
    cudnn_benchmark=False,
    dist_cfg=dict(backend='nccl'),
    mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0))
launcher = 'pytorch'
load_from = 'ckpt/cascade_rcnn_x101_32x4d_fpn_1x_coco_20200316-95c2deb6.pth'
log_level = 'INFO'
log_processor = dict(by_epoch=True, type='LogProcessor', window_size=50)
model = dict(
    backbone=dict(
        base_width=4,
        depth=101,
        frozen_stages=1,
        groups=32,
        init_cfg=dict(
            checkpoint='open-mmlab://resnext101_32x4d', type='Pretrained'),
        norm_cfg=dict(requires_grad=True, type='BN'),
        norm_eval=True,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        style='pytorch',
        type='ResNeXt'),
    data_preprocessor=dict(
        bgr_to_rgb=True,
        mean=[123.675, 116.28, 103.53],
        pad_size_divisor=32,
        std=[58.395, 57.12, 57.375],
        type='DetDataPreprocessor'),
    neck=dict(
        in_channels=[256, 512, 1024, 2048],
        num_outs=5,
        out_channels=256,
        type='FPN'),
    roi_head=dict(
        bbox_head=[
            dict(
                bbox_coder=dict(
                    target_means=[0.0, 0.0, 0.0, 0.0],
                    target_stds=[0.1, 0.1, 0.2, 0.2],
                    type='DeltaXYWHBBoxCoder'),
                fc_out_channels=1024,
                in_channels=256,
                loss_bbox=dict(beta=1.0, loss_weight=1.0, type='SmoothL1Loss'),
                loss_cls=dict(
                    loss_weight=1.0, type='CrossEntropyLoss',
                    use_sigmoid=False),
                num_classes=5,
                reg_class_agnostic=True,
                roi_feat_size=7,
                type='Shared2FCBBoxHead'),
            dict(
                bbox_coder=dict(
                    target_means=[0.0, 0.0, 0.0, 0.0],
                    target_stds=[0.05, 0.05, 0.1, 0.1],
                    type='DeltaXYWHBBoxCoder'),
                fc_out_channels=1024,
                in_channels=256,
                loss_bbox=dict(beta=1.0, loss_weight=1.0, type='SmoothL1Loss'),
                loss_cls=dict(
                    loss_weight=1.0, type='CrossEntropyLoss',
                    use_sigmoid=False),
                num_classes=5,
                reg_class_agnostic=True,
                roi_feat_size=7,
                type='Shared2FCBBoxHead'),
            dict(
                bbox_coder=dict(
                    target_means=[0.0, 0.0, 0.0, 0.0],
                    target_stds=[0.033, 0.033, 0.067, 0.067],
                    type='DeltaXYWHBBoxCoder'),
                fc_out_channels=1024,
                in_channels=256,
                loss_bbox=dict(beta=1.0, loss_weight=1.0, type='SmoothL1Loss'),
                loss_cls=dict(
                    loss_weight=1.0, type='CrossEntropyLoss',
                    use_sigmoid=False),
                num_classes=5,
                reg_class_agnostic=True,
                roi_feat_size=7,
                type='Shared2FCBBoxHead'),
        ],
        bbox_roi_extractor=dict(
            featmap_strides=[4, 8, 16, 32],
            out_channels=256,
            roi_layer=dict(output_size=7, sampling_ratio=0, type='RoIAlign'),
            type='SingleRoIExtractor'),
        num_stages=3,
        stage_loss_weights=[1, 0.5, 0.25],
        type='CascadeRoIHead'),
    rpn_head=dict(
        anchor_generator=dict(
            ratios=[0.5, 1.0, 2.0],
            scales=[8],
            strides=[4, 8, 16, 32, 64],
            type='AnchorGenerator'),
        bbox_coder=dict(
            target_means=[0.0, 0.0, 0.0, 0.0],
            target_stds=[1.0, 1.0, 1.0, 1.0],
            type='DeltaXYWHBBoxCoder'),
        feat_channels=256,
        in_channels=256,
        loss_bbox=dict(
            beta=0.1111111111111111, loss_weight=1.0, type='SmoothL1Loss'),
        loss_cls=dict(
            loss_weight=1.0, type='CrossEntropyLoss', use_sigmoid=True),
        type='RPNHead'),
    test_cfg=dict(
        rcnn=dict(
            max_per_img=100,
            nms=dict(iou_threshold=0.5, type='nms'),
            score_thr=0.05),
        rpn=dict(
            max_per_img=1000,
            min_bbox_size=0,
            nms=dict(iou_threshold=0.7, type='nms'),
            nms_pre=1000)),
    train_cfg=dict(
        rcnn=[
            dict(
                assigner=dict(
                    ignore_iof_thr=-1, match_low_quality=False,
                    min_pos_iou=0.5, neg_iou_thr=0.5, pos_iou_thr=0.5,
                    type='MaxIoUAssigner'),
                debug=False,
                pos_weight=-1,
                sampler=dict(
                    add_gt_as_proposals=True, neg_pos_ub=-1, num=512,
                    pos_fraction=0.25, type='RandomSampler')),
            dict(
                assigner=dict(
                    ignore_iof_thr=-1, match_low_quality=False,
                    min_pos_iou=0.6, neg_iou_thr=0.6, pos_iou_thr=0.6,
                    type='MaxIoUAssigner'),
                debug=False,
                pos_weight=-1,
                sampler=dict(
                    add_gt_as_proposals=True, neg_pos_ub=-1, num=512,
                    pos_fraction=0.25, type='RandomSampler')),
            dict(
                assigner=dict(
                    ignore_iof_thr=-1, match_low_quality=False,
                    min_pos_iou=0.7, neg_iou_thr=0.7, pos_iou_thr=0.7,
                    type='MaxIoUAssigner'),
                debug=False,
                pos_weight=-1,
                sampler=dict(
                    add_gt_as_proposals=True, neg_pos_ub=-1, num=512,
                    pos_fraction=0.25, type='RandomSampler')),
        ],
        rpn=dict(
            allowed_border=0,
            assigner=dict(
                ignore_iof_thr=-1, match_low_quality=True,
                min_pos_iou=0.3, neg_iou_thr=0.3, pos_iou_thr=0.7,
                type='MaxIoUAssigner'),
            debug=False,
            pos_weight=-1,
            sampler=dict(
                add_gt_as_proposals=False, neg_pos_ub=-1, num=256,
                pos_fraction=0.5, type='RandomSampler')),
        rpn_proposal=dict(
            max_per_img=2000,
            min_bbox_size=0,
            nms=dict(iou_threshold=0.7, type='nms'),
            nms_pre=2000)),
    type='CascadeRCNN')
optim_wrapper = dict(
    optimizer=dict(lr=0.02, momentum=0.9, type='SGD', weight_decay=0.0001),
    type='OptimWrapper')
param_scheduler = [
    dict(begin=0, by_epoch=False, end=500, start_factor=0.001, type='LinearLR'),
    dict(
        begin=0,
        by_epoch=True,
        end=12,
        gamma=0.1,
        milestones=[8, 11],
        type='MultiStepLR'),
]
resume = False
test_cfg = dict(type='TestLoop')
test_dataloader = dict(
    batch_size=8,
    dataset=dict(
        ann_file='annotations/test.json',
        backend_args=None,
        data_prefix=dict(img='images/test/'),
        data_root='/mnt/bn/panxuran/Slice_Data/slice_dataset_maximum_0402/',
        pipeline=[
            dict(backend_args=None, type='LoadImageFromFile'),
            dict(keep_ratio=True, scale=(512, 512), type='Resize'),
            dict(type='LoadAnnotations', with_bbox=True),
            dict(
                meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                           'scale_factor'),
                type='PackDetInputs'),
        ],
        test_mode=True,
        type='CocoCTDataset'),
    drop_last=False,
    num_workers=4,
    persistent_workers=True,
    sampler=dict(shuffle=False, type='DefaultSampler'))
test_evaluator = dict(
    ann_file=
    '/mnt/bn/panxuran/Slice_Data/slice_dataset_maximum_0402/annotations/test.json',
    backend_args=None,
    format_only=False,
    metric='bbox',
    type='CocoMetric')
test_pipeline = [
    dict(backend_args=None, type='LoadImageFromFile'),
    dict(keep_ratio=True, scale=(512, 512), type='Resize'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                   'scale_factor'),
        type='PackDetInputs'),
]
train_cfg = dict(max_epochs=12, type='EpochBasedTrainLoop', val_interval=1)
train_dataloader = dict(
    batch_sampler=dict(type='AspectRatioBatchSampler'),
    batch_size=8,
    dataset=dict(
        ann_file='annotations/train_wsyn.json',
        backend_args=None,
        data_prefix=dict(img='images/train/'),
        data_root='/mnt/bn/panxuran/Slice_Data/slice_dataset_maximum_0402/',
        filter_cfg=dict(filter_empty_gt=False, min_size=32),
        pipeline=[
            dict(backend_args=None, type='LoadImageFromFile'),
            dict(type='LoadAnnotations', with_bbox=True),
            dict(keep_ratio=True, scale=(512, 512), type='Resize'),
            dict(prob=0.5, type='RandomFlip'),
            dict(type='PackDetInputs'),
        ],
        type='CocoCTDataset'),
    num_workers=4,
    persistent_workers=True,
    sampler=dict(shuffle=True, type='DefaultSampler'))
train_pipeline = [
    dict(backend_args=None, type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(keep_ratio=True, scale=(512, 512), type='Resize'),
    dict(prob=0.5, type='RandomFlip'),
    dict(type='PackDetInputs'),
]
val_cfg = dict(type='ValLoop')
val_dataloader = dict(
    batch_size=8,
    dataset=dict(
        ann_file='annotations/test.json',
        backend_args=None,
        data_prefix=dict(img='images/test/'),
        data_root='/mnt/bn/panxuran/Slice_Data/slice_dataset_maximum_0402/',
        pipeline=[
            dict(backend_args=None, type='LoadImageFromFile'),
            dict(keep_ratio=True, scale=(512, 512), type='Resize'),
            dict(type='LoadAnnotations', with_bbox=True),
            dict(
                meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                           'scale_factor'),
                type='PackDetInputs'),
        ],
        test_mode=True,
        type='CocoCTDataset'),
    drop_last=False,
    num_workers=4,
    persistent_workers=True,
    sampler=dict(shuffle=False, type='DefaultSampler'))
val_evaluator = dict(
    ann_file=
    '/mnt/bn/panxuran/Slice_Data/slice_dataset_maximum_0402/annotations/test.json',
    backend_args=None,
    format_only=False,
    metric='bbox',
    type='CocoMetric')
vis_backends = [dict(type='LocalVisBackend')]
visualizer = dict(
    name='visualizer',
    type='DetLocalVisualizer',
    vis_backends=[dict(type='LocalVisBackend')])
work_dir = 'work_dirs/cascade-rcnn_x101-32x4d_fpn_1x_ct'
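
The dump above is a standard MMDetection 3.x (MMEngine) config. A minimal sketch (not part of this commit, and assuming mmdet>=3.0, the data_root, the ckpt/ checkpoint and the custom CocoCTDataset are all available locally) of reproducing the run from it with MMEngine's Runner:

# Minimal sketch: build and run training from the dumped config.
from mmengine.config import Config
from mmengine.runner import Runner

cfg = Config.fromfile(
    'cascade-rcnn_x101-32x4d_fpn_1x_ct/20240412_192400/vis_data/config.py')
cfg.launcher = 'none'   # the recorded run used launcher='pytorch' (torchrun, multi-GPU)
cfg.work_dir = 'work_dirs/cascade-rcnn_x101-32x4d_fpn_1x_ct'

runner = Runner.from_cfg(cfg)  # builds model, dataloaders and hooks from the dict above
runner.train()                 # runner.test() evaluates CocoMetric on annotations/test.json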
cascade-rcnn_x101-32x4d_fpn_1x_ct/20240412_192400/vis_data/scalars.json ADDED
The diff for this file is too large to render. See raw diff
 
cascade-rcnn_x101-32x4d_fpn_1x_ct/cascade-rcnn_x101-32x4d_fpn_1x_ct.py ADDED
@@ -0,0 +1,439 @@
(contents identical to cascade-rcnn_x101-32x4d_fpn_1x_ct/20240412_192400/vis_data/config.py shown above)
cascade-rcnn_x101-32x4d_fpn_1x_ct/epoch_12.pth ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:0bf7e7b96b6cd52aff250301b864a09d798e2a55cc55cfb25403e449645e633f
size 705747963
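
epoch_12.pth is stored through Git LFS, so the repository carries only the pointer file above; the actual weight (~706 MB) is fetched with `git lfs pull`. A sketch only, assuming the pulled file is a regular MMEngine checkpoint (the 'meta'/'state_dict' keys below follow the usual MMEngine layout and are not guaranteed by this repo):

# Sketch: sanity-check the pulled checkpoint on CPU.
import torch

ckpt = torch.load('cascade-rcnn_x101-32x4d_fpn_1x_ct/epoch_12.pth', map_location='cpu')
print(ckpt.get('meta', {}).get('epoch'))   # expected: 12
print(len(ckpt.get('state_dict', {})))     # number of saved parameter tensors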
cascade-rcnn_x101-64x4d_fpn_1x_ct/20240412_193331/20240412_193331.log ADDED
The diff for this file is too large to render. See raw diff
 
cascade-rcnn_x101-64x4d_fpn_1x_ct/20240412_193331/vis_data/20240412_193331.json ADDED
The diff for this file is too large to render. See raw diff
 
cascade-rcnn_x101-64x4d_fpn_1x_ct/20240412_193331/vis_data/config.py ADDED
@@ -0,0 +1,439 @@
auto_scale_lr = dict(base_batch_size=16, enable=False)
backend_args = None
data_root = '/mnt/bn/panxuran/Slice_Data/slice_dataset_maximum_0402/'
dataset_type = 'CocoCTDataset'
default_hooks = dict(
    checkpoint=dict(interval=1, type='CheckpointHook'),
    logger=dict(interval=50, type='LoggerHook'),
    param_scheduler=dict(type='ParamSchedulerHook'),
    sampler_seed=dict(type='DistSamplerSeedHook'),
    timer=dict(type='IterTimerHook'),
    visualization=dict(type='DetVisualizationHook'))
default_scope = 'mmdet'
env_cfg = dict(
    cudnn_benchmark=False,
    dist_cfg=dict(backend='nccl'),
    mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0))
launcher = 'pytorch'
load_from = 'ckpt/cascade_rcnn_x101_64x4d_fpn_1x_coco_20200515_075702-43ce6a30.pth'
log_level = 'INFO'
log_processor = dict(by_epoch=True, type='LogProcessor', window_size=50)
model = dict(
    backbone=dict(
        base_width=4,
        depth=101,
        frozen_stages=1,
        groups=64,
        init_cfg=dict(
            checkpoint='open-mmlab://resnext101_64x4d', type='Pretrained'),
        norm_cfg=dict(requires_grad=True, type='BN'),
        norm_eval=True,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        style='pytorch',
        type='ResNeXt'),
    data_preprocessor=dict(
        bgr_to_rgb=True,
        mean=[123.675, 116.28, 103.53],
        pad_size_divisor=32,
        std=[58.395, 57.12, 57.375],
        type='DetDataPreprocessor'),
    neck=dict(
        in_channels=[256, 512, 1024, 2048],
        num_outs=5,
        out_channels=256,
        type='FPN'),
    roi_head=dict(
        bbox_head=[
            dict(
                bbox_coder=dict(
                    target_means=[0.0, 0.0, 0.0, 0.0],
                    target_stds=[0.1, 0.1, 0.2, 0.2],
                    type='DeltaXYWHBBoxCoder'),
                fc_out_channels=1024,
                in_channels=256,
                loss_bbox=dict(beta=1.0, loss_weight=1.0, type='SmoothL1Loss'),
                loss_cls=dict(
                    loss_weight=1.0, type='CrossEntropyLoss',
                    use_sigmoid=False),
                num_classes=5,
                reg_class_agnostic=True,
                roi_feat_size=7,
                type='Shared2FCBBoxHead'),
            dict(
                bbox_coder=dict(
                    target_means=[0.0, 0.0, 0.0, 0.0],
                    target_stds=[0.05, 0.05, 0.1, 0.1],
                    type='DeltaXYWHBBoxCoder'),
                fc_out_channels=1024,
                in_channels=256,
                loss_bbox=dict(beta=1.0, loss_weight=1.0, type='SmoothL1Loss'),
                loss_cls=dict(
                    loss_weight=1.0, type='CrossEntropyLoss',
                    use_sigmoid=False),
                num_classes=5,
                reg_class_agnostic=True,
                roi_feat_size=7,
                type='Shared2FCBBoxHead'),
            dict(
                bbox_coder=dict(
                    target_means=[0.0, 0.0, 0.0, 0.0],
                    target_stds=[0.033, 0.033, 0.067, 0.067],
                    type='DeltaXYWHBBoxCoder'),
                fc_out_channels=1024,
                in_channels=256,
                loss_bbox=dict(beta=1.0, loss_weight=1.0, type='SmoothL1Loss'),
                loss_cls=dict(
                    loss_weight=1.0, type='CrossEntropyLoss',
                    use_sigmoid=False),
                num_classes=5,
                reg_class_agnostic=True,
                roi_feat_size=7,
                type='Shared2FCBBoxHead'),
        ],
        bbox_roi_extractor=dict(
            featmap_strides=[4, 8, 16, 32],
            out_channels=256,
            roi_layer=dict(output_size=7, sampling_ratio=0, type='RoIAlign'),
            type='SingleRoIExtractor'),
        num_stages=3,
        stage_loss_weights=[1, 0.5, 0.25],
        type='CascadeRoIHead'),
    rpn_head=dict(
        anchor_generator=dict(
            ratios=[0.5, 1.0, 2.0],
            scales=[8],
            strides=[4, 8, 16, 32, 64],
            type='AnchorGenerator'),
        bbox_coder=dict(
            target_means=[0.0, 0.0, 0.0, 0.0],
            target_stds=[1.0, 1.0, 1.0, 1.0],
            type='DeltaXYWHBBoxCoder'),
        feat_channels=256,
        in_channels=256,
        loss_bbox=dict(
            beta=0.1111111111111111, loss_weight=1.0, type='SmoothL1Loss'),
        loss_cls=dict(
            loss_weight=1.0, type='CrossEntropyLoss', use_sigmoid=True),
        type='RPNHead'),
    test_cfg=dict(
        rcnn=dict(
            max_per_img=100,
            nms=dict(iou_threshold=0.5, type='nms'),
            score_thr=0.05),
        rpn=dict(
            max_per_img=1000,
            min_bbox_size=0,
            nms=dict(iou_threshold=0.7, type='nms'),
            nms_pre=1000)),
    train_cfg=dict(
        rcnn=[
            dict(
                assigner=dict(
                    ignore_iof_thr=-1, match_low_quality=False,
                    min_pos_iou=0.5, neg_iou_thr=0.5, pos_iou_thr=0.5,
                    type='MaxIoUAssigner'),
                debug=False,
                pos_weight=-1,
                sampler=dict(
                    add_gt_as_proposals=True, neg_pos_ub=-1, num=512,
                    pos_fraction=0.25, type='RandomSampler')),
            dict(
                assigner=dict(
                    ignore_iof_thr=-1, match_low_quality=False,
                    min_pos_iou=0.6, neg_iou_thr=0.6, pos_iou_thr=0.6,
                    type='MaxIoUAssigner'),
                debug=False,
                pos_weight=-1,
                sampler=dict(
                    add_gt_as_proposals=True, neg_pos_ub=-1, num=512,
                    pos_fraction=0.25, type='RandomSampler')),
            dict(
                assigner=dict(
                    ignore_iof_thr=-1, match_low_quality=False,
                    min_pos_iou=0.7, neg_iou_thr=0.7, pos_iou_thr=0.7,
                    type='MaxIoUAssigner'),
                debug=False,
                pos_weight=-1,
                sampler=dict(
                    add_gt_as_proposals=True, neg_pos_ub=-1, num=512,
                    pos_fraction=0.25, type='RandomSampler')),
        ],
        rpn=dict(
            allowed_border=0,
            assigner=dict(
                ignore_iof_thr=-1, match_low_quality=True,
                min_pos_iou=0.3, neg_iou_thr=0.3, pos_iou_thr=0.7,
                type='MaxIoUAssigner'),
            debug=False,
            pos_weight=-1,
            sampler=dict(
                add_gt_as_proposals=False, neg_pos_ub=-1, num=256,
                pos_fraction=0.5, type='RandomSampler')),
        rpn_proposal=dict(
            max_per_img=2000,
            min_bbox_size=0,
            nms=dict(iou_threshold=0.7, type='nms'),
            nms_pre=2000)),
    type='CascadeRCNN')
optim_wrapper = dict(
    optimizer=dict(lr=0.02, momentum=0.9, type='SGD', weight_decay=0.0001),
    type='OptimWrapper')
param_scheduler = [
    dict(begin=0, by_epoch=False, end=500, start_factor=0.001, type='LinearLR'),
    dict(
        begin=0,
        by_epoch=True,
        end=12,
        gamma=0.1,
        milestones=[8, 11],
        type='MultiStepLR'),
]
resume = False
test_cfg = dict(type='TestLoop')
test_dataloader = dict(
    batch_size=8,
    dataset=dict(
        ann_file='annotations/test.json',
        backend_args=None,
        data_prefix=dict(img='images/test/'),
        data_root='/mnt/bn/panxuran/Slice_Data/slice_dataset_maximum_0402/',
        pipeline=[
            dict(backend_args=None, type='LoadImageFromFile'),
            dict(keep_ratio=True, scale=(512, 512), type='Resize'),
            dict(type='LoadAnnotations', with_bbox=True),
            dict(
                meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                           'scale_factor'),
                type='PackDetInputs'),
        ],
        test_mode=True,
        type='CocoCTDataset'),
    drop_last=False,
    num_workers=4,
    persistent_workers=True,
    sampler=dict(shuffle=False, type='DefaultSampler'))
test_evaluator = dict(
    ann_file=
    '/mnt/bn/panxuran/Slice_Data/slice_dataset_maximum_0402/annotations/test.json',
    backend_args=None,
    format_only=False,
    metric='bbox',
    type='CocoMetric')
test_pipeline = [
    dict(backend_args=None, type='LoadImageFromFile'),
    dict(keep_ratio=True, scale=(512, 512), type='Resize'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                   'scale_factor'),
        type='PackDetInputs'),
]
train_cfg = dict(max_epochs=12, type='EpochBasedTrainLoop', val_interval=1)
train_dataloader = dict(
    batch_sampler=dict(type='AspectRatioBatchSampler'),
    batch_size=8,
    dataset=dict(
        ann_file='annotations/train_wsyn.json',
        backend_args=None,
        data_prefix=dict(img='images/train/'),
        data_root='/mnt/bn/panxuran/Slice_Data/slice_dataset_maximum_0402/',
        filter_cfg=dict(filter_empty_gt=False, min_size=32),
        pipeline=[
            dict(backend_args=None, type='LoadImageFromFile'),
            dict(type='LoadAnnotations', with_bbox=True),
            dict(keep_ratio=True, scale=(512, 512), type='Resize'),
            dict(prob=0.5, type='RandomFlip'),
            dict(type='PackDetInputs'),
        ],
        type='CocoCTDataset'),
    num_workers=4,
    persistent_workers=True,
    sampler=dict(shuffle=True, type='DefaultSampler'))
train_pipeline = [
    dict(backend_args=None, type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(keep_ratio=True, scale=(512, 512), type='Resize'),
    dict(prob=0.5, type='RandomFlip'),
    dict(type='PackDetInputs'),
]
val_cfg = dict(type='ValLoop')
val_dataloader = dict(
    batch_size=8,
    dataset=dict(
        ann_file='annotations/test.json',
        backend_args=None,
        data_prefix=dict(img='images/test/'),
        data_root='/mnt/bn/panxuran/Slice_Data/slice_dataset_maximum_0402/',
        pipeline=[
            dict(backend_args=None, type='LoadImageFromFile'),
            dict(keep_ratio=True, scale=(512, 512), type='Resize'),
            dict(type='LoadAnnotations', with_bbox=True),
            dict(
                meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                           'scale_factor'),
                type='PackDetInputs'),
        ],
        test_mode=True,
        type='CocoCTDataset'),
    drop_last=False,
    num_workers=4,
    persistent_workers=True,
    sampler=dict(shuffle=False, type='DefaultSampler'))
val_evaluator = dict(
    ann_file=
    '/mnt/bn/panxuran/Slice_Data/slice_dataset_maximum_0402/annotations/test.json',
    backend_args=None,
    format_only=False,
    metric='bbox',
    type='CocoMetric')
vis_backends = [dict(type='LocalVisBackend')]
visualizer = dict(
    name='visualizer',
    type='DetLocalVisualizer',
    vis_backends=[dict(type='LocalVisBackend')])
work_dir = 'work_dirs/cascade-rcnn_x101-64x4d_fpn_1x_ct'
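
Both cascade configs set dataset_type to 'CocoCTDataset', which is not a stock MMDetection class, so the training environment presumably registers a COCO-style subclass for the CT slices. A hypothetical registration sketch (the class body and class names below are illustrative assumptions, not taken from this commit):

# Hypothetical sketch of how a custom dataset type like this is usually registered.
from mmdet.datasets import CocoDataset
from mmdet.registry import DATASETS


@DATASETS.register_module()
class CocoCTDataset(CocoDataset):
    """COCO-format detection dataset over CT slices (5 classes in these configs)."""
    # Placeholder names; the real class list is not included in this commit.
    METAINFO = {
        'classes': ('lesion_1', 'lesion_2', 'lesion_3', 'lesion_4', 'lesion_5'),
    }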
cascade-rcnn_x101-64x4d_fpn_1x_ct/20240412_193331/vis_data/scalars.json ADDED
The diff for this file is too large to render. See raw diff
 
cascade-rcnn_x101-64x4d_fpn_1x_ct/cascade-rcnn_x101-64x4d_fpn_1x_ct.py ADDED
@@ -0,0 +1,439 @@
(contents identical to cascade-rcnn_x101-64x4d_fpn_1x_ct/20240412_193331/vis_data/config.py shown above)
cascade-rcnn_x101-64x4d_fpn_1x_ct/epoch_12.pth ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:7a735d734720fd3cc93d5b9401116b29c9e96c45b9679a0a0b52dabc94b34dea
size 1019471931
co_deformable_detr_r50_1x_ct/co_deformable_detr_r50_1x_ct.py ADDED
@@ -0,0 +1,407 @@
dataset_type = 'CocoDataset'
data_root = 'data/slice_dataset_maximum_0402/'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(
        type='AutoAugment',
        policies=[[{
            'type': 'Resize', 'img_scale': [(512, 512)],
            'multiscale_mode': 'value', 'keep_ratio': True
        }],
        [{
            'type': 'Resize', 'img_scale': [(512, 512)],
            'multiscale_mode': 'value', 'keep_ratio': True
        }, {
            'type': 'RandomCrop', 'crop_type': 'absolute_range',
            'crop_size': (512, 512), 'allow_negative_crop': True
        }, {
            'type': 'Resize', 'img_scale': [(512, 512)],
            'multiscale_mode': 'value', 'override': True, 'keep_ratio': True
        }]]),
    dict(
        type='Normalize', mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375], to_rgb=True),
    dict(type='Pad', size_divisor=1),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(512, 512),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(
                type='Normalize', mean=[123.675, 116.28, 103.53],
                std=[58.395, 57.12, 57.375], to_rgb=True),
            dict(type='Pad', size_divisor=1),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img'])
        ])
]
data = dict(
    samples_per_gpu=16,
    workers_per_gpu=4,
    train=dict(
        type='CocoDataset',
        ann_file='data/slice_dataset_maximum_0402/annotations/train.json',
        img_prefix='data/slice_dataset_maximum_0402/images/train/',
        filter_empty_gt=False,
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(type='LoadAnnotations', with_bbox=True),
            dict(type='RandomFlip', flip_ratio=0.5),
            dict(
                type='AutoAugment',
                policies=[[{
                    'type': 'Resize', 'img_scale': [(512, 512)],
                    'multiscale_mode': 'value', 'keep_ratio': True
                }],
                [{
                    'type': 'Resize', 'img_scale': [(512, 512)],
                    'multiscale_mode': 'value', 'keep_ratio': True
                }, {
                    'type': 'RandomCrop', 'crop_type': 'absolute_range',
                    'crop_size': (512, 512), 'allow_negative_crop': True
                }, {
                    'type': 'Resize', 'img_scale': [(512, 512)],
                    'multiscale_mode': 'value', 'override': True,
                    'keep_ratio': True
                }]]),
            dict(
                type='Normalize', mean=[123.675, 116.28, 103.53],
                std=[58.395, 57.12, 57.375], to_rgb=True),
            dict(type='Pad', size_divisor=1),
            dict(type='DefaultFormatBundle'),
            dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
        ]),
    val=dict(
        type='CocoDataset',
        ann_file='data/slice_dataset_maximum_0402/annotations/test.json',
        img_prefix='data/slice_dataset_maximum_0402/images/test/',
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(
                type='MultiScaleFlipAug',
                img_scale=(512, 512),
                flip=False,
                transforms=[
                    dict(type='Resize', keep_ratio=True),
                    dict(type='RandomFlip'),
                    dict(
                        type='Normalize', mean=[123.675, 116.28, 103.53],
                        std=[58.395, 57.12, 57.375], to_rgb=True),
                    dict(type='Pad', size_divisor=1),
                    dict(type='ImageToTensor', keys=['img']),
                    dict(type='Collect', keys=['img'])
                ])
        ]),
    test=dict(
        type='CocoDataset',
        ann_file='data/slice_dataset_maximum_0402/annotations/test.json',
        img_prefix='data/slice_dataset_maximum_0402/images/test/',
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(
                type='MultiScaleFlipAug',
                img_scale=(512, 512),
                flip=False,
                transforms=[
                    dict(type='Resize', keep_ratio=True),
                    dict(type='RandomFlip'),
                    dict(
                        type='Normalize', mean=[123.675, 116.28, 103.53],
                        std=[58.395, 57.12, 57.375], to_rgb=True),
                    dict(type='Pad', size_divisor=1),
                    dict(type='ImageToTensor', keys=['img']),
                    dict(type='Collect', keys=['img'])
                ])
        ]))
evaluation = dict(interval=1, metric='bbox')
checkpoint_config = dict(interval=1)
log_config = dict(interval=50, hooks=[dict(type='TextLoggerHook')])
custom_hooks = [dict(type='NumClassCheckHook')]
dist_params = dict(backend='nccl')
log_level = 'INFO'
load_from = './ckpt/co_deformable_detr_r50_1x_coco.pth'
resume_from = None
workflow = [('train', 1)]
opencv_num_threads = 0
mp_start_method = 'fork'
auto_scale_lr = dict(enable=False, base_batch_size=16)
num_dec_layer = 6
lambda_2 = 2.0
model = dict(
    type='CoDETR',
    backbone=dict(
        type='ResNet',
        depth=50,
        num_stages=4,
        out_indices=(1, 2, 3),
        frozen_stages=1,
        norm_cfg=dict(type='BN', requires_grad=False),
        norm_eval=True,
        style='pytorch',
        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
    neck=dict(
        type='ChannelMapper',
        in_channels=[512, 1024, 2048],
        kernel_size=1,
        out_channels=256,
        act_cfg=None,
        norm_cfg=dict(type='GN', num_groups=32),
        num_outs=4),
    rpn_head=dict(
        type='RPNHead',
        in_channels=256,
        feat_channels=256,
        anchor_generator=dict(
            type='AnchorGenerator',
            octave_base_scale=4,
            scales_per_octave=3,
            ratios=[0.5, 1.0, 2.0],
            strides=[8, 16, 32, 64, 128]),
        bbox_coder=dict(
            type='DeltaXYWHBBoxCoder',
            target_means=[0.0, 0.0, 0.0, 0.0],
            target_stds=[1.0, 1.0, 1.0, 1.0]),
        loss_cls=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=12.0),
        loss_bbox=dict(type='L1Loss', loss_weight=12.0)),
    query_head=dict(
        type='CoDeformDETRHead',
        num_query=300,
        num_classes=5,
        in_channels=2048,
        sync_cls_avg_factor=True,
        with_box_refine=True,
        as_two_stage=True,
        mixed_selection=True,
        transformer=dict(
            type='CoDeformableDetrTransformer',
            num_co_heads=2,
            encoder=dict(
                type='DetrTransformerEncoder',
                num_layers=6,
                transformerlayers=dict(
                    type='BaseTransformerLayer',
                    attn_cfgs=dict(
                        type='MultiScaleDeformableAttention',
                        embed_dims=256,
                        dropout=0.0),
                    feedforward_channels=2048,
                    ffn_dropout=0.0,
                    operation_order=('self_attn', 'norm', 'ffn', 'norm'))),
            decoder=dict(
                type='CoDeformableDetrTransformerDecoder',
                num_layers=6,
                return_intermediate=True,
                look_forward_twice=True,
                transformerlayers=dict(
                    type='DetrTransformerDecoderLayer',
                    attn_cfgs=[
                        dict(
                            type='MultiheadAttention',
                            embed_dims=256,
                            num_heads=8,
                            dropout=0.0),
                        dict(
                            type='MultiScaleDeformableAttention',
                            embed_dims=256,
                            dropout=0.0)
                    ],
                    feedforward_channels=2048,
                    ffn_dropout=0.0,
                    operation_order=('self_attn', 'norm', 'cross_attn', 'norm',
                                     'ffn', 'norm')))),
        positional_encoding=dict(
            type='SinePositionalEncoding',
            num_feats=128,
            normalize=True,
            offset=-0.5),
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=2.0),
        loss_bbox=dict(type='L1Loss', loss_weight=5.0),
        loss_iou=dict(type='GIoULoss', loss_weight=2.0)),
    roi_head=[
        dict(
            type='CoStandardRoIHead',
            bbox_roi_extractor=dict(
                type='SingleRoIExtractor',
                roi_layer=dict(
                    type='RoIAlign', output_size=7, sampling_ratio=0),
                out_channels=256,
                featmap_strides=[8, 16, 32, 64],
                finest_scale=112),
            bbox_head=dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=5,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0.0, 0.0, 0.0, 0.0],
                    target_stds=[0.1, 0.1, 0.2, 0.2]),
                reg_class_agnostic=False,
                reg_decoded_bbox=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=12.0),
                loss_bbox=dict(type='GIoULoss', loss_weight=120.0)))
    ],
    bbox_head=[
        dict(
            type='CoATSSHead',
            num_classes=5,
            in_channels=256,
            stacked_convs=1,
            feat_channels=256,
            anchor_generator=dict(
                type='AnchorGenerator',
                ratios=[1.0],
                octave_base_scale=8,
                scales_per_octave=1,
                strides=[8, 16, 32, 64, 128]),
            bbox_coder=dict(
                type='DeltaXYWHBBoxCoder',
                target_means=[0.0, 0.0, 0.0, 0.0],
                target_stds=[0.1, 0.1, 0.2, 0.2]),
            loss_cls=dict(
                type='FocalLoss',
                use_sigmoid=True,
                gamma=2.0,
                alpha=0.25,
                loss_weight=12.0),
            loss_bbox=dict(type='GIoULoss', loss_weight=24.0),
            loss_centerness=dict(
                type='CrossEntropyLoss', use_sigmoid=True, loss_weight=12.0))
    ],
    train_cfg=[
        dict(
            assigner=dict(
                type='HungarianAssigner',
                cls_cost=dict(type='FocalLossCost', weight=2.0),
                reg_cost=dict(
                    type='BBoxL1Cost', weight=5.0, box_format='xywh'),
                iou_cost=dict(type='IoUCost', iou_mode='giou', weight=2.0))),
        dict(
            rpn=dict(
                assigner=dict(
                    type='MaxIoUAssigner',
                    pos_iou_thr=0.7,
                    neg_iou_thr=0.3,
                    min_pos_iou=0.3,
                    match_low_quality=True,
                    ignore_iof_thr=-1),
                sampler=dict(
                    type='RandomSampler',
                    num=256,
                    pos_fraction=0.5,
                    neg_pos_ub=-1,
                    add_gt_as_proposals=False),
                allowed_border=-1,
                pos_weight=-1,
                debug=False),
            rpn_proposal=dict(
                nms_pre=4000,
                max_per_img=1000,
                nms=dict(type='nms', iou_threshold=0.7),
                min_bbox_size=0),
            rcnn=dict(
                assigner=dict(
                    type='MaxIoUAssigner',
                    pos_iou_thr=0.5,
                    neg_iou_thr=0.5,
                    min_pos_iou=0.5,
                    match_low_quality=False,
                    ignore_iof_thr=-1),
                sampler=dict(
                    type='RandomSampler',
                    num=512,
                    pos_fraction=0.25,
                    neg_pos_ub=-1,
                    add_gt_as_proposals=True),
                pos_weight=-1,
                debug=False)),
        dict(
            assigner=dict(type='ATSSAssigner', topk=9),
            allowed_border=-1,
            pos_weight=-1,
            debug=False)
    ],
    test_cfg=[
        dict(max_per_img=100),
        dict(
            rpn=dict(
                nms_pre=1000,
                max_per_img=1000,
                nms=dict(type='nms', iou_threshold=0.7),
                min_bbox_size=0),
            rcnn=dict(
                score_thr=0.0,
                nms=dict(type='nms', iou_threshold=0.5),
                max_per_img=100)),
        dict(
            nms_pre=1000,
            min_bbox_size=0,
387
+ score_thr=0.0,
388
+ nms=dict(type='nms', iou_threshold=0.6),
389
+ max_per_img=100)
390
+ ])
391
+ optimizer = dict(
392
+ type='AdamW',
393
+ lr=0.0002,
394
+ weight_decay=0.0001,
395
+ paramwise_cfg=dict(
396
+ custom_keys=dict(
397
+ backbone=dict(lr_mult=0.1),
398
+ sampling_offsets=dict(lr_mult=0.1),
399
+ reference_points=dict(lr_mult=0.1))))
400
+ optimizer_config = dict(grad_clip=dict(max_norm=0.1, norm_type=2))
401
+ lr_config = dict(policy='step', step=[11])
402
+ runner = dict(type='EpochBasedRunner', max_epochs=200)
403
+ pretrained = './ckpt/co_deformable_detr_r50_1x_coco.pth'
404
+ resume = False
405
+ work_dir = 'work_dirs/co_deformable_detr_r50_1x_ct'
406
+ auto_resume = False
407
+ gpu_ids = range(0, 8)
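The file above is a complete mmdetection 2.x-style config dump (EpochBasedRunner, optimizer_config, workflow) that fine-tunes the Co-Deformable-DETR R50 COCO checkpoint on the 5-class CT slice dataset. A minimal sketch of how such a dump can be loaded and the detector instantiated, assuming the Co-DETR project modules (CoDETR, CoDeformDETRHead, and friends) are installed and registered, and assuming the config path matches the folder name:

# Sketch only: load the dumped config and build the detector with the mmcv 1.x / mmdet 2.x API.
from mmcv import Config
from mmdet.models import build_detector

cfg = Config.fromfile('co_deformable_detr_r50_1x_ct/co_deformable_detr_r50_1x_ct.py')  # assumed path
model = build_detector(cfg.model)  # train_cfg/test_cfg are already nested inside cfg.model in this dump
model.init_weights()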
co_deformable_detr_r50_1x_ct/epoch_40.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0367c20230c989c98a12957fdfb8346ada1fa020f879ff0f055ecabef6d0dd48
+ size 771820693
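The epoch_40.pth entry above is a Git LFS pointer rather than the checkpoint itself: it records only the spec version, the sha256 oid, and the object size in bytes (about 0.77 GB here). A small plain-Python sketch for reading such a pointer, with the path taken from the file name above:

def parse_lfs_pointer(path):
    """Return the key/value fields of a Git LFS pointer file (version, oid, size)."""
    fields = {}
    with open(path) as f:
        for line in f:
            key, _, value = line.strip().partition(' ')
            if key:
                fields[key] = value
    return fields

ptr = parse_lfs_pointer('co_deformable_detr_r50_1x_ct/epoch_40.pth')
print(ptr['oid'], int(ptr['size']))  # sha256 digest and size in bytes (771820693 here)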
co_deformable_detr_swin_large_1x_ct/co_deformable_detr_swin_large_1x_ct.py ADDED
@@ -0,0 +1,409 @@
+ dataset_type = 'CocoDataset'
+ data_root = 'data/slice_dataset_maximum_0402/'
+ img_norm_cfg = dict(
+ mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
+ train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='LoadAnnotations', with_bbox=True),
+ dict(type='RandomFlip', flip_ratio=0.5),
+ dict(
+ type='AutoAugment',
+ policies=[[{
+ 'type': 'Resize',
+ 'img_scale': [(512, 512)],
+ 'multiscale_mode': 'value',
+ 'keep_ratio': True
+ }],
+ [{
+ 'type': 'Resize',
+ 'img_scale': [(512, 512)],
+ 'multiscale_mode': 'value',
+ 'keep_ratio': True
+ }, {
+ 'type': 'RandomCrop',
+ 'crop_type': 'absolute_range',
+ 'crop_size': (512, 512),
+ 'allow_negative_crop': True
+ }, {
+ 'type': 'Resize',
+ 'img_scale': [(512, 512)],
+ 'multiscale_mode': 'value',
+ 'override': True,
+ 'keep_ratio': True
+ }]]),
+ dict(
+ type='Normalize',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ to_rgb=True),
+ dict(type='Pad', size_divisor=1),
+ dict(type='DefaultFormatBundle'),
+ dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
+ ]
+ test_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(
+ type='MultiScaleFlipAug',
+ img_scale=(512, 512),
+ flip=False,
+ transforms=[
+ dict(type='Resize', keep_ratio=True),
+ dict(type='RandomFlip'),
+ dict(
+ type='Normalize',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ to_rgb=True),
+ dict(type='Pad', size_divisor=1),
+ dict(type='ImageToTensor', keys=['img']),
+ dict(type='Collect', keys=['img'])
+ ])
+ ]
+ data = dict(
+ samples_per_gpu=4,
+ workers_per_gpu=4,
+ train=dict(
+ type='CocoDataset',
+ ann_file='data/slice_dataset_maximum_0402/annotations/train.json',
+ img_prefix='data/slice_dataset_maximum_0402/images/train/',
+ filter_empty_gt=False,
+ pipeline=[
+ dict(type='LoadImageFromFile'),
+ dict(type='LoadAnnotations', with_bbox=True),
+ dict(type='RandomFlip', flip_ratio=0.5),
+ dict(
+ type='AutoAugment',
+ policies=[[{
+ 'type': 'Resize',
+ 'img_scale': [(512, 512)],
+ 'multiscale_mode': 'value',
+ 'keep_ratio': True
+ }],
+ [{
+ 'type': 'Resize',
+ 'img_scale': [(512, 512)],
+ 'multiscale_mode': 'value',
+ 'keep_ratio': True
+ }, {
+ 'type': 'RandomCrop',
+ 'crop_type': 'absolute_range',
+ 'crop_size': (512, 512),
+ 'allow_negative_crop': True
+ }, {
+ 'type': 'Resize',
+ 'img_scale': [(512, 512)],
+ 'multiscale_mode': 'value',
+ 'override': True,
+ 'keep_ratio': True
+ }]]),
+ dict(
+ type='Normalize',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ to_rgb=True),
+ dict(type='Pad', size_divisor=1),
+ dict(type='DefaultFormatBundle'),
+ dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
+ ]),
+ val=dict(
+ type='CocoDataset',
+ ann_file='data/slice_dataset_maximum_0402/annotations/test.json',
+ img_prefix='data/slice_dataset_maximum_0402/images/test/',
+ pipeline=[
+ dict(type='LoadImageFromFile'),
+ dict(
+ type='MultiScaleFlipAug',
+ img_scale=(512, 512),
+ flip=False,
+ transforms=[
+ dict(type='Resize', keep_ratio=True),
+ dict(type='RandomFlip'),
+ dict(
+ type='Normalize',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ to_rgb=True),
+ dict(type='Pad', size_divisor=1),
+ dict(type='ImageToTensor', keys=['img']),
+ dict(type='Collect', keys=['img'])
+ ])
+ ]),
+ test=dict(
+ type='CocoDataset',
+ ann_file='data/slice_dataset_maximum_0402/annotations/test.json',
+ img_prefix='data/slice_dataset_maximum_0402/images/test/',
+ pipeline=[
+ dict(type='LoadImageFromFile'),
+ dict(
+ type='MultiScaleFlipAug',
+ img_scale=(512, 512),
+ flip=False,
+ transforms=[
+ dict(type='Resize', keep_ratio=True),
+ dict(type='RandomFlip'),
+ dict(
+ type='Normalize',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ to_rgb=True),
+ dict(type='Pad', size_divisor=1),
+ dict(type='ImageToTensor', keys=['img']),
+ dict(type='Collect', keys=['img'])
+ ])
+ ]))
+ evaluation = dict(interval=1, metric='bbox')
+ checkpoint_config = dict(interval=1)
+ log_config = dict(interval=50, hooks=[dict(type='TextLoggerHook')])
+ custom_hooks = [dict(type='NumClassCheckHook')]
+ dist_params = dict(backend='nccl')
+ log_level = 'INFO'
+ load_from = './ckpt/co_deformable_detr_swin_large_1x_coco.pth'
+ resume_from = None
+ workflow = [('train', 1)]
+ opencv_num_threads = 0
+ mp_start_method = 'fork'
+ auto_scale_lr = dict(enable=False, base_batch_size=16)
+ num_dec_layer = 6
+ lambda_2 = 2.0
+ model = dict(
+ type='CoDETR',
+ backbone=dict(
+ type='SwinTransformerV1',
+ embed_dim=192,
+ depths=[2, 2, 18, 2],
+ num_heads=[6, 12, 24, 48],
+ out_indices=(1, 2, 3),
+ window_size=12,
+ ape=False,
+ drop_path_rate=0.3,
+ patch_norm=True,
+ use_checkpoint=False,
+ pretrained='./ckpt/co_deformable_detr_swin_large_1x_coco.pth'),
+ neck=dict(
+ type='ChannelMapper',
+ in_channels=[384, 768, 1536],
+ kernel_size=1,
+ out_channels=256,
+ act_cfg=None,
+ norm_cfg=dict(type='GN', num_groups=32),
+ num_outs=4),
+ rpn_head=dict(
+ type='RPNHead',
+ in_channels=256,
+ feat_channels=256,
+ anchor_generator=dict(
+ type='AnchorGenerator',
+ octave_base_scale=4,
+ scales_per_octave=3,
+ ratios=[0.5, 1.0, 2.0],
+ strides=[8, 16, 32, 64, 128]),
+ bbox_coder=dict(
+ type='DeltaXYWHBBoxCoder',
+ target_means=[0.0, 0.0, 0.0, 0.0],
+ target_stds=[1.0, 1.0, 1.0, 1.0]),
+ loss_cls=dict(
+ type='CrossEntropyLoss', use_sigmoid=True, loss_weight=12.0),
+ loss_bbox=dict(type='L1Loss', loss_weight=12.0)),
+ query_head=dict(
+ type='CoDeformDETRHead',
+ num_query=300,
+ num_classes=5,
+ in_channels=2048,
+ sync_cls_avg_factor=True,
+ with_box_refine=True,
+ as_two_stage=True,
+ mixed_selection=True,
+ transformer=dict(
+ type='CoDeformableDetrTransformer',
+ num_co_heads=2,
+ encoder=dict(
+ type='DetrTransformerEncoder',
+ num_layers=6,
+ transformerlayers=dict(
+ type='BaseTransformerLayer',
+ attn_cfgs=dict(
+ type='MultiScaleDeformableAttention',
+ embed_dims=256,
+ dropout=0.0),
+ feedforward_channels=2048,
+ ffn_dropout=0.0,
+ operation_order=('self_attn', 'norm', 'ffn', 'norm'))),
+ decoder=dict(
+ type='CoDeformableDetrTransformerDecoder',
+ num_layers=6,
+ return_intermediate=True,
+ look_forward_twice=True,
+ transformerlayers=dict(
+ type='DetrTransformerDecoderLayer',
+ attn_cfgs=[
+ dict(
+ type='MultiheadAttention',
+ embed_dims=256,
+ num_heads=8,
+ dropout=0.0),
+ dict(
+ type='MultiScaleDeformableAttention',
+ embed_dims=256,
+ dropout=0.0)
+ ],
+ feedforward_channels=2048,
+ ffn_dropout=0.0,
+ operation_order=('self_attn', 'norm', 'cross_attn', 'norm',
+ 'ffn', 'norm')))),
+ positional_encoding=dict(
+ type='SinePositionalEncoding',
+ num_feats=128,
+ normalize=True,
+ offset=-0.5),
+ loss_cls=dict(
+ type='FocalLoss',
+ use_sigmoid=True,
+ gamma=2.0,
+ alpha=0.25,
+ loss_weight=2.0),
+ loss_bbox=dict(type='L1Loss', loss_weight=5.0),
+ loss_iou=dict(type='GIoULoss', loss_weight=2.0)),
+ roi_head=[
+ dict(
+ type='CoStandardRoIHead',
+ bbox_roi_extractor=dict(
+ type='SingleRoIExtractor',
+ roi_layer=dict(
+ type='RoIAlign', output_size=7, sampling_ratio=0),
+ out_channels=256,
+ featmap_strides=[8, 16, 32, 64],
+ finest_scale=112),
+ bbox_head=dict(
+ type='Shared2FCBBoxHead',
+ in_channels=256,
+ fc_out_channels=1024,
+ roi_feat_size=7,
+ num_classes=5,
+ bbox_coder=dict(
+ type='DeltaXYWHBBoxCoder',
+ target_means=[0.0, 0.0, 0.0, 0.0],
+ target_stds=[0.1, 0.1, 0.2, 0.2]),
+ reg_class_agnostic=False,
+ reg_decoded_bbox=True,
+ loss_cls=dict(
+ type='CrossEntropyLoss',
+ use_sigmoid=False,
+ loss_weight=12.0),
+ loss_bbox=dict(type='GIoULoss', loss_weight=120.0)))
+ ],
+ bbox_head=[
+ dict(
+ type='CoATSSHead',
+ num_classes=5,
+ in_channels=256,
+ stacked_convs=1,
+ feat_channels=256,
+ anchor_generator=dict(
+ type='AnchorGenerator',
+ ratios=[1.0],
+ octave_base_scale=8,
+ scales_per_octave=1,
+ strides=[8, 16, 32, 64, 128]),
+ bbox_coder=dict(
+ type='DeltaXYWHBBoxCoder',
+ target_means=[0.0, 0.0, 0.0, 0.0],
+ target_stds=[0.1, 0.1, 0.2, 0.2]),
+ loss_cls=dict(
+ type='FocalLoss',
+ use_sigmoid=True,
+ gamma=2.0,
+ alpha=0.25,
+ loss_weight=12.0),
+ loss_bbox=dict(type='GIoULoss', loss_weight=24.0),
+ loss_centerness=dict(
+ type='CrossEntropyLoss', use_sigmoid=True, loss_weight=12.0))
+ ],
+ train_cfg=[
+ dict(
+ assigner=dict(
+ type='HungarianAssigner',
+ cls_cost=dict(type='FocalLossCost', weight=2.0),
+ reg_cost=dict(
+ type='BBoxL1Cost', weight=5.0, box_format='xywh'),
+ iou_cost=dict(type='IoUCost', iou_mode='giou', weight=2.0))),
+ dict(
+ rpn=dict(
+ assigner=dict(
+ type='MaxIoUAssigner',
+ pos_iou_thr=0.7,
+ neg_iou_thr=0.3,
+ min_pos_iou=0.3,
+ match_low_quality=True,
+ ignore_iof_thr=-1),
+ sampler=dict(
+ type='RandomSampler',
+ num=256,
+ pos_fraction=0.5,
+ neg_pos_ub=-1,
+ add_gt_as_proposals=False),
+ allowed_border=-1,
+ pos_weight=-1,
+ debug=False),
+ rpn_proposal=dict(
+ nms_pre=4000,
+ max_per_img=1000,
+ nms=dict(type='nms', iou_threshold=0.7),
+ min_bbox_size=0),
+ rcnn=dict(
+ assigner=dict(
+ type='MaxIoUAssigner',
+ pos_iou_thr=0.5,
+ neg_iou_thr=0.5,
+ min_pos_iou=0.5,
+ match_low_quality=False,
+ ignore_iof_thr=-1),
+ sampler=dict(
+ type='RandomSampler',
+ num=512,
+ pos_fraction=0.25,
+ neg_pos_ub=-1,
+ add_gt_as_proposals=True),
+ pos_weight=-1,
+ debug=False)),
+ dict(
+ assigner=dict(type='ATSSAssigner', topk=9),
+ allowed_border=-1,
+ pos_weight=-1,
+ debug=False)
+ ],
+ test_cfg=[
+ dict(max_per_img=100),
+ dict(
+ rpn=dict(
+ nms_pre=1000,
+ max_per_img=1000,
+ nms=dict(type='nms', iou_threshold=0.7),
+ min_bbox_size=0),
+ rcnn=dict(
+ score_thr=0.0,
+ nms=dict(type='nms', iou_threshold=0.5),
+ max_per_img=100)),
+ dict(
+ nms_pre=1000,
+ min_bbox_size=0,
+ score_thr=0.0,
+ nms=dict(type='nms', iou_threshold=0.6),
+ max_per_img=100)
+ ])
+ optimizer = dict(
+ type='AdamW',
+ lr=0.0002,
+ weight_decay=0.05,
+ paramwise_cfg=dict(
+ custom_keys=dict(
+ backbone=dict(lr_mult=0.1),
+ sampling_offsets=dict(lr_mult=0.1),
+ reference_points=dict(lr_mult=0.1))))
+ optimizer_config = dict(grad_clip=dict(max_norm=0.1, norm_type=2))
+ lr_config = dict(policy='step', step=[11])
+ runner = dict(type='EpochBasedRunner', max_epochs=200)
+ pretrained = './ckpt/co_deformable_detr_swin_large_1x_coco.pth'
+ resume = False
+ work_dir = 'work_dirs/co_deformable_detr_swin_large_1x_ct'
+ auto_resume = False
+ gpu_ids = range(0, 8)
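This Swin-Large variant differs from the R50 config mainly in the backbone (SwinTransformerV1, embed_dim 192), the neck in_channels, and weight_decay=0.05; the heads, schedule, and dataset settings are shared. With samples_per_gpu=4 on 8 GPUs the effective batch size is 32, while auto_scale_lr is declared against base_batch_size=16 but left disabled, so the AdamW lr stays at 2e-4. A sketch of the linear scaling arithmetic that mmdetection would apply if enable=True (this is the rule, not the library code):

# Linear LR scaling relative to the declared base batch size (auto_scale_lr is disabled above).
samples_per_gpu, num_gpus = 4, 8          # data.samples_per_gpu and len(gpu_ids)
base_lr, base_batch_size = 2e-4, 16       # optimizer.lr and auto_scale_lr.base_batch_size
effective_batch = samples_per_gpu * num_gpus               # 32
scaled_lr = base_lr * effective_batch / base_batch_size    # 4e-4, not applied here
print(effective_batch, scaled_lr)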
co_deformable_detr_swin_large_1x_ct/epoch_50.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:21211f7d4b3e34daa235cb2f5840093e1fe7653329e421f8668bee6958cf12b7
+ size 2821415790
co_dino_5scale_r50_1x_ct/co_dino_5scale_r50_1x_ct.py ADDED
@@ -0,0 +1,411 @@
+ dataset_type = 'CocoDataset'
+ data_root = 'data/slice_dataset_maximum_0402/'
+ img_norm_cfg = dict(
+ mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
+ train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='LoadAnnotations', with_bbox=True),
+ dict(type='RandomFlip', flip_ratio=0.5),
+ dict(
+ type='AutoAugment',
+ policies=[[{
+ 'type': 'Resize',
+ 'img_scale': [(512, 512)],
+ 'multiscale_mode': 'value',
+ 'keep_ratio': True
+ }],
+ [{
+ 'type': 'Resize',
+ 'img_scale': [(512, 512)],
+ 'multiscale_mode': 'value',
+ 'keep_ratio': True
+ }, {
+ 'type': 'RandomCrop',
+ 'crop_type': 'absolute_range',
+ 'crop_size': (512, 512),
+ 'allow_negative_crop': True
+ }, {
+ 'type': 'Resize',
+ 'img_scale': [(512, 512)],
+ 'multiscale_mode': 'value',
+ 'override': True,
+ 'keep_ratio': True
+ }]]),
+ dict(
+ type='Normalize',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ to_rgb=True),
+ dict(type='Pad', size_divisor=1),
+ dict(type='DefaultFormatBundle'),
+ dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
+ ]
+ test_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(
+ type='MultiScaleFlipAug',
+ img_scale=(512, 512),
+ flip=False,
+ transforms=[
+ dict(type='Resize', keep_ratio=True),
+ dict(type='RandomFlip'),
+ dict(
+ type='Normalize',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ to_rgb=True),
+ dict(type='Pad', size_divisor=1),
+ dict(type='ImageToTensor', keys=['img']),
+ dict(type='Collect', keys=['img'])
+ ])
+ ]
+ data = dict(
+ samples_per_gpu=8,
+ workers_per_gpu=4,
+ train=dict(
+ type='CocoDataset',
+ ann_file='data/slice_dataset_maximum_0402/annotations/train.json',
+ img_prefix='data/slice_dataset_maximum_0402/images/train/',
+ filter_empty_gt=False,
+ pipeline=[
+ dict(type='LoadImageFromFile'),
+ dict(type='LoadAnnotations', with_bbox=True),
+ dict(type='RandomFlip', flip_ratio=0.5),
+ dict(
+ type='AutoAugment',
+ policies=[[{
+ 'type': 'Resize',
+ 'img_scale': [(512, 512)],
+ 'multiscale_mode': 'value',
+ 'keep_ratio': True
+ }],
+ [{
+ 'type': 'Resize',
+ 'img_scale': [(512, 512)],
+ 'multiscale_mode': 'value',
+ 'keep_ratio': True
+ }, {
+ 'type': 'RandomCrop',
+ 'crop_type': 'absolute_range',
+ 'crop_size': (512, 512),
+ 'allow_negative_crop': True
+ }, {
+ 'type': 'Resize',
+ 'img_scale': [(512, 512)],
+ 'multiscale_mode': 'value',
+ 'override': True,
+ 'keep_ratio': True
+ }]]),
+ dict(
+ type='Normalize',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ to_rgb=True),
+ dict(type='Pad', size_divisor=1),
+ dict(type='DefaultFormatBundle'),
+ dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
+ ]),
+ val=dict(
+ type='CocoDataset',
+ ann_file='data/slice_dataset_maximum_0402/annotations/test.json',
+ img_prefix='data/slice_dataset_maximum_0402/images/test/',
+ pipeline=[
+ dict(type='LoadImageFromFile'),
+ dict(
+ type='MultiScaleFlipAug',
+ img_scale=(512, 512),
+ flip=False,
+ transforms=[
+ dict(type='Resize', keep_ratio=True),
+ dict(type='RandomFlip'),
+ dict(
+ type='Normalize',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ to_rgb=True),
+ dict(type='Pad', size_divisor=1),
+ dict(type='ImageToTensor', keys=['img']),
+ dict(type='Collect', keys=['img'])
+ ])
+ ]),
+ test=dict(
+ type='CocoDataset',
+ ann_file='data/slice_dataset_maximum_0402/annotations/test.json',
+ img_prefix='data/slice_dataset_maximum_0402/images/test/',
+ pipeline=[
+ dict(type='LoadImageFromFile'),
+ dict(
+ type='MultiScaleFlipAug',
+ img_scale=(512, 512),
+ flip=False,
+ transforms=[
+ dict(type='Resize', keep_ratio=True),
+ dict(type='RandomFlip'),
+ dict(
+ type='Normalize',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ to_rgb=True),
+ dict(type='Pad', size_divisor=1),
+ dict(type='ImageToTensor', keys=['img']),
+ dict(type='Collect', keys=['img'])
+ ])
+ ]))
+ evaluation = dict(interval=1, metric='bbox')
+ checkpoint_config = dict(interval=1)
+ log_config = dict(interval=50, hooks=[dict(type='TextLoggerHook')])
+ custom_hooks = [dict(type='NumClassCheckHook')]
+ dist_params = dict(backend='nccl')
+ log_level = 'INFO'
+ load_from = './ckpt/co_dino_5scale_r50_1x_coco.pth'
+ resume_from = None
+ workflow = [('train', 1)]
+ opencv_num_threads = 0
+ mp_start_method = 'fork'
+ auto_scale_lr = dict(enable=False, base_batch_size=16)
+ num_dec_layer = 6
+ lambda_2 = 2.0
+ model = dict(
+ type='CoDETR',
+ backbone=dict(
+ type='ResNet',
+ depth=50,
+ num_stages=4,
+ out_indices=(0, 1, 2, 3),
+ frozen_stages=1,
+ norm_cfg=dict(type='BN', requires_grad=False),
+ norm_eval=True,
+ style='pytorch',
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
+ neck=dict(
+ type='ChannelMapper',
+ in_channels=[256, 512, 1024, 2048],
+ kernel_size=1,
+ out_channels=256,
+ act_cfg=None,
+ norm_cfg=dict(type='GN', num_groups=32),
+ num_outs=5),
+ rpn_head=dict(
+ type='RPNHead',
+ in_channels=256,
+ feat_channels=256,
+ anchor_generator=dict(
+ type='AnchorGenerator',
+ octave_base_scale=4,
+ scales_per_octave=3,
+ ratios=[0.5, 1.0, 2.0],
+ strides=[4, 8, 16, 32, 64, 128]),
+ bbox_coder=dict(
+ type='DeltaXYWHBBoxCoder',
+ target_means=[0.0, 0.0, 0.0, 0.0],
+ target_stds=[1.0, 1.0, 1.0, 1.0]),
+ loss_cls=dict(
+ type='CrossEntropyLoss', use_sigmoid=True, loss_weight=12.0),
+ loss_bbox=dict(type='L1Loss', loss_weight=12.0)),
+ query_head=dict(
+ type='CoDINOHead',
+ num_query=900,
+ num_classes=5,
+ num_feature_levels=5,
+ in_channels=2048,
+ sync_cls_avg_factor=True,
+ as_two_stage=True,
+ with_box_refine=True,
+ mixed_selection=True,
+ dn_cfg=dict(
+ type='CdnQueryGenerator',
+ noise_scale=dict(label=0.5, box=1.0),
+ group_cfg=dict(dynamic=True, num_groups=None, num_dn_queries=100)),
+ transformer=dict(
+ type='CoDinoTransformer',
+ with_pos_coord=True,
+ with_coord_feat=False,
+ num_co_heads=2,
+ num_feature_levels=5,
+ encoder=dict(
+ type='DetrTransformerEncoder',
+ num_layers=6,
+ with_cp=4,
+ transformerlayers=dict(
+ type='BaseTransformerLayer',
+ attn_cfgs=dict(
+ type='MultiScaleDeformableAttention',
+ embed_dims=256,
+ num_levels=5,
+ dropout=0.0),
+ feedforward_channels=2048,
+ ffn_dropout=0.0,
+ operation_order=('self_attn', 'norm', 'ffn', 'norm'))),
+ decoder=dict(
+ type='DinoTransformerDecoder',
+ num_layers=6,
+ return_intermediate=True,
+ transformerlayers=dict(
+ type='DetrTransformerDecoderLayer',
+ attn_cfgs=[
+ dict(
+ type='MultiheadAttention',
+ embed_dims=256,
+ num_heads=8,
+ dropout=0.0),
+ dict(
+ type='MultiScaleDeformableAttention',
+ embed_dims=256,
+ num_levels=5,
+ dropout=0.0)
+ ],
+ feedforward_channels=2048,
+ ffn_dropout=0.0,
+ operation_order=('self_attn', 'norm', 'cross_attn', 'norm',
+ 'ffn', 'norm')))),
+ positional_encoding=dict(
+ type='SinePositionalEncoding',
+ num_feats=128,
+ temperature=20,
+ normalize=True),
+ loss_cls=dict(
+ type='QualityFocalLoss',
+ use_sigmoid=True,
+ beta=2.0,
+ loss_weight=1.0),
+ loss_bbox=dict(type='L1Loss', loss_weight=5.0),
+ loss_iou=dict(type='GIoULoss', loss_weight=2.0)),
+ roi_head=[
+ dict(
+ type='CoStandardRoIHead',
+ bbox_roi_extractor=dict(
+ type='SingleRoIExtractor',
+ roi_layer=dict(
+ type='RoIAlign', output_size=7, sampling_ratio=0),
+ out_channels=256,
+ featmap_strides=[4, 8, 16, 32, 64],
+ finest_scale=56),
+ bbox_head=dict(
+ type='Shared2FCBBoxHead',
+ in_channels=256,
+ fc_out_channels=1024,
+ roi_feat_size=7,
+ num_classes=5,
+ bbox_coder=dict(
+ type='DeltaXYWHBBoxCoder',
+ target_means=[0.0, 0.0, 0.0, 0.0],
+ target_stds=[0.1, 0.1, 0.2, 0.2]),
+ reg_class_agnostic=False,
+ reg_decoded_bbox=True,
+ loss_cls=dict(
+ type='CrossEntropyLoss',
+ use_sigmoid=False,
+ loss_weight=12.0),
+ loss_bbox=dict(type='GIoULoss', loss_weight=120.0)))
+ ],
+ bbox_head=[
+ dict(
+ type='CoATSSHead',
+ num_classes=5,
+ in_channels=256,
+ stacked_convs=1,
+ feat_channels=256,
+ anchor_generator=dict(
+ type='AnchorGenerator',
+ ratios=[1.0],
+ octave_base_scale=8,
+ scales_per_octave=1,
+ strides=[4, 8, 16, 32, 64, 128]),
+ bbox_coder=dict(
+ type='DeltaXYWHBBoxCoder',
+ target_means=[0.0, 0.0, 0.0, 0.0],
+ target_stds=[0.1, 0.1, 0.2, 0.2]),
+ loss_cls=dict(
+ type='FocalLoss',
+ use_sigmoid=True,
+ gamma=2.0,
+ alpha=0.25,
+ loss_weight=12.0),
+ loss_bbox=dict(type='GIoULoss', loss_weight=24.0),
+ loss_centerness=dict(
+ type='CrossEntropyLoss', use_sigmoid=True, loss_weight=12.0))
+ ],
+ train_cfg=[
+ dict(
+ assigner=dict(
+ type='HungarianAssigner',
+ cls_cost=dict(type='FocalLossCost', weight=2.0),
+ reg_cost=dict(
+ type='BBoxL1Cost', weight=5.0, box_format='xywh'),
+ iou_cost=dict(type='IoUCost', iou_mode='giou', weight=2.0))),
+ dict(
+ rpn=dict(
+ assigner=dict(
+ type='MaxIoUAssigner',
+ pos_iou_thr=0.7,
+ neg_iou_thr=0.3,
+ min_pos_iou=0.3,
+ match_low_quality=True,
+ ignore_iof_thr=-1),
+ sampler=dict(
+ type='RandomSampler',
+ num=256,
+ pos_fraction=0.5,
+ neg_pos_ub=-1,
+ add_gt_as_proposals=False),
+ allowed_border=-1,
+ pos_weight=-1,
+ debug=False),
+ rpn_proposal=dict(
+ nms_pre=4000,
+ max_per_img=1000,
+ nms=dict(type='nms', iou_threshold=0.7),
+ min_bbox_size=0),
+ rcnn=dict(
+ assigner=dict(
+ type='MaxIoUAssigner',
+ pos_iou_thr=0.5,
+ neg_iou_thr=0.5,
+ min_pos_iou=0.5,
+ match_low_quality=False,
+ ignore_iof_thr=-1),
+ sampler=dict(
+ type='RandomSampler',
+ num=512,
+ pos_fraction=0.25,
+ neg_pos_ub=-1,
+ add_gt_as_proposals=True),
+ pos_weight=-1,
+ debug=False)),
+ dict(
+ assigner=dict(type='ATSSAssigner', topk=9),
+ allowed_border=-1,
+ pos_weight=-1,
+ debug=False)
+ ],
+ test_cfg=[
+ dict(max_per_img=300, nms=dict(type='soft_nms', iou_threshold=0.8)),
+ dict(
+ rpn=dict(
+ nms_pre=1000,
+ max_per_img=1000,
+ nms=dict(type='nms', iou_threshold=0.7),
+ min_bbox_size=0),
+ rcnn=dict(
+ score_thr=0.0,
+ nms=dict(type='nms', iou_threshold=0.5),
+ max_per_img=100)),
+ dict(
+ nms_pre=1000,
+ min_bbox_size=0,
+ score_thr=0.0,
+ nms=dict(type='nms', iou_threshold=0.6),
+ max_per_img=100)
+ ])
+ optimizer = dict(
+ type='AdamW',
+ lr=0.0002,
+ weight_decay=0.0001,
+ paramwise_cfg=dict(custom_keys=dict(backbone=dict(lr_mult=0.1))))
+ optimizer_config = dict(grad_clip=dict(max_norm=0.1, norm_type=2))
+ lr_config = dict(policy='step', step=[11])
+ runner = dict(type='EpochBasedRunner', max_epochs=200)
+ pretrained = './ckpt/co_dino_5scale_r50_1x_coco.pth'
+ work_dir = 'work_dirs/co_dino_5scale_r50_1x_ct'
+ auto_resume = False
+ gpu_ids = range(0, 8)
co_dino_5scale_r50_1x_ct/epoch_50.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f743702df7c27116b9cc6ad7492b54e0f8c6a2f7392de8600fcc6cf8481c7789
+ size 772477915
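Once the LFS objects are pulled, any of these checkpoints can be exercised with the stock mmdetection 2.x inference helpers. A minimal sketch using the co_dino files listed above, assuming the Co-DETR custom modules (CoDETR, CoDINOHead, CoDinoTransformer) are importable and registered; the test image path is hypothetical:

from mmdet.apis import init_detector, inference_detector

config_file = 'co_dino_5scale_r50_1x_ct/co_dino_5scale_r50_1x_ct.py'
checkpoint_file = 'co_dino_5scale_r50_1x_ct/epoch_50.pth'  # fetch the real weights with git lfs pull first

model = init_detector(config_file, checkpoint_file, device='cuda:0')
result = inference_detector(model, 'demo/ct_slice.png')  # hypothetical 512x512 CT slice
# For this 5-class detector, result is a list of 5 arrays of [x1, y1, x2, y2, score] rows.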