KyanChen committed on
Commit 3b96cb1
1 Parent(s): bb15694

Upload 1861 files

This view is limited to 50 files because it contains too many changes; see the raw diff for the full change set.
Files changed (50)
  1. .gitattributes +10 -0
  2. .gitignore +128 -0
  3. app.py +42 -0
  4. ckpt/epoch_270.pth +3 -0
  5. configs/.DS_Store +0 -0
  6. configs/TTP/ttp_sam_large_levircd.py +202 -0
  7. configs/TTP/ttp_sam_large_levircd_fp16.py +201 -0
  8. configs/TTP/ttp_sam_large_levircd_infer.py +199 -0
  9. demo/MMSegmentation_Tutorial.ipynb +555 -0
  10. demo/classroom__rgb_00283.jpg +0 -0
  11. demo/demo.png +0 -0
  12. demo/image_demo.py +51 -0
  13. demo/image_demo_with_inferencer.py +54 -0
  14. demo/inference_demo.ipynb +120 -0
  15. demo/rs_image_inference.py +50 -0
  16. demo/video_demo.py +112 -0
  17. mmdet/.DS_Store +0 -0
  18. mmdet/__init__.py +27 -0
  19. mmdet/__pycache__/__init__.cpython-311.pyc +0 -0
  20. mmdet/__pycache__/registry.cpython-311.pyc +0 -0
  21. mmdet/__pycache__/version.cpython-311.pyc +0 -0
  22. mmdet/apis/__init__.py +9 -0
  23. mmdet/apis/det_inferencer.py +644 -0
  24. mmdet/apis/inference.py +372 -0
  25. mmdet/configs/.DS_Store +0 -0
  26. mmdet/configs/_base_/datasets/coco_detection.py +104 -0
  27. mmdet/configs/_base_/datasets/coco_instance.py +106 -0
  28. mmdet/configs/_base_/datasets/coco_instance_semantic.py +87 -0
  29. mmdet/configs/_base_/datasets/coco_panoptic.py +105 -0
  30. mmdet/configs/_base_/datasets/mot_challenge.py +101 -0
  31. mmdet/configs/_base_/default_runtime.py +33 -0
  32. mmdet/configs/_base_/models/cascade_mask_rcnn_r50_fpn.py +220 -0
  33. mmdet/configs/_base_/models/cascade_rcnn_r50_fpn.py +201 -0
  34. mmdet/configs/_base_/models/faster_rcnn_r50_fpn.py +138 -0
  35. mmdet/configs/_base_/models/mask_rcnn_r50_caffe_c4.py +158 -0
  36. mmdet/configs/_base_/models/mask_rcnn_r50_fpn.py +154 -0
  37. mmdet/configs/_base_/models/retinanet_r50_fpn.py +77 -0
  38. mmdet/configs/_base_/schedules/schedule_1x.py +33 -0
  39. mmdet/configs/_base_/schedules/schedule_2x.py +33 -0
  40. mmdet/configs/cascade_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco.py +13 -0
  41. mmdet/configs/cascade_rcnn/cascade_rcnn_r50_fpn_1x_coco.py +13 -0
  42. mmdet/configs/common/lsj_100e_coco_detection.py +134 -0
  43. mmdet/configs/common/lsj_100e_coco_instance.py +134 -0
  44. mmdet/configs/common/lsj_200e_coco_detection.py +25 -0
  45. mmdet/configs/common/lsj_200e_coco_instance.py +25 -0
  46. mmdet/configs/common/ms_3x_coco.py +130 -0
  47. mmdet/configs/common/ms_3x_coco_instance.py +136 -0
  48. mmdet/configs/common/ms_90k_coco.py +151 -0
  49. mmdet/configs/common/ms_poly_3x_coco_instance.py +138 -0
  50. mmdet/configs/common/ms_poly_90k_coco_instance.py +153 -0
.gitattributes CHANGED
@@ -33,3 +33,13 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ samples/A/test_1.png filter=lfs diff=lfs merge=lfs -text
+ samples/A/test_2.png filter=lfs diff=lfs merge=lfs -text
+ samples/A/test_3.png filter=lfs diff=lfs merge=lfs -text
+ samples/A/test_4.png filter=lfs diff=lfs merge=lfs -text
+ samples/A/test_5.png filter=lfs diff=lfs merge=lfs -text
+ samples/B/test_1.png filter=lfs diff=lfs merge=lfs -text
+ samples/B/test_2.png filter=lfs diff=lfs merge=lfs -text
+ samples/B/test_3.png filter=lfs diff=lfs merge=lfs -text
+ samples/B/test_4.png filter=lfs diff=lfs merge=lfs -text
+ samples/B/test_5.png filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,128 @@
+ *.pth
+ gradio_cached_examples/
+
+ .idea
+ .DS_Store
+ work_dirs/
+ pretrain_models/
+
+ # Byte-compiled / optimized / DLL files
+ __pycache__/
+ *.py[cod]
+ *$py.class
+
+ # C extensions
+ *.so
+
+ # Distribution / packaging
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+ MANIFEST
+
+ # PyInstaller
+ # Usually these files are written by a python script from a template
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
+ *.manifest
+ *.spec
+
+ # Installer logs
+ pip-log.txt
+ pip-delete-this-directory.txt
+
+ # Unit test / coverage reports
+ htmlcov/
+ .tox/
+ .coverage
+ .coverage.*
+ .cache
+ nosetests.xml
+ coverage.xml
+ *.cover
+ .hypothesis/
+ .pytest_cache/
+
+ # Translations
+ *.mo
+ *.pot
+
+ # Django stuff:
+ *.log
+ local_settings.py
+ db.sqlite3
+
+ # Flask stuff:
+ instance/
+ .webassets-cache
+
+ # Scrapy stuff:
+ .scrapy
+
+ # Sphinx documentation
+ docs/en/_build/
+ docs/zh_cn/_build/
+
+ # PyBuilder
+ target/
+
+ # Jupyter Notebook
+ .ipynb_checkpoints
+
+ # pyenv
+ .python-version
+
+ # celery beat schedule file
+ celerybeat-schedule
+
+ # SageMath parsed files
+ *.sage.py
+
+ # Environments
+ .env
+ .venv
+ env/
+ venv/
+ ENV/
+ env.bak/
+ venv.bak/
+ .DS_Store
+
+ # Spyder project settings
+ .spyderproject
+ .spyproject
+
+ # Rope project settings
+ .ropeproject
+
+ # mkdocs documentation
+ /site
+
+ # mypy
+ .mypy_cache/
+
+ data
+ .vscode
+ .idea
+
+ # custom
+ *.pkl
+ *.pkl.json
+ *.log.json
+ work_dirs/
+ mmseg/.mim
+
+ # Pytorch
+ *.pth
app.py ADDED
@@ -0,0 +1,42 @@
+ import gradio as gr
+ import glob
+ import torch
+ from opencd.apis import OpenCDInferencer
+
+ device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
+
+ config_file = 'configs/TTP/ttp_sam_large_levircd_infer.py'
+ checkpoint_file = 'ckpt/epoch_270.pth'
+
+ # build the model from a config file and a checkpoint file
+ mmcd_inferencer = OpenCDInferencer(
+     model=config_file,
+     weights=checkpoint_file,
+     classes=['unchanged', 'changed'],
+     palette=[[0, 0, 0], [255, 255, 255]],
+     device=device
+ )
+
+ def infer(img1, img2):
+     # test a single image
+     result = mmcd_inferencer([[img1, img2]], show=False, return_vis=True)
+     visualization = result['visualization']
+     return visualization
+
+
+ with gr.Blocks() as demo:
+     with gr.Row():
+         input_0 = gr.Image(label='Input Image1')
+         input_1 = gr.Image(label='Input Image2')
+     with gr.Row():
+         output_gt = gr.Image(label='Predicted Mask')
+     btn = gr.Button("Detect")
+     btn.click(infer, inputs=[input_0, input_1], outputs=[output_gt])
+
+     img1_files = glob.glob('samples/A/*.png')
+     img2_files = [f.replace('A', 'B') for f in img1_files]
+     input_files = [[x, y] for x, y in zip(img1_files, img2_files)]
+     gr.Examples(input_files, fn=infer, inputs=[input_0, input_1], outputs=[output_gt], cache_examples=True)
+
+ if __name__ == "__main__":
+     demo.launch()
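
The same inference path can also be exercised offline on the bundled samples/ pairs. The following is a minimal sketch, not part of the commit: it assumes the inferencer accepts file paths as well as arrays (as OpenMMLab inferencers generally do), that the result dict exposes the 'visualization' list the Gradio callback above consumes, and that each entry is an image array that mmcv.imwrite can save.

import glob
import mmcv
from opencd.apis import OpenCDInferencer

inferencer = OpenCDInferencer(
    model='configs/TTP/ttp_sam_large_levircd_infer.py',
    weights='ckpt/epoch_270.pth',
    classes=['unchanged', 'changed'],
    palette=[[0, 0, 0], [255, 255, 255]],
    device='cpu')  # or 'cuda:0' if a GPU is available

# pair each pre-change image in samples/A with its post-change counterpart in samples/B
pairs = [[a, a.replace('A', 'B')] for a in sorted(glob.glob('samples/A/*.png'))]
results = inferencer(pairs, show=False, return_vis=True)

# write one predicted-change visualization per input pair to the working directory
for (img_a, _), vis in zip(pairs, results['visualization']):
    mmcv.imwrite(vis, 'pred_' + img_a.split('/')[-1])
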
ckpt/epoch_270.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a37d3a79379f4bf3d7ecb85b71209f35cd8af7e61cae564038397e8b7fb3eaf2
+ size 1415063308
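
The checkpoint is committed as a Git LFS pointer, so the actual weights are only present locally after an LFS pull. A quick integrity check against the oid and size recorded above needs nothing beyond the standard library:

import hashlib
import os

ckpt_path = 'ckpt/epoch_270.pth'
expected_oid = 'a37d3a79379f4bf3d7ecb85b71209f35cd8af7e61cae564038397e8b7fb3eaf2'
expected_size = 1415063308  # bytes, from the pointer file

sha256 = hashlib.sha256()
with open(ckpt_path, 'rb') as f:
    for chunk in iter(lambda: f.read(1 << 20), b''):  # hash in 1 MiB chunks
        sha256.update(chunk)

assert os.path.getsize(ckpt_path) == expected_size, 'size mismatch: LFS object not pulled?'
assert sha256.hexdigest() == expected_oid, 'sha256 mismatch: corrupted download?'
print('ckpt/epoch_270.pth matches its LFS pointer')
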
configs/.DS_Store ADDED
Binary file (6.15 kB)
configs/TTP/ttp_sam_large_levircd.py ADDED
@@ -0,0 +1,202 @@
1
+ default_scope = 'opencd'
2
+
3
+ work_dir = 'work_dirs/lervicd/ttp_sam_large_levircd'
4
+
5
+ custom_imports = dict(imports=['mmseg.ttp'], allow_failed_imports=False)
6
+
7
+ env_cfg = dict(
8
+ cudnn_benchmark=True,
9
+ mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
10
+ dist_cfg=dict(backend='nccl'),
11
+ )
12
+ default_hooks = dict(
13
+ timer=dict(type='IterTimerHook'),
14
+ logger=dict(type='LoggerHook', interval=10, log_metric_by_epoch=True),
15
+ param_scheduler=dict(type='ParamSchedulerHook'),
16
+ checkpoint=dict(type='CheckpointHook', by_epoch=True, interval=10, save_best='cd/iou_changed', max_keep_ckpts=5, greater_keys=['cd/iou_changed'], save_last=True),
17
+ sampler_seed=dict(type='DistSamplerSeedHook'),
18
+ visualization=dict(type='CDVisualizationHook', interval=1,
19
+ img_shape=(1024, 1024, 3))
20
+ )
21
+ vis_backends = [dict(type='CDLocalVisBackend'),
22
+ dict(type='WandbVisBackend',
23
+ init_kwargs=dict(project='samcd', group='levircd', name='ttp_sam_large_levircd'))
24
+ ]
25
+
26
+ visualizer = dict(
27
+ type='CDLocalVisualizer',
28
+ vis_backends=vis_backends, name='visualizer', alpha=1.0)
29
+ log_processor = dict(by_epoch=True)
30
+
31
+ log_level = 'INFO'
32
+ load_from = None
33
+ resume = False
34
+
35
+ crop_size = (512, 512)
36
+
37
+ data_preprocessor = dict(
38
+ type='DualInputSegDataPreProcessor',
39
+ mean=[123.675, 116.28, 103.53] * 2,
40
+ std=[58.395, 57.12, 57.375] * 2,
41
+ bgr_to_rgb=True,
42
+ pad_val=0,
43
+ seg_pad_val=255,
44
+ size_divisor=32,
45
+ test_cfg=dict(size_divisor=32)
46
+ )
47
+
48
+ norm_cfg = dict(type='SyncBN', requires_grad=True)
49
+ fpn_norm_cfg = dict(type='LN2d', requires_grad=True)
50
+
51
+ sam_pretrain_ckpt_path = 'https://download.openmmlab.com/mmclassification/v1/vit_sam/vit-large-p16_sam-pre_3rdparty_sa1b-1024px_20230411-595feafd.pth'
52
+
53
+ model = dict(
54
+ type='SiamEncoderDecoder',
55
+ data_preprocessor=data_preprocessor,
56
+ backbone=dict(
57
+ type='MMPretrainSamVisionEncoder',
58
+ encoder_cfg=dict(
59
+ type='mmpretrain.ViTSAM',
60
+ arch='large',
61
+ img_size=crop_size[0],
62
+ patch_size=16,
63
+ out_channels=256,
64
+ use_abs_pos=True,
65
+ use_rel_pos=True,
66
+ window_size=14,
67
+ layer_cfgs=dict(type='TimeFusionTransformerEncoderLayer'),
68
+ init_cfg=dict(type='Pretrained', checkpoint=sam_pretrain_ckpt_path, prefix='backbone.'),
69
+ ),
70
+ peft_cfg=dict(
71
+ r=16,
72
+ target_modules=["qkv"],
73
+ lora_dropout=0.01,
74
+ bias='lora_only',
75
+ ),
76
+ ),
77
+ neck=dict(
78
+ type='SequentialNeck',
79
+ necks=[
80
+ dict(
81
+ type='FeatureFusionNeck',
82
+ policy='concat',
83
+ out_indices=(0,)),
84
+ dict(
85
+ type='SimpleFPN',
86
+ backbone_channel=512,
87
+ in_channels=[128, 256, 512, 512],
88
+ out_channels=256,
89
+ num_outs=5,
90
+ norm_cfg=fpn_norm_cfg),
91
+ ],
92
+ ),
93
+ decode_head=dict(
94
+ type='MLPSegHead',
95
+ out_size=(128, 128),
96
+ in_channels=[256]*5,
97
+ in_index=[0, 1, 2, 3, 4],
98
+ channels=256,
99
+ dropout_ratio=0,
100
+ num_classes=2,
101
+ norm_cfg=norm_cfg,
102
+ align_corners=False,
103
+ loss_decode=dict(
104
+ type='mmseg.CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
105
+ train_cfg=dict(),
106
+ test_cfg=dict(mode='slide', crop_size=crop_size, stride=(crop_size[0]//2, crop_size[1]//2))
107
+ ) # yapf: disable
108
+
109
+ dataset_type = 'LEVIR_CD_Dataset'
110
+ data_root = '/mnt/levir_datasets/levir-cd'
111
+
112
+
113
+ train_pipeline = [
114
+ dict(type='MultiImgLoadImageFromFile'),
115
+ dict(type='MultiImgLoadAnnotations'),
116
+ dict(type='MultiImgRandomRotate', prob=0.5, degree=180),
117
+ dict(type='MultiImgRandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
118
+ dict(type='MultiImgRandomFlip', prob=0.5, direction='horizontal'),
119
+ dict(type='MultiImgRandomFlip', prob=0.5, direction='vertical'),
120
+ # dict(type='MultiImgExchangeTime', prob=0.5),
121
+ dict(
122
+ type='MultiImgPhotoMetricDistortion',
123
+ brightness_delta=10,
124
+ contrast_range=(0.8, 1.2),
125
+ saturation_range=(0.8, 1.2),
126
+ hue_delta=10),
127
+ dict(type='MultiImgPackSegInputs')
128
+ ]
129
+ test_pipeline = [
130
+ dict(type='MultiImgLoadImageFromFile'),
131
+ dict(type='MultiImgResize', scale=(1024, 1024), keep_ratio=True),
132
+ # add loading annotation after ``Resize`` because ground truth
133
+ # does not need to do resize data transform
134
+ dict(type='MultiImgLoadAnnotations'),
135
+ dict(type='MultiImgPackSegInputs')
136
+ ]
137
+
138
+ batch_size_per_gpu = 2
139
+
140
+ train_dataloader = dict(
141
+ batch_size=batch_size_per_gpu,
142
+ num_workers=8,
143
+ persistent_workers=True,
144
+ sampler=dict(type='DefaultSampler', shuffle=True),
145
+ dataset=dict(
146
+ type=dataset_type,
147
+ data_root=data_root,
148
+ data_prefix=dict(
149
+ seg_map_path='train/label',
150
+ img_path_from='train/A',
151
+ img_path_to='train/B'),
152
+ pipeline=train_pipeline)
153
+ )
154
+
155
+ val_dataloader = dict(
156
+ batch_size=1,
157
+ num_workers=4,
158
+ persistent_workers=True,
159
+ sampler=dict(type='DefaultSampler', shuffle=False),
160
+ dataset=dict(
161
+ type=dataset_type,
162
+ data_root=data_root,
163
+ data_prefix=dict(
164
+ seg_map_path='test/label',
165
+ img_path_from='test/A',
166
+ img_path_to='test/B'),
167
+ pipeline=test_pipeline)
168
+ )
169
+
170
+ test_dataloader = val_dataloader
171
+
172
+ val_evaluator = dict(
173
+ type='CDMetric',
174
+ )
175
+ test_evaluator = val_evaluator
176
+
177
+ max_epochs = 300
178
+ base_lr = 0.0004
179
+ param_scheduler = [
180
+ dict(
181
+ type='LinearLR', start_factor=1e-4, by_epoch=True, begin=0, end=5, convert_to_iter_based=True),
182
+ dict(
183
+ type='CosineAnnealingLR',
184
+ T_max=max_epochs,
185
+ begin=5,
186
+ by_epoch=True,
187
+ end=max_epochs,
188
+ convert_to_iter_based=True
189
+ ),
190
+ ]
191
+
192
+ train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=max_epochs, val_interval=5)
193
+ val_cfg = dict(type='ValLoop')
194
+ test_cfg = dict(type='TestLoop')
195
+
196
+
197
+ optim_wrapper = dict(
198
+ type='OptimWrapper',
199
+ optimizer=dict(
200
+ type='AdamW', lr=base_lr, betas=(0.9, 0.999), weight_decay=0.05),
201
+ )
202
+
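
The config above is a self-contained MMEngine recipe (model, pipelines, dataloaders, schedule, optimizer), so it can be launched with the same Runner.from_cfg pattern used in the tutorial notebook included further down in this commit. A minimal single-process sketch, assuming opencd/mmseg and their dependencies are importable; the data_root and work_dir overrides are hypothetical local paths replacing the /mnt/levir_datasets path baked into the file, and multi-GPU jobs would normally go through a distributed launcher instead.

from mmengine.config import Config
from mmengine.runner import Runner

# Config.fromfile also processes custom_imports, which registers the mmseg.ttp modules.
cfg = Config.fromfile('configs/TTP/ttp_sam_large_levircd.py')

# override paths hard-coded in the file (hypothetical local setup)
cfg.work_dir = 'work_dirs/ttp_sam_large_levircd'
cfg.train_dataloader.dataset.data_root = 'data/levir-cd'
cfg.val_dataloader.dataset.data_root = 'data/levir-cd'
cfg.test_dataloader.dataset.data_root = 'data/levir-cd'

# drop the W&B backend if wandb is not configured locally
cfg.visualizer.vis_backends = [dict(type='CDLocalVisBackend')]

runner = Runner.from_cfg(cfg)
runner.train()  # 300 epochs with validation every 5, per train_cfg above
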
configs/TTP/ttp_sam_large_levircd_fp16.py ADDED
@@ -0,0 +1,201 @@
1
+ default_scope = 'opencd'
2
+
3
+ work_dir = 'work_dirs/lervicd/ttp_sam_large_levircd_fp16'
4
+
5
+ custom_imports = dict(imports=['mmseg.ttp'], allow_failed_imports=False)
6
+
7
+ env_cfg = dict(
8
+ cudnn_benchmark=True,
9
+ mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
10
+ dist_cfg=dict(backend='nccl'),
11
+ )
12
+ default_hooks = dict(
13
+ timer=dict(type='IterTimerHook'),
14
+ logger=dict(type='LoggerHook', interval=10, log_metric_by_epoch=True),
15
+ param_scheduler=dict(type='ParamSchedulerHook'),
16
+ checkpoint=dict(type='CheckpointHook', by_epoch=True, interval=10, save_best='cd/iou_changed', max_keep_ckpts=5, greater_keys=['cd/iou_changed'], save_last=True),
17
+ sampler_seed=dict(type='DistSamplerSeedHook'),
18
+ visualization=dict(type='CDVisualizationHook', interval=1, img_shape=(1024, 1024, 3))
19
+ )
20
+ vis_backends = [dict(type='CDLocalVisBackend'),
21
+ dict(type='WandbVisBackend', init_kwargs=dict(project='samcd', group='levircd', name='ttp_sam_large_levircd_fp16'))
22
+ ]
23
+
24
+ visualizer = dict(
25
+ type='CDLocalVisualizer',
26
+ vis_backends=vis_backends, name='visualizer', alpha=1.0)
27
+ log_processor = dict(by_epoch=True)
28
+
29
+ log_level = 'INFO'
30
+ load_from = None
31
+ resume = False
32
+
33
+ crop_size = (512, 512)
34
+
35
+ data_preprocessor = dict(
36
+ type='DualInputSegDataPreProcessor',
37
+ mean=[123.675, 116.28, 103.53] * 2,
38
+ std=[58.395, 57.12, 57.375] * 2,
39
+ bgr_to_rgb=True,
40
+ pad_val=0,
41
+ seg_pad_val=255,
42
+ size_divisor=32,
43
+ test_cfg=dict(size_divisor=32)
44
+ )
45
+
46
+ norm_cfg = dict(type='SyncBN', requires_grad=True)
47
+ fpn_norm_cfg = dict(type='LN2d', requires_grad=True)
48
+
49
+ sam_pretrain_ckpt_path = 'https://download.openmmlab.com/mmclassification/v1/vit_sam/vit-large-p16_sam-pre_3rdparty_sa1b-1024px_20230411-595feafd.pth'
50
+
51
+ model = dict(
52
+ type='SiamEncoderDecoder',
53
+ data_preprocessor=data_preprocessor,
54
+ backbone=dict(
55
+ type='MMPretrainSamVisionEncoder',
56
+ encoder_cfg=dict(
57
+ type='mmpretrain.ViTSAM',
58
+ arch='large',
59
+ img_size=crop_size[0],
60
+ patch_size=16,
61
+ out_channels=256,
62
+ use_abs_pos=True,
63
+ use_rel_pos=True,
64
+ window_size=14,
65
+ layer_cfgs=dict(type='TimeFusionTransformerEncoderLayer'),
66
+ init_cfg=dict(type='Pretrained', checkpoint=sam_pretrain_ckpt_path, prefix='backbone.'),
67
+ ),
68
+ peft_cfg=dict(
69
+ r=16,
70
+ target_modules=["qkv"],
71
+ lora_dropout=0.01,
72
+ bias='lora_only',
73
+ ),
74
+ ),
75
+ neck=dict(
76
+ type='SequentialNeck',
77
+ necks=[
78
+ dict(
79
+ type='FeatureFusionNeck',
80
+ policy='concat',
81
+ out_indices=(0,)),
82
+ dict(
83
+ type='SimpleFPN',
84
+ backbone_channel=512,
85
+ in_channels=[128, 256, 512, 512],
86
+ out_channels=256,
87
+ num_outs=5,
88
+ norm_cfg=fpn_norm_cfg),
89
+ ],
90
+ ),
91
+ decode_head=dict(
92
+ type='MLPSegHead',
93
+ out_size=(128, 128),
94
+ in_channels=[256]*5,
95
+ in_index=[0, 1, 2, 3, 4],
96
+ channels=256,
97
+ dropout_ratio=0,
98
+ num_classes=2,
99
+ norm_cfg=norm_cfg,
100
+ align_corners=False,
101
+ loss_decode=dict(
102
+ type='mmseg.CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
103
+ train_cfg=dict(),
104
+ test_cfg=dict(mode='slide', crop_size=crop_size, stride=(crop_size[0]//2, crop_size[1]//2))
105
+ ) # yapf: disable
106
+
107
+ dataset_type = 'LEVIR_CD_Dataset'
108
+ data_root = '/mnt/levir_datasets/levir-cd'
109
+
110
+
111
+ train_pipeline = [
112
+ dict(type='MultiImgLoadImageFromFile'),
113
+ dict(type='MultiImgLoadAnnotations'),
114
+ dict(type='MultiImgRandomRotate', prob=0.5, degree=180),
115
+ dict(type='MultiImgRandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
116
+ dict(type='MultiImgRandomFlip', prob=0.5, direction='horizontal'),
117
+ dict(type='MultiImgRandomFlip', prob=0.5, direction='vertical'),
118
+ # dict(type='MultiImgExchangeTime', prob=0.5),
119
+ dict(
120
+ type='MultiImgPhotoMetricDistortion',
121
+ brightness_delta=10,
122
+ contrast_range=(0.8, 1.2),
123
+ saturation_range=(0.8, 1.2),
124
+ hue_delta=10),
125
+ dict(type='MultiImgPackSegInputs')
126
+ ]
127
+ test_pipeline = [
128
+ dict(type='MultiImgLoadImageFromFile'),
129
+ dict(type='MultiImgResize', scale=(1024, 1024), keep_ratio=True),
130
+ # add loading annotation after ``Resize`` because ground truth
131
+ # does not need to do resize data transform
132
+ dict(type='MultiImgLoadAnnotations'),
133
+ dict(type='MultiImgPackSegInputs')
134
+ ]
135
+
136
+ batch_size_per_gpu = 2
137
+
138
+ train_dataloader = dict(
139
+ batch_size=batch_size_per_gpu,
140
+ num_workers=8,
141
+ persistent_workers=True,
142
+ sampler=dict(type='DefaultSampler', shuffle=True),
143
+ dataset=dict(
144
+ type=dataset_type,
145
+ data_root=data_root,
146
+ data_prefix=dict(
147
+ seg_map_path='train/label',
148
+ img_path_from='train/A',
149
+ img_path_to='train/B'),
150
+ pipeline=train_pipeline)
151
+ )
152
+
153
+ val_dataloader = dict(
154
+ batch_size=1,
155
+ num_workers=4,
156
+ persistent_workers=True,
157
+ sampler=dict(type='DefaultSampler', shuffle=False),
158
+ dataset=dict(
159
+ type=dataset_type,
160
+ data_root=data_root,
161
+ data_prefix=dict(
162
+ seg_map_path='test/label',
163
+ img_path_from='test/A',
164
+ img_path_to='test/B'),
165
+ pipeline=test_pipeline)
166
+ )
167
+
168
+ test_dataloader = val_dataloader
169
+
170
+ val_evaluator = dict(
171
+ type='CDMetric',
172
+ )
173
+ test_evaluator = val_evaluator
174
+
175
+ max_epochs = 300
176
+ base_lr = 0.0004
177
+ param_scheduler = [
178
+ dict(
179
+ type='LinearLR', start_factor=1e-4, by_epoch=True, begin=0, end=5, convert_to_iter_based=True),
180
+ dict(
181
+ type='CosineAnnealingLR',
182
+ T_max=max_epochs,
183
+ begin=5,
184
+ by_epoch=True,
185
+ end=max_epochs,
186
+ convert_to_iter_based=True
187
+ ),
188
+ ]
189
+
190
+ train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=max_epochs, val_interval=5)
191
+ val_cfg = dict(type='ValLoop')
192
+ test_cfg = dict(type='TestLoop')
193
+
194
+
195
+ optim_wrapper = dict(
196
+ type='AmpOptimWrapper',
197
+ optimizer=dict(
198
+ type='AdamW', lr=base_lr, betas=(0.9, 0.999), weight_decay=0.05),
199
+ dtype='float16',
200
+ )
201
+
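
As far as this diff shows, the fp16 variant differs from the base ttp_sam_large_levircd.py only in its work_dir, the W&B run name, and the optimizer wrapper, which switches to MMEngine's mixed-precision AmpOptimWrapper. The single functional change, shown side by side with the values from the two configs (base_lr = 0.0004):

# base config: full-precision training
optim_wrapper = dict(
    type='OptimWrapper',
    optimizer=dict(type='AdamW', lr=0.0004, betas=(0.9, 0.999), weight_decay=0.05),
)

# fp16 config: automatic mixed precision
optim_wrapper = dict(
    type='AmpOptimWrapper',
    optimizer=dict(type='AdamW', lr=0.0004, betas=(0.9, 0.999), weight_decay=0.05),
    dtype='float16',
)
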
configs/TTP/ttp_sam_large_levircd_infer.py ADDED
@@ -0,0 +1,199 @@
1
+ default_scope = 'opencd'
2
+
3
+ work_dir = 'work_dirs/lervicd/ttp_sam_large_levircd'
4
+
5
+ custom_imports = dict(imports=['mmseg.ttp'], allow_failed_imports=False)
6
+
7
+ env_cfg = dict(
8
+ cudnn_benchmark=True,
9
+ mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
10
+ dist_cfg=dict(backend='nccl'),
11
+ )
12
+ default_hooks = dict(
13
+ timer=dict(type='IterTimerHook'),
14
+ logger=dict(type='LoggerHook', interval=10, log_metric_by_epoch=True),
15
+ param_scheduler=dict(type='ParamSchedulerHook'),
16
+ checkpoint=dict(type='CheckpointHook', by_epoch=True, interval=10, save_best='cd/iou_changed', max_keep_ckpts=5, greater_keys=['cd/iou_changed'], save_last=True),
17
+ sampler_seed=dict(type='DistSamplerSeedHook'),
18
+ visualization=dict(type='CDVisualizationHook', interval=1,
19
+ img_shape=(1024, 1024, 3))
20
+ )
21
+ vis_backends = [dict(type='CDLocalVisBackend')]
22
+
23
+ visualizer = dict(
24
+ type='CDLocalVisualizer',
25
+ vis_backends=vis_backends, name='visualizer', alpha=1.0)
26
+ log_processor = dict(by_epoch=True)
27
+
28
+ log_level = 'INFO'
29
+ load_from = None
30
+ resume = False
31
+
32
+ crop_size = (512, 512)
33
+
34
+ data_preprocessor = dict(
35
+ type='DualInputSegDataPreProcessor',
36
+ mean=[123.675, 116.28, 103.53] * 2,
37
+ std=[58.395, 57.12, 57.375] * 2,
38
+ bgr_to_rgb=True,
39
+ pad_val=0,
40
+ seg_pad_val=255,
41
+ size_divisor=32,
42
+ test_cfg=dict(size_divisor=32)
43
+ )
44
+
45
+ norm_cfg = dict(type='SyncBN', requires_grad=True)
46
+ fpn_norm_cfg = dict(type='LN2d', requires_grad=True)
47
+
48
+ # sam_pretrain_ckpt_path = 'https://download.openmmlab.com/mmclassification/v1/vit_sam/vit-large-p16_sam-pre_3rdparty_sa1b-1024px_20230411-595feafd.pth'
49
+
50
+ model = dict(
51
+ type='SiamEncoderDecoder',
52
+ data_preprocessor=data_preprocessor,
53
+ backbone=dict(
54
+ type='MMPretrainSamVisionEncoder',
55
+ encoder_cfg=dict(
56
+ type='mmpretrain.ViTSAM',
57
+ arch='large',
58
+ img_size=crop_size[0],
59
+ patch_size=16,
60
+ out_channels=256,
61
+ use_abs_pos=True,
62
+ use_rel_pos=True,
63
+ window_size=14,
64
+ layer_cfgs=dict(type='TimeFusionTransformerEncoderLayer'),
65
+ # init_cfg=dict(type='Pretrained', checkpoint=sam_pretrain_ckpt_path, prefix='backbone.'),
66
+ ),
67
+ peft_cfg=dict(
68
+ r=16,
69
+ target_modules=["qkv"],
70
+ lora_dropout=0.01,
71
+ bias='lora_only',
72
+ ),
73
+ ),
74
+ neck=dict(
75
+ type='SequentialNeck',
76
+ necks=[
77
+ dict(
78
+ type='FeatureFusionNeck',
79
+ policy='concat',
80
+ out_indices=(0,)),
81
+ dict(
82
+ type='SimpleFPN',
83
+ backbone_channel=512,
84
+ in_channels=[128, 256, 512, 512],
85
+ out_channels=256,
86
+ num_outs=5,
87
+ norm_cfg=fpn_norm_cfg),
88
+ ],
89
+ ),
90
+ decode_head=dict(
91
+ type='MLPSegHead',
92
+ out_size=(128, 128),
93
+ in_channels=[256]*5,
94
+ in_index=[0, 1, 2, 3, 4],
95
+ channels=256,
96
+ dropout_ratio=0,
97
+ num_classes=2,
98
+ norm_cfg=norm_cfg,
99
+ align_corners=False,
100
+ loss_decode=dict(
101
+ type='mmseg.CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
102
+ train_cfg=dict(),
103
+ test_cfg=dict(mode='slide', crop_size=crop_size, stride=(crop_size[0]//2, crop_size[1]//2))
104
+ ) # yapf: disable
105
+
106
+ dataset_type = 'LEVIR_CD_Dataset'
107
+ data_root = '/mnt/levir_datasets/levir-cd'
108
+
109
+
110
+ train_pipeline = [
111
+ dict(type='MultiImgLoadImageFromFile'),
112
+ dict(type='MultiImgLoadAnnotations'),
113
+ dict(type='MultiImgRandomRotate', prob=0.5, degree=180),
114
+ dict(type='MultiImgRandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
115
+ dict(type='MultiImgRandomFlip', prob=0.5, direction='horizontal'),
116
+ dict(type='MultiImgRandomFlip', prob=0.5, direction='vertical'),
117
+ # dict(type='MultiImgExchangeTime', prob=0.5),
118
+ dict(
119
+ type='MultiImgPhotoMetricDistortion',
120
+ brightness_delta=10,
121
+ contrast_range=(0.8, 1.2),
122
+ saturation_range=(0.8, 1.2),
123
+ hue_delta=10),
124
+ dict(type='MultiImgPackSegInputs')
125
+ ]
126
+ test_pipeline = [
127
+ dict(type='MultiImgLoadImageFromFile', to_float32=True),
128
+ dict(type='MultiImgResize', scale=(1024, 1024), keep_ratio=True),
129
+ # add loading annotation after ``Resize`` because ground truth
130
+ # does not need to do resize data transform
131
+ dict(type='MultiImgLoadAnnotations'),
132
+ dict(type='MultiImgPackSegInputs')
133
+ ]
134
+
135
+ batch_size_per_gpu = 2
136
+
137
+ train_dataloader = dict(
138
+ batch_size=batch_size_per_gpu,
139
+ num_workers=8,
140
+ persistent_workers=True,
141
+ sampler=dict(type='DefaultSampler', shuffle=True),
142
+ dataset=dict(
143
+ type=dataset_type,
144
+ data_root=data_root,
145
+ data_prefix=dict(
146
+ seg_map_path='train/label',
147
+ img_path_from='train/A',
148
+ img_path_to='train/B'),
149
+ pipeline=train_pipeline)
150
+ )
151
+
152
+ val_dataloader = dict(
153
+ batch_size=1,
154
+ num_workers=4,
155
+ persistent_workers=True,
156
+ sampler=dict(type='DefaultSampler', shuffle=False),
157
+ dataset=dict(
158
+ type=dataset_type,
159
+ data_root=data_root,
160
+ data_prefix=dict(
161
+ seg_map_path='test/label',
162
+ img_path_from='test/A',
163
+ img_path_to='test/B'),
164
+ pipeline=test_pipeline)
165
+ )
166
+
167
+ test_dataloader = val_dataloader
168
+
169
+ val_evaluator = dict(
170
+ type='CDMetric',
171
+ )
172
+ test_evaluator = val_evaluator
173
+
174
+ max_epochs = 300
175
+ base_lr = 0.0004
176
+ param_scheduler = [
177
+ dict(
178
+ type='LinearLR', start_factor=1e-4, by_epoch=True, begin=0, end=5, convert_to_iter_based=True),
179
+ dict(
180
+ type='CosineAnnealingLR',
181
+ T_max=max_epochs,
182
+ begin=5,
183
+ by_epoch=True,
184
+ end=max_epochs,
185
+ convert_to_iter_based=True
186
+ ),
187
+ ]
188
+
189
+ train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=max_epochs, val_interval=5)
190
+ val_cfg = dict(type='ValLoop')
191
+ test_cfg = dict(type='TestLoop')
192
+
193
+
194
+ optim_wrapper = dict(
195
+ type='OptimWrapper',
196
+ optimizer=dict(
197
+ type='AdamW', lr=base_lr, betas=(0.9, 0.999), weight_decay=0.05),
198
+ )
199
+
demo/MMSegmentation_Tutorial.ipynb ADDED
@@ -0,0 +1,555 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "metadata": {
6
+ "colab_type": "text",
7
+ "id": "view-in-github"
8
+ },
9
+ "source": [
10
+ "<a href=\"https://colab.research.google.com/github/open-mmlab/mmsegmentation/blob/main/demo/MMSegmentation_Tutorial.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
11
+ ]
12
+ },
13
+ {
14
+ "cell_type": "markdown",
15
+ "metadata": {
16
+ "id": "FVmnaxFJvsb8"
17
+ },
18
+ "source": [
19
+ "# MMSegmentation Tutorial\n",
20
+ "Welcome to MMSegmentation! \n",
21
+ "\n",
22
+ "In this tutorial, we demo\n",
23
+ "* How to do inference with MMSeg trained weight\n",
24
+ "* How to train on your own dataset and visualize the results. "
25
+ ]
26
+ },
27
+ {
28
+ "cell_type": "markdown",
29
+ "metadata": {
30
+ "id": "QS8YHrEhbpas"
31
+ },
32
+ "source": [
33
+ "## Install MMSegmentation\n",
34
+ "This step may take several minutes. \n",
35
+ "\n",
36
+ "We use PyTorch 1.12 and CUDA 11.3 for this tutorial. You may install other versions by change the version number in pip install command. "
37
+ ]
38
+ },
39
+ {
40
+ "cell_type": "code",
41
+ "execution_count": null,
42
+ "metadata": {
43
+ "colab": {
44
+ "base_uri": "https://localhost:8080/"
45
+ },
46
+ "id": "UWyLrLYaNEaL",
47
+ "outputId": "32a47fe3-f10d-47a1-f6b9-b7c235abdab1"
48
+ },
49
+ "outputs": [],
50
+ "source": [
51
+ "# Check nvcc version\n",
52
+ "!nvcc -V\n",
53
+ "# Check GCC version\n",
54
+ "!gcc --version"
55
+ ]
56
+ },
57
+ {
58
+ "cell_type": "code",
59
+ "execution_count": null,
60
+ "metadata": {
61
+ "colab": {
62
+ "base_uri": "https://localhost:8080/"
63
+ },
64
+ "id": "Ki3WUBjKbutg",
65
+ "outputId": "14bd14b0-4d8c-4fa9-e3f9-da35c0efc0d5"
66
+ },
67
+ "outputs": [],
68
+ "source": [
69
+ "# Install PyTorch\n",
70
+ "!conda install pytorch==1.12.0 torchvision==0.13.0 torchaudio==0.12.0 cudatoolkit=11.3 -c pytorch\n",
71
+ "# Install mim\n",
72
+ "!pip install -U openmim\n",
73
+ "# Install mmengine\n",
74
+ "!mim install mmengine\n",
75
+ "# Install MMCV\n",
76
+ "!mim install 'mmcv >= 2.0.0rc1'\n"
77
+ ]
78
+ },
79
+ {
80
+ "cell_type": "code",
81
+ "execution_count": null,
82
+ "metadata": {
83
+ "colab": {
84
+ "base_uri": "https://localhost:8080/"
85
+ },
86
+ "id": "nR-hHRvbNJJZ",
87
+ "outputId": "10c3b131-d4db-458c-fc10-b94b1c6ed546"
88
+ },
89
+ "outputs": [],
90
+ "source": [
91
+ "!rm -rf mmsegmentation\n",
92
+ "!git clone -b main https://github.com/open-mmlab/mmsegmentation.git \n",
93
+ "%cd mmsegmentation\n",
94
+ "!pip install -e ."
95
+ ]
96
+ },
97
+ {
98
+ "cell_type": "code",
99
+ "execution_count": null,
100
+ "metadata": {
101
+ "colab": {
102
+ "base_uri": "https://localhost:8080/"
103
+ },
104
+ "id": "mAE_h7XhPT7d",
105
+ "outputId": "83bf0f8e-fc69-40b1-f9fe-0025724a217c"
106
+ },
107
+ "outputs": [],
108
+ "source": [
109
+ "# Check Pytorch installation\n",
110
+ "import torch, torchvision\n",
111
+ "print(torch.__version__, torch.cuda.is_available())\n",
112
+ "\n",
113
+ "# Check MMSegmentation installation\n",
114
+ "import mmseg\n",
115
+ "print(mmseg.__version__)"
116
+ ]
117
+ },
118
+ {
119
+ "cell_type": "markdown",
120
+ "metadata": {
121
+ "id": "Ta51clKX4cwM"
122
+ },
123
+ "source": [
124
+ "## Finetune a semantic segmentation model on a new dataset\n",
125
+ "\n",
126
+ "To finetune on a customized dataset, the following steps are necessary. \n",
127
+ "1. Add a new dataset class. \n",
128
+ "2. Create a config file accordingly. \n",
129
+ "3. Perform training and evaluation. "
130
+ ]
131
+ },
132
+ {
133
+ "cell_type": "markdown",
134
+ "metadata": {
135
+ "id": "AcZg6x_K5Zs3"
136
+ },
137
+ "source": [
138
+ "### Add a new dataset\n",
139
+ "\n",
140
+ "Datasets in MMSegmentation require image and semantic segmentation maps to be placed in folders with the same prefix. To support a new dataset, we may need to modify the original file structure. \n",
141
+ "\n",
142
+ "In this tutorial, we give an example of converting the dataset. You may refer to [docs](https://github.com/open-mmlab/mmsegmentation/blob/master/docs/en/tutorials/customize_datasets.md#customize-datasets-by-reorganizing-data) for details about dataset reorganization. \n",
143
+ "\n",
144
+ "We use [Stanford Background Dataset](http://dags.stanford.edu/projects/scenedataset.html) as an example. The dataset contains 715 images chosen from existing public datasets [LabelMe](http://labelme.csail.mit.edu), [MSRC](http://research.microsoft.com/en-us/projects/objectclassrecognition), [PASCAL VOC](http://pascallin.ecs.soton.ac.uk/challenges/VOC) and [Geometric Context](http://www.cs.illinois.edu/homes/dhoiem/). Images from these datasets are mainly outdoor scenes, each containing approximately 320-by-240 pixels. \n",
145
+ "In this tutorial, we use the region annotations as labels. There are 8 classes in total, i.e. sky, tree, road, grass, water, building, mountain, and foreground object. "
146
+ ]
147
+ },
148
+ {
149
+ "cell_type": "code",
150
+ "execution_count": null,
151
+ "metadata": {
152
+ "colab": {
153
+ "base_uri": "https://localhost:8080/"
154
+ },
155
+ "id": "TFIt7MHq5Wls",
156
+ "outputId": "74a126e4-c8a4-4d2f-a910-b58b71843a23"
157
+ },
158
+ "outputs": [],
159
+ "source": [
160
+ "# download and unzip\n",
161
+ "!wget http://dags.stanford.edu/data/iccv09Data.tar.gz -O stanford_background.tar.gz\n",
162
+ "!tar xf stanford_background.tar.gz"
163
+ ]
164
+ },
165
+ {
166
+ "cell_type": "code",
167
+ "execution_count": null,
168
+ "metadata": {
169
+ "colab": {
170
+ "base_uri": "https://localhost:8080/",
171
+ "height": 377
172
+ },
173
+ "id": "78LIci7F9WWI",
174
+ "outputId": "c432ddac-5a50-47b1-daac-5a26b07afea2"
175
+ },
176
+ "outputs": [],
177
+ "source": [
178
+ "# Let's take a look at the dataset\n",
179
+ "import mmcv\n",
180
+ "import mmengine\n",
181
+ "import matplotlib.pyplot as plt\n",
182
+ "\n",
183
+ "\n",
184
+ "img = mmcv.imread('iccv09Data/images/6000124.jpg')\n",
185
+ "plt.figure(figsize=(8, 6))\n",
186
+ "plt.imshow(mmcv.bgr2rgb(img))\n",
187
+ "plt.show()"
188
+ ]
189
+ },
190
+ {
191
+ "cell_type": "markdown",
192
+ "metadata": {
193
+ "id": "L5mNQuc2GsVE"
194
+ },
195
+ "source": [
196
+ "We need to convert the annotation into semantic map format as an image."
197
+ ]
198
+ },
199
+ {
200
+ "cell_type": "code",
201
+ "execution_count": null,
202
+ "metadata": {
203
+ "id": "WnGZfribFHCx"
204
+ },
205
+ "outputs": [],
206
+ "source": [
207
+ "# define dataset root and directory for images and annotations\n",
208
+ "data_root = 'iccv09Data'\n",
209
+ "img_dir = 'images'\n",
210
+ "ann_dir = 'labels'\n",
211
+ "# define class and palette for better visualization\n",
212
+ "classes = ('sky', 'tree', 'road', 'grass', 'water', 'bldg', 'mntn', 'fg obj')\n",
213
+ "palette = [[128, 128, 128], [129, 127, 38], [120, 69, 125], [53, 125, 34], \n",
214
+ " [0, 11, 123], [118, 20, 12], [122, 81, 25], [241, 134, 51]]"
215
+ ]
216
+ },
217
+ {
218
+ "cell_type": "code",
219
+ "execution_count": null,
220
+ "metadata": {
221
+ "id": "WnGZfribFHCx"
222
+ },
223
+ "outputs": [],
224
+ "source": [
225
+ "import os.path as osp\n",
226
+ "import numpy as np\n",
227
+ "from PIL import Image\n",
228
+ "\n",
229
+ "# convert dataset annotation to semantic segmentation map\n",
230
+ "for file in mmengine.scandir(osp.join(data_root, ann_dir), suffix='.regions.txt'):\n",
231
+ " seg_map = np.loadtxt(osp.join(data_root, ann_dir, file)).astype(np.uint8)\n",
232
+ " seg_img = Image.fromarray(seg_map).convert('P')\n",
233
+ " seg_img.putpalette(np.array(palette, dtype=np.uint8))\n",
234
+ " seg_img.save(osp.join(data_root, ann_dir, file.replace('.regions.txt', \n",
235
+ " '.png')))"
236
+ ]
237
+ },
238
+ {
239
+ "cell_type": "code",
240
+ "execution_count": null,
241
+ "metadata": {
242
+ "colab": {
243
+ "base_uri": "https://localhost:8080/",
244
+ "height": 377
245
+ },
246
+ "id": "5MCSS9ABfSks",
247
+ "outputId": "92b9bafc-589e-48fc-c9e9-476f125d6522"
248
+ },
249
+ "outputs": [],
250
+ "source": [
251
+ "# Let's take a look at the segmentation map we got\n",
252
+ "import matplotlib.patches as mpatches\n",
253
+ "img = Image.open('iccv09Data/labels/6000124.png')\n",
254
+ "plt.figure(figsize=(8, 6))\n",
255
+ "im = plt.imshow(np.array(img.convert('RGB')))\n",
256
+ "\n",
257
+ "# create a patch (proxy artist) for every color \n",
258
+ "patches = [mpatches.Patch(color=np.array(palette[i])/255., \n",
259
+ " label=classes[i]) for i in range(8)]\n",
260
+ "# put those patched as legend-handles into the legend\n",
261
+ "plt.legend(handles=patches, bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0., \n",
262
+ " fontsize='large')\n",
263
+ "\n",
264
+ "plt.show()"
265
+ ]
266
+ },
267
+ {
268
+ "cell_type": "code",
269
+ "execution_count": null,
270
+ "metadata": {
271
+ "id": "WbeLYCp2k5hl"
272
+ },
273
+ "outputs": [],
274
+ "source": [
275
+ "# split train/val set randomly\n",
276
+ "split_dir = 'splits'\n",
277
+ "mmengine.mkdir_or_exist(osp.join(data_root, split_dir))\n",
278
+ "filename_list = [osp.splitext(filename)[0] for filename in mmengine.scandir(\n",
279
+ " osp.join(data_root, ann_dir), suffix='.png')]\n",
280
+ "with open(osp.join(data_root, split_dir, 'train.txt'), 'w') as f:\n",
281
+ " # select first 4/5 as train set\n",
282
+ " train_length = int(len(filename_list)*4/5)\n",
283
+ " f.writelines(line + '\\n' for line in filename_list[:train_length])\n",
284
+ "with open(osp.join(data_root, split_dir, 'val.txt'), 'w') as f:\n",
285
+ " # select last 1/5 as train set\n",
286
+ " f.writelines(line + '\\n' for line in filename_list[train_length:])"
287
+ ]
288
+ },
289
+ {
290
+ "cell_type": "markdown",
291
+ "metadata": {
292
+ "id": "HchvmGYB_rrO"
293
+ },
294
+ "source": [
295
+ "After downloading the data, we need to implement `load_annotations` function in the new dataset class `StanfordBackgroundDataset`."
296
+ ]
297
+ },
298
+ {
299
+ "cell_type": "code",
300
+ "execution_count": null,
301
+ "metadata": {
302
+ "id": "LbsWOw62_o-X"
303
+ },
304
+ "outputs": [],
305
+ "source": [
306
+ "from mmseg.registry import DATASETS\n",
307
+ "from mmseg.datasets import BaseSegDataset\n",
308
+ "\n",
309
+ "\n",
310
+ "@DATASETS.register_module()\n",
311
+ "class StanfordBackgroundDataset(BaseSegDataset):\n",
312
+ " METAINFO = dict(classes = classes, palette = palette)\n",
313
+ " def __init__(self, **kwargs):\n",
314
+ " super().__init__(img_suffix='.jpg', seg_map_suffix='.png', **kwargs)\n",
315
+ " "
316
+ ]
317
+ },
318
+ {
319
+ "cell_type": "markdown",
320
+ "metadata": {
321
+ "id": "yUVtmn3Iq3WA"
322
+ },
323
+ "source": [
324
+ "### Create a config file\n",
325
+ "In the next step, we need to modify the config for the training. To accelerate the process, we finetune the model from trained weights."
326
+ ]
327
+ },
328
+ {
329
+ "cell_type": "code",
330
+ "execution_count": null,
331
+ "metadata": {},
332
+ "outputs": [],
333
+ "source": [
334
+ "# Download config and checkpoint files\n",
335
+ "!mim download mmsegmentation --config pspnet_r50-d8_4xb2-40k_cityscapes-512x1024 --dest ."
336
+ ]
337
+ },
338
+ {
339
+ "cell_type": "code",
340
+ "execution_count": null,
341
+ "metadata": {
342
+ "id": "Wwnj9tRzqX_A"
343
+ },
344
+ "outputs": [],
345
+ "source": [
346
+ "from mmengine import Config\n",
347
+ "cfg = Config.fromfile('configs/pspnet/pspnet_r50-d8_4xb2-40k_cityscapes-512x1024.py')\n",
348
+ "print(f'Config:\\n{cfg.pretty_text}')"
349
+ ]
350
+ },
351
+ {
352
+ "cell_type": "markdown",
353
+ "metadata": {
354
+ "id": "1y2oV5w97jQo"
355
+ },
356
+ "source": [
357
+ "Since the given config is used to train PSPNet on the cityscapes dataset, we need to modify it accordingly for our new dataset. "
358
+ ]
359
+ },
360
+ {
361
+ "cell_type": "code",
362
+ "execution_count": null,
363
+ "metadata": {
364
+ "colab": {
365
+ "base_uri": "https://localhost:8080/"
366
+ },
367
+ "id": "eyKnYC1Z7iCV",
368
+ "outputId": "6195217b-187f-4675-994b-ba90d8bb3078"
369
+ },
370
+ "outputs": [],
371
+ "source": [
372
+ "# Since we use only one GPU, BN is used instead of SyncBN\n",
373
+ "cfg.norm_cfg = dict(type='BN', requires_grad=True)\n",
374
+ "cfg.crop_size = (256, 256)\n",
375
+ "cfg.model.data_preprocessor.size = cfg.crop_size\n",
376
+ "cfg.model.backbone.norm_cfg = cfg.norm_cfg\n",
377
+ "cfg.model.decode_head.norm_cfg = cfg.norm_cfg\n",
378
+ "cfg.model.auxiliary_head.norm_cfg = cfg.norm_cfg\n",
379
+ "# modify num classes of the model in decode/auxiliary head\n",
380
+ "cfg.model.decode_head.num_classes = 8\n",
381
+ "cfg.model.auxiliary_head.num_classes = 8\n",
382
+ "\n",
383
+ "# Modify dataset type and path\n",
384
+ "cfg.dataset_type = 'StanfordBackgroundDataset'\n",
385
+ "cfg.data_root = data_root\n",
386
+ "\n",
387
+ "cfg.train_dataloader.batch_size = 8\n",
388
+ "\n",
389
+ "cfg.train_pipeline = [\n",
390
+ " dict(type='LoadImageFromFile'),\n",
391
+ " dict(type='LoadAnnotations'),\n",
392
+ " dict(type='RandomResize', scale=(320, 240), ratio_range=(0.5, 2.0), keep_ratio=True),\n",
393
+ " dict(type='RandomCrop', crop_size=cfg.crop_size, cat_max_ratio=0.75),\n",
394
+ " dict(type='RandomFlip', prob=0.5),\n",
395
+ " dict(type='PackSegInputs')\n",
396
+ "]\n",
397
+ "\n",
398
+ "cfg.test_pipeline = [\n",
399
+ " dict(type='LoadImageFromFile'),\n",
400
+ " dict(type='Resize', scale=(320, 240), keep_ratio=True),\n",
401
+ " # add loading annotation after ``Resize`` because ground truth\n",
402
+ " # does not need to do resize data transform\n",
403
+ " dict(type='LoadAnnotations'),\n",
404
+ " dict(type='PackSegInputs')\n",
405
+ "]\n",
406
+ "\n",
407
+ "\n",
408
+ "cfg.train_dataloader.dataset.type = cfg.dataset_type\n",
409
+ "cfg.train_dataloader.dataset.data_root = cfg.data_root\n",
410
+ "cfg.train_dataloader.dataset.data_prefix = dict(img_path=img_dir, seg_map_path=ann_dir)\n",
411
+ "cfg.train_dataloader.dataset.pipeline = cfg.train_pipeline\n",
412
+ "cfg.train_dataloader.dataset.ann_file = 'splits/train.txt'\n",
413
+ "\n",
414
+ "cfg.val_dataloader.dataset.type = cfg.dataset_type\n",
415
+ "cfg.val_dataloader.dataset.data_root = cfg.data_root\n",
416
+ "cfg.val_dataloader.dataset.data_prefix = dict(img_path=img_dir, seg_map_path=ann_dir)\n",
417
+ "cfg.val_dataloader.dataset.pipeline = cfg.test_pipeline\n",
418
+ "cfg.val_dataloader.dataset.ann_file = 'splits/val.txt'\n",
419
+ "\n",
420
+ "cfg.test_dataloader = cfg.val_dataloader\n",
421
+ "\n",
422
+ "\n",
423
+ "# Load the pretrained weights\n",
424
+ "cfg.load_from = 'pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth'\n",
425
+ "\n",
426
+ "# Set up working dir to save files and logs.\n",
427
+ "cfg.work_dir = './work_dirs/tutorial'\n",
428
+ "\n",
429
+ "cfg.train_cfg.max_iters = 200\n",
430
+ "cfg.train_cfg.val_interval = 200\n",
431
+ "cfg.default_hooks.logger.interval = 10\n",
432
+ "cfg.default_hooks.checkpoint.interval = 200\n",
433
+ "\n",
434
+ "# Set seed to facilitate reproducing the result\n",
435
+ "cfg['randomness'] = dict(seed=0)\n",
436
+ "\n",
437
+ "# Let's have a look at the final config used for training\n",
438
+ "print(f'Config:\\n{cfg.pretty_text}')"
439
+ ]
440
+ },
441
+ {
442
+ "cell_type": "markdown",
443
+ "metadata": {
444
+ "id": "QWuH14LYF2gQ"
445
+ },
446
+ "source": [
447
+ "### Train and Evaluation"
448
+ ]
449
+ },
450
+ {
451
+ "cell_type": "code",
452
+ "execution_count": null,
453
+ "metadata": {
454
+ "colab": {
455
+ "base_uri": "https://localhost:8080/"
456
+ },
457
+ "id": "jYKoSfdMF12B",
458
+ "outputId": "422219ca-d7a5-4890-f09f-88c959942e64"
459
+ },
460
+ "outputs": [],
461
+ "source": [
462
+ "from mmengine.runner import Runner\n",
463
+ "\n",
464
+ "runner = Runner.from_cfg(cfg)"
465
+ ]
466
+ },
467
+ {
468
+ "cell_type": "code",
469
+ "execution_count": null,
470
+ "metadata": {},
471
+ "outputs": [],
472
+ "source": [
473
+ "# start training\n",
474
+ "runner.train()"
475
+ ]
476
+ },
477
+ {
478
+ "cell_type": "markdown",
479
+ "metadata": {
480
+ "id": "DEkWOP-NMbc_"
481
+ },
482
+ "source": [
483
+ "Inference with trained model"
484
+ ]
485
+ },
486
+ {
487
+ "cell_type": "code",
488
+ "execution_count": null,
489
+ "metadata": {
490
+ "colab": {
491
+ "base_uri": "https://localhost:8080/",
492
+ "height": 645
493
+ },
494
+ "id": "ekG__UfaH_OU",
495
+ "outputId": "1437419c-869a-4902-df86-d4f6f8b2597a"
496
+ },
497
+ "outputs": [],
498
+ "source": [
499
+ "from mmseg.apis import init_model, inference_model, show_result_pyplot\n",
500
+ "\n",
501
+ "# Init the model from the config and the checkpoint\n",
502
+ "checkpoint_path = './work_dirs/tutorial/iter_200.pth'\n",
503
+ "model = init_model(cfg, checkpoint_path, 'cuda:0')\n",
504
+ "\n",
505
+ "img = mmcv.imread('iccv09Data/images/6000124.jpg')\n",
506
+ "result = inference_model(model, img)\n",
507
+ "plt.figure(figsize=(8, 6))\n",
508
+ "vis_result = show_result_pyplot(model, img, result)\n",
509
+ "plt.imshow(mmcv.bgr2rgb(vis_result))\n"
510
+ ]
511
+ }
512
+ ],
513
+ "metadata": {
514
+ "accelerator": "GPU",
515
+ "colab": {
516
+ "collapsed_sections": [],
517
+ "include_colab_link": true,
518
+ "name": "MMSegmentation Tutorial.ipynb",
519
+ "provenance": []
520
+ },
521
+ "kernelspec": {
522
+ "display_name": "Python 3.10.6 ('pt1.12')",
523
+ "language": "python",
524
+ "name": "python3"
525
+ },
526
+ "language_info": {
527
+ "codemirror_mode": {
528
+ "name": "ipython",
529
+ "version": 3
530
+ },
531
+ "file_extension": ".py",
532
+ "mimetype": "text/x-python",
533
+ "name": "python",
534
+ "nbconvert_exporter": "python",
535
+ "pygments_lexer": "ipython3",
536
+ "version": "3.10.6"
537
+ },
538
+ "pycharm": {
539
+ "stem_cell": {
540
+ "cell_type": "raw",
541
+ "metadata": {
542
+ "collapsed": false
543
+ },
544
+ "source": []
545
+ }
546
+ },
547
+ "vscode": {
548
+ "interpreter": {
549
+ "hash": "0442e67aee3d9cbb788fa6e86d60c4ffa94ad7f1943c65abfecb99a6f4696c58"
550
+ }
551
+ }
552
+ },
553
+ "nbformat": 4,
554
+ "nbformat_minor": 2
555
+ }
demo/classroom__rgb_00283.jpg ADDED
demo/demo.png ADDED
demo/image_demo.py ADDED
@@ -0,0 +1,51 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from argparse import ArgumentParser
3
+
4
+ from mmengine.model import revert_sync_batchnorm
5
+
6
+ from mmseg.apis import inference_model, init_model, show_result_pyplot
7
+
8
+
9
+ def main():
10
+ parser = ArgumentParser()
11
+ parser.add_argument('img', help='Image file')
12
+ parser.add_argument('config', help='Config file')
13
+ parser.add_argument('checkpoint', help='Checkpoint file')
14
+ parser.add_argument('--out-file', default=None, help='Path to output file')
15
+ parser.add_argument(
16
+ '--device', default='cuda:0', help='Device used for inference')
17
+ parser.add_argument(
18
+ '--opacity',
19
+ type=float,
20
+ default=0.5,
21
+ help='Opacity of painted segmentation map. In (0, 1] range.')
22
+ parser.add_argument(
23
+ '--with-labels',
24
+ action='store_true',
25
+ default=False,
26
+ help='Whether to display the class labels.')
27
+ parser.add_argument(
28
+ '--title', default='result', help='The image identifier.')
29
+ args = parser.parse_args()
30
+
31
+ # build the model from a config file and a checkpoint file
32
+ model = init_model(args.config, args.checkpoint, device=args.device)
33
+ if args.device == 'cpu':
34
+ model = revert_sync_batchnorm(model)
35
+ # test a single image
36
+ result = inference_model(model, args.img)
37
+ # show the results
38
+ show_result_pyplot(
39
+ model,
40
+ args.img,
41
+ result,
42
+ title=args.title,
43
+ opacity=args.opacity,
44
+ with_labels=args.with_labels,
45
+ draw_gt=False,
46
+ show=False if args.out_file is not None else True,
47
+ out_file=args.out_file)
48
+
49
+
50
+ if __name__ == '__main__':
51
+ main()
demo/image_demo_with_inferencer.py ADDED
@@ -0,0 +1,54 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from argparse import ArgumentParser
3
+
4
+ from mmseg.apis import MMSegInferencer
5
+
6
+
7
+ def main():
8
+ parser = ArgumentParser()
9
+ parser.add_argument('img', help='Image file')
10
+ parser.add_argument('model', help='Config file')
11
+ parser.add_argument('--checkpoint', default=None, help='Checkpoint file')
12
+ parser.add_argument(
13
+ '--out-dir', default='', help='Path to save result file')
14
+ parser.add_argument(
15
+ '--show',
16
+ action='store_true',
17
+ default=False,
18
+ help='Whether to display the drawn image.')
19
+ parser.add_argument(
20
+ '--dataset-name',
21
+ default='cityscapes',
22
+ help='Color palette used for segmentation map')
23
+ parser.add_argument(
24
+ '--device', default='cuda:0', help='Device used for inference')
25
+ parser.add_argument(
26
+ '--opacity',
27
+ type=float,
28
+ default=0.5,
29
+ help='Opacity of painted segmentation map. In (0, 1] range.')
30
+ parser.add_argument(
31
+ '--with-labels',
32
+ action='store_true',
33
+ default=False,
34
+ help='Whether to display the class labels.')
35
+ args = parser.parse_args()
36
+
37
+ # build the model from a config file and a checkpoint file
38
+ mmseg_inferencer = MMSegInferencer(
39
+ args.model,
40
+ args.checkpoint,
41
+ dataset_name=args.dataset_name,
42
+ device=args.device)
43
+
44
+ # test a single image
45
+ mmseg_inferencer(
46
+ args.img,
47
+ show=args.show,
48
+ out_dir=args.out_dir,
49
+ opacity=args.opacity,
50
+ with_labels=args.with_labels)
51
+
52
+
53
+ if __name__ == '__main__':
54
+ main()
demo/inference_demo.ipynb ADDED
@@ -0,0 +1,120 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": null,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "!mkdir ../checkpoints\n",
10
+ "!wget https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth -P ../checkpoints"
11
+ ]
12
+ },
13
+ {
14
+ "cell_type": "code",
15
+ "execution_count": null,
16
+ "metadata": {
17
+ "pycharm": {
18
+ "is_executing": true
19
+ }
20
+ },
21
+ "outputs": [],
22
+ "source": [
23
+ "import torch\n",
24
+ "import matplotlib.pyplot as plt\n",
25
+ "from mmengine.model.utils import revert_sync_batchnorm\n",
26
+ "from mmseg.apis import init_model, inference_model, show_result_pyplot"
27
+ ]
28
+ },
29
+ {
30
+ "cell_type": "code",
31
+ "execution_count": null,
32
+ "metadata": {
33
+ "pycharm": {
34
+ "is_executing": true
35
+ }
36
+ },
37
+ "outputs": [],
38
+ "source": [
39
+ "config_file = '../configs/pspnet/pspnet_r50-d8_4xb2-40k_cityscapes-512x1024.py'\n",
40
+ "checkpoint_file = '../checkpoints/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth'"
41
+ ]
42
+ },
43
+ {
44
+ "cell_type": "code",
45
+ "execution_count": null,
46
+ "metadata": {},
47
+ "outputs": [],
48
+ "source": [
49
+ "# build the model from a config file and a checkpoint file\n",
50
+ "model = init_model(config_file, checkpoint_file, device='cpu')"
51
+ ]
52
+ },
53
+ {
54
+ "cell_type": "code",
55
+ "execution_count": null,
56
+ "metadata": {},
57
+ "outputs": [],
58
+ "source": [
59
+ "# test a single image\n",
60
+ "img = 'demo.png'\n",
61
+ "if not torch.cuda.is_available():\n",
62
+ " model = revert_sync_batchnorm(model)\n",
63
+ "result = inference_model(model, img)"
64
+ ]
65
+ },
66
+ {
67
+ "cell_type": "code",
68
+ "execution_count": null,
69
+ "metadata": {},
70
+ "outputs": [],
71
+ "source": [
72
+ "# show the results\n",
73
+ "vis_result = show_result_pyplot(model, img, result, show=False)\n",
74
+ "plt.imshow(vis_result)"
75
+ ]
76
+ },
77
+ {
78
+ "cell_type": "code",
79
+ "execution_count": null,
80
+ "metadata": {},
81
+ "outputs": [],
82
+ "source": []
83
+ }
84
+ ],
85
+ "metadata": {
86
+ "kernelspec": {
87
+ "display_name": "pt1.13",
88
+ "language": "python",
89
+ "name": "python3"
90
+ },
91
+ "language_info": {
92
+ "codemirror_mode": {
93
+ "name": "ipython",
94
+ "version": 3
95
+ },
96
+ "file_extension": ".py",
97
+ "mimetype": "text/x-python",
98
+ "name": "python",
99
+ "nbconvert_exporter": "python",
100
+ "pygments_lexer": "ipython3",
101
+ "version": "3.10.11"
102
+ },
103
+ "pycharm": {
104
+ "stem_cell": {
105
+ "cell_type": "raw",
106
+ "metadata": {
107
+ "collapsed": false
108
+ },
109
+ "source": []
110
+ }
111
+ },
112
+ "vscode": {
113
+ "interpreter": {
114
+ "hash": "f61d5b8fecdd960739697f6c2860080d7b76a5be5d896cb034bdb275ab3ddda0"
115
+ }
116
+ }
117
+ },
118
+ "nbformat": 4,
119
+ "nbformat_minor": 4
120
+ }
demo/rs_image_inference.py ADDED
@@ -0,0 +1,50 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from argparse import ArgumentParser
3
+
4
+ from mmseg.apis import RSImage, RSInferencer
5
+
6
+
7
+ def main():
8
+ parser = ArgumentParser()
9
+ parser.add_argument('image', help='Image file path')
10
+ parser.add_argument('config', help='Config file')
11
+ parser.add_argument('checkpoint', help='Checkpoint file')
12
+ parser.add_argument(
13
+ '--output-path',
14
+ help='Path to save result image',
15
+ default='result.png')
16
+ parser.add_argument(
17
+ '--batch-size',
18
+ type=int,
19
+ default=1,
20
+ help='maximum number of windows inferred simultaneously')
21
+ parser.add_argument(
22
+ '--window-size',
23
+ help='window xsize,ysize',
24
+ default=(224, 224),
25
+ type=int,
26
+ nargs=2)
27
+ parser.add_argument(
28
+ '--stride',
29
+ help='window xstride,ystride',
30
+ default=(224, 224),
31
+ type=int,
32
+ nargs=2)
33
+ parser.add_argument(
34
+ '--thread', default=1, type=int, help='number of inference threads')
35
+ parser.add_argument(
36
+ '--device', default='cuda:0', help='Device used for inference')
37
+ args = parser.parse_args()
38
+ inferencer = RSInferencer.from_config_path(
39
+ args.config,
40
+ args.checkpoint,
41
+ batch_size=args.batch_size,
42
+ thread=args.thread,
43
+ device=args.device)
44
+ image = RSImage(args.image)
45
+
46
+ inferencer.run(image, args.window_size, args.stride, args.output_path)
47
+
48
+
49
+ if __name__ == '__main__':
50
+ main()
demo/video_demo.py ADDED
@@ -0,0 +1,112 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from argparse import ArgumentParser
3
+
4
+ import cv2
5
+ from mmengine.model.utils import revert_sync_batchnorm
6
+
7
+ from mmseg.apis import inference_model, init_model
8
+ from mmseg.apis.inference import show_result_pyplot
9
+
10
+
11
+ def main():
12
+ parser = ArgumentParser()
13
+ parser.add_argument('video', help='Video file or webcam id')
14
+ parser.add_argument('config', help='Config file')
15
+ parser.add_argument('checkpoint', help='Checkpoint file')
16
+ parser.add_argument(
17
+ '--device', default='cuda:0', help='Device used for inference')
18
+ parser.add_argument(
19
+ '--palette',
20
+ default='cityscapes',
21
+ help='Color palette used for segmentation map')
22
+ parser.add_argument(
23
+ '--show', action='store_true', help='Whether to show draw result')
24
+ parser.add_argument(
25
+ '--show-wait-time', default=1, type=int, help='Wait time after imshow')
26
+ parser.add_argument(
27
+ '--output-file', default=None, type=str, help='Output video file path')
28
+ parser.add_argument(
29
+ '--output-fourcc',
30
+ default='MJPG',
31
+ type=str,
32
+ help='Fourcc of the output video')
33
+ parser.add_argument(
34
+ '--output-fps', default=-1, type=int, help='FPS of the output video')
35
+ parser.add_argument(
36
+ '--output-height',
37
+ default=-1,
38
+ type=int,
39
+ help='Frame height of the output video')
40
+ parser.add_argument(
41
+ '--output-width',
42
+ default=-1,
43
+ type=int,
44
+ help='Frame width of the output video')
45
+ parser.add_argument(
46
+ '--opacity',
47
+ type=float,
48
+ default=0.5,
49
+ help='Opacity of painted segmentation map. In (0, 1] range.')
50
+ args = parser.parse_args()
51
+
52
+ assert args.show or args.output_file, \
53
+ 'At least one output should be enabled.'
54
+
55
+ # build the model from a config file and a checkpoint file
56
+ model = init_model(args.config, args.checkpoint, device=args.device)
57
+ if args.device == 'cpu':
58
+ model = revert_sync_batchnorm(model)
59
+
60
+ # build input video
61
+ if args.video.isdigit():
62
+ args.video = int(args.video)
63
+ cap = cv2.VideoCapture(args.video)
64
+ assert (cap.isOpened())
65
+ input_height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
66
+ input_width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
67
+ input_fps = cap.get(cv2.CAP_PROP_FPS)
68
+
69
+ # init output video
70
+ writer = None
71
+ output_height = None
72
+ output_width = None
73
+ if args.output_file is not None:
74
+ fourcc = cv2.VideoWriter_fourcc(*args.output_fourcc)
75
+ output_fps = args.output_fps if args.output_fps > 0 else input_fps
76
+ output_height = args.output_height if args.output_height > 0 else int(
77
+ input_height)
78
+ output_width = args.output_width if args.output_width > 0 else int(
79
+ input_width)
80
+ writer = cv2.VideoWriter(args.output_file, fourcc, output_fps,
81
+ (output_width, output_height), True)
82
+
83
+ # start looping
84
+ try:
85
+ while True:
86
+ flag, frame = cap.read()
87
+ if not flag:
88
+ break
89
+
90
+ # test a single image
91
+ result = inference_model(model, frame)
92
+
93
+ # blend raw image and prediction
94
+ draw_img = show_result_pyplot(model, frame, result)
95
+
96
+ if args.show:
97
+ cv2.imshow('video_demo', draw_img)
98
+ cv2.waitKey(args.show_wait_time)
99
+ if writer:
100
+ if draw_img.shape[0] != output_height or draw_img.shape[
101
+ 1] != output_width:
102
+ draw_img = cv2.resize(draw_img,
103
+ (output_width, output_height))
104
+ writer.write(draw_img)
105
+ finally:
106
+ if writer:
107
+ writer.release()
108
+ cap.release()
109
+
110
+
111
+ if __name__ == '__main__':
112
+ main()
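The per-frame calls used above (init_model, inference_model, show_result_pyplot) behave the same on a single image; a minimal sketch with placeholder config/checkpoint paths:

from mmseg.apis import inference_model, init_model
from mmseg.apis.inference import show_result_pyplot

# Placeholder paths: any trained mmseg config/checkpoint pair works here.
model = init_model('configs/my_seg_config.py', 'work_dirs/my_seg_ckpt.pth',
                   device='cuda:0')
result = inference_model(model, 'demo/demo.png')  # image path or loaded ndarray
# Returns the blended image, exactly as done per frame in the loop above.
drawn = show_result_pyplot(model, 'demo/demo.png', result)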
mmdet/.DS_Store ADDED
Binary file (8.2 kB). View file
 
mmdet/__init__.py ADDED
@@ -0,0 +1,27 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ import mmcv
3
+ import mmengine
4
+ from mmengine.utils import digit_version
5
+
6
+ from .version import __version__, version_info
7
+
8
+ mmcv_minimum_version = '2.0.0rc4'
9
+ mmcv_maximum_version = '2.2.0'
10
+ mmcv_version = digit_version(mmcv.__version__)
11
+
12
+ mmengine_minimum_version = '0.7.1'
13
+ mmengine_maximum_version = '1.0.0'
14
+ mmengine_version = digit_version(mmengine.__version__)
15
+
16
+ assert (mmcv_version >= digit_version(mmcv_minimum_version)
17
+ and mmcv_version < digit_version(mmcv_maximum_version)), \
18
+ f'MMCV=={mmcv.__version__} is used but incompatible. ' \
19
+ f'Please install mmcv>={mmcv_minimum_version}, <{mmcv_maximum_version}.'
20
+
21
+ assert (mmengine_version >= digit_version(mmengine_minimum_version)
22
+ and mmengine_version < digit_version(mmengine_maximum_version)), \
23
+ f'MMEngine=={mmengine.__version__} is used but incompatible. ' \
24
+ f'Please install mmengine>={mmengine_minimum_version}, ' \
25
+ f'<{mmengine_maximum_version}.'
26
+
27
+ __all__ = ['__version__', 'version_info', 'digit_version']
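The version gate above works because digit_version turns a version string into a comparable tuple in which pre-releases sort below the corresponding final release; a small illustrative sketch:

from mmengine.utils import digit_version

# rc releases compare lower than the final release, so '2.0.0rc4' satisfies
# the mmcv_minimum_version bound while still being below '2.2.0'.
assert digit_version('2.0.0rc4') < digit_version('2.0.0') < digit_version('2.2.0')
assert digit_version('0.7.1') < digit_version('1.0.0')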
mmdet/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (1.29 kB). View file
 
mmdet/__pycache__/registry.cpython-311.pyc ADDED
Binary file (3.82 kB). View file
 
mmdet/__pycache__/version.cpython-311.pyc ADDED
Binary file (1.35 kB). View file
 
mmdet/apis/__init__.py ADDED
@@ -0,0 +1,9 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from .det_inferencer import DetInferencer
3
+ from .inference import (async_inference_detector, inference_detector,
4
+ inference_mot, init_detector, init_track_model)
5
+
6
+ __all__ = [
7
+ 'init_detector', 'async_inference_detector', 'inference_detector',
8
+ 'DetInferencer', 'inference_mot', 'init_track_model'
9
+ ]
mmdet/apis/det_inferencer.py ADDED
@@ -0,0 +1,644 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ import copy
3
+ import os.path as osp
4
+ import warnings
5
+ from typing import Dict, Iterable, List, Optional, Sequence, Tuple, Union
6
+
7
+ import mmcv
8
+ import mmengine
9
+ import numpy as np
10
+ import torch.nn as nn
11
+ from mmcv.transforms import LoadImageFromFile
12
+ from mmengine.dataset import Compose
13
+ from mmengine.fileio import (get_file_backend, isdir, join_path,
14
+ list_dir_or_file)
15
+ from mmengine.infer.infer import BaseInferencer, ModelType
16
+ from mmengine.model.utils import revert_sync_batchnorm
17
+ from mmengine.registry import init_default_scope
18
+ from mmengine.runner.checkpoint import _load_checkpoint_to_model
19
+ from mmengine.visualization import Visualizer
20
+ from rich.progress import track
21
+
22
+ from mmdet.evaluation import INSTANCE_OFFSET
23
+ from mmdet.registry import DATASETS
24
+ from mmdet.structures import DetDataSample
25
+ from mmdet.structures.mask import encode_mask_results, mask2bbox
26
+ from mmdet.utils import ConfigType
27
+ from ..evaluation import get_classes
28
+
29
+ try:
30
+ from panopticapi.evaluation import VOID
31
+ from panopticapi.utils import id2rgb
32
+ except ImportError:
33
+ id2rgb = None
34
+ VOID = None
35
+
36
+ InputType = Union[str, np.ndarray]
37
+ InputsType = Union[InputType, Sequence[InputType]]
38
+ PredType = List[DetDataSample]
39
+ ImgType = Union[np.ndarray, Sequence[np.ndarray]]
40
+
41
+ IMG_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm', '.tif',
42
+ '.tiff', '.webp')
43
+
44
+
45
+ class DetInferencer(BaseInferencer):
46
+ """Object Detection Inferencer.
47
+
48
+ Args:
49
+ model (str, optional): Path to the config file or the model name
50
+ defined in metafile. For example, it could be
51
+ "rtmdet-s" or 'rtmdet_s_8xb32-300e_coco' or
52
+ "configs/rtmdet/rtmdet_s_8xb32-300e_coco.py".
53
+ If model is not specified, user must provide the
54
+ `weights` saved by MMEngine which contains the config string.
55
+ Defaults to None.
56
+ weights (str, optional): Path to the checkpoint. If it is not specified
57
+ and model is a model name of metafile, the weights will be loaded
58
+ from metafile. Defaults to None.
59
+ device (str, optional): Device to run inference. If None, the available
60
+ device will be automatically used. Defaults to None.
61
+ scope (str, optional): The scope of the model. Defaults to mmdet.
62
+ palette (str): Color palette used for visualization. The order of
63
+ priority is palette -> config -> checkpoint. Defaults to 'none'.
64
+ show_progress (bool): Control whether to display the progress
65
+ bar during the inference process. Defaults to True.
66
+ """
67
+
68
+ preprocess_kwargs: set = set()
69
+ forward_kwargs: set = set()
70
+ visualize_kwargs: set = {
71
+ 'return_vis',
72
+ 'show',
73
+ 'wait_time',
74
+ 'draw_pred',
75
+ 'pred_score_thr',
76
+ 'img_out_dir',
77
+ 'no_save_vis',
78
+ }
79
+ postprocess_kwargs: set = {
80
+ 'print_result',
81
+ 'pred_out_dir',
82
+ 'return_datasamples',
83
+ 'no_save_pred',
84
+ }
85
+
86
+ def __init__(self,
87
+ model: Optional[Union[ModelType, str]] = None,
88
+ weights: Optional[str] = None,
89
+ device: Optional[str] = None,
90
+ scope: Optional[str] = 'mmdet',
91
+ palette: str = 'none',
92
+ show_progress: bool = True) -> None:
93
+ # A global counter tracking the number of images processed, for
94
+ # naming of the output images
95
+ self.num_visualized_imgs = 0
96
+ self.num_predicted_imgs = 0
97
+ self.palette = palette
98
+ init_default_scope(scope)
99
+ super().__init__(
100
+ model=model, weights=weights, device=device, scope=scope)
101
+ self.model = revert_sync_batchnorm(self.model)
102
+ self.show_progress = show_progress
103
+
104
+ def _load_weights_to_model(self, model: nn.Module,
105
+ checkpoint: Optional[dict],
106
+ cfg: Optional[ConfigType]) -> None:
107
+ """Loading model weights and meta information from cfg and checkpoint.
108
+
109
+ Args:
110
+ model (nn.Module): Model to load weights and meta information.
111
+ checkpoint (dict, optional): The loaded checkpoint.
112
+ cfg (Config or ConfigDict, optional): The loaded config.
113
+ """
114
+
115
+ if checkpoint is not None:
116
+ _load_checkpoint_to_model(model, checkpoint)
117
+ checkpoint_meta = checkpoint.get('meta', {})
118
+ # save the dataset_meta in the model for convenience
119
+ if 'dataset_meta' in checkpoint_meta:
120
+ # mmdet 3.x, all keys should be lowercase
121
+ model.dataset_meta = {
122
+ k.lower(): v
123
+ for k, v in checkpoint_meta['dataset_meta'].items()
124
+ }
125
+ elif 'CLASSES' in checkpoint_meta:
126
+ # < mmdet 3.x
127
+ classes = checkpoint_meta['CLASSES']
128
+ model.dataset_meta = {'classes': classes}
129
+ else:
130
+ warnings.warn(
131
+ 'dataset_meta or class names are not saved in the '
132
+ 'checkpoint\'s meta data, use COCO classes by default.')
133
+ model.dataset_meta = {'classes': get_classes('coco')}
134
+ else:
135
+ warnings.warn('Checkpoint is not loaded, and the inference '
136
+ 'result is calculated by the randomly initialized '
137
+ 'model!')
138
+ warnings.warn('weights is None, use COCO classes by default.')
139
+ model.dataset_meta = {'classes': get_classes('coco')}
140
+
141
+ # Priority: args.palette -> config -> checkpoint
142
+ if self.palette != 'none':
143
+ model.dataset_meta['palette'] = self.palette
144
+ else:
145
+ test_dataset_cfg = copy.deepcopy(cfg.test_dataloader.dataset)
146
+ # lazy init. We only need the metainfo.
147
+ test_dataset_cfg['lazy_init'] = True
148
+ metainfo = DATASETS.build(test_dataset_cfg).metainfo
149
+ cfg_palette = metainfo.get('palette', None)
150
+ if cfg_palette is not None:
151
+ model.dataset_meta['palette'] = cfg_palette
152
+ else:
153
+ if 'palette' not in model.dataset_meta:
154
+ warnings.warn(
155
+ 'palette does not exist, random is used by default. '
156
+ 'You can also set the palette to customize.')
157
+ model.dataset_meta['palette'] = 'random'
158
+
159
+ def _init_pipeline(self, cfg: ConfigType) -> Compose:
160
+ """Initialize the test pipeline."""
161
+ pipeline_cfg = cfg.test_dataloader.dataset.pipeline
162
+
163
+ # For inference, the key of ``img_id`` is not used.
164
+ if 'meta_keys' in pipeline_cfg[-1]:
165
+ pipeline_cfg[-1]['meta_keys'] = tuple(
166
+ meta_key for meta_key in pipeline_cfg[-1]['meta_keys']
167
+ if meta_key != 'img_id')
168
+
169
+ load_img_idx = self._get_transform_idx(
170
+ pipeline_cfg, ('LoadImageFromFile', LoadImageFromFile))
171
+ if load_img_idx == -1:
172
+ raise ValueError(
173
+ 'LoadImageFromFile is not found in the test pipeline')
174
+ pipeline_cfg[load_img_idx]['type'] = 'mmdet.InferencerLoader'
175
+ return Compose(pipeline_cfg)
176
+
177
+ def _get_transform_idx(self, pipeline_cfg: ConfigType,
178
+ name: Union[str, Tuple[str, type]]) -> int:
179
+ """Returns the index of the transform in a pipeline.
180
+
181
+ If the transform is not found, returns -1.
182
+ """
183
+ for i, transform in enumerate(pipeline_cfg):
184
+ if transform['type'] in name:
185
+ return i
186
+ return -1
187
+
188
+ def _init_visualizer(self, cfg: ConfigType) -> Optional[Visualizer]:
189
+ """Initialize visualizers.
190
+
191
+ Args:
192
+ cfg (ConfigType): Config containing the visualizer information.
193
+
194
+ Returns:
195
+ Visualizer or None: Visualizer initialized with config.
196
+ """
197
+ visualizer = super()._init_visualizer(cfg)
198
+ visualizer.dataset_meta = self.model.dataset_meta
199
+ return visualizer
200
+
201
+ def _inputs_to_list(self, inputs: InputsType) -> list:
202
+ """Preprocess the inputs to a list.
203
+
204
+ Preprocess inputs to a list according to its type:
205
+
206
+ - list or tuple: return inputs
207
+ - str:
208
+ - Directory path: return all files in the directory
209
+ - other cases: return a list containing the string. The string
210
+ could be a path to file, a url or other types of string according
211
+ to the task.
212
+
213
+ Args:
214
+ inputs (InputsType): Inputs for the inferencer.
215
+
216
+ Returns:
217
+ list: List of input for the :meth:`preprocess`.
218
+ """
219
+ if isinstance(inputs, str):
220
+ backend = get_file_backend(inputs)
221
+ if hasattr(backend, 'isdir') and isdir(inputs):
222
+ # Backends like HttpsBackend do not implement `isdir`, so only
223
+ # those backends that implement `isdir` could accept the inputs
224
+ # as a directory
225
+ filename_list = list_dir_or_file(
226
+ inputs, list_dir=False, suffix=IMG_EXTENSIONS)
227
+ inputs = [
228
+ join_path(inputs, filename) for filename in filename_list
229
+ ]
230
+
231
+ if not isinstance(inputs, (list, tuple)):
232
+ inputs = [inputs]
233
+
234
+ return list(inputs)
235
+
236
+ def preprocess(self, inputs: InputsType, batch_size: int = 1, **kwargs):
237
+ """Process the inputs into a model-feedable format.
238
+
239
+ Customize your preprocess by overriding this method. Preprocess should
240
+ return an iterable object, of which each item will be used as the
241
+ input of ``model.test_step``.
242
+
243
+ ``BaseInferencer.preprocess`` will return an iterable chunked data,
244
+ which will be used in __call__ like this:
245
+
246
+ .. code-block:: python
247
+
248
+ def __call__(self, inputs, batch_size=1, **kwargs):
249
+ chunked_data = self.preprocess(inputs, batch_size, **kwargs)
250
+ for batch in chunked_data:
251
+ preds = self.forward(batch, **kwargs)
252
+
253
+ Args:
254
+ inputs (InputsType): Inputs given by user.
255
+ batch_size (int): batch size. Defaults to 1.
256
+
257
+ Yields:
258
+ Any: Data processed by the ``pipeline`` and ``collate_fn``.
259
+ """
260
+ chunked_data = self._get_chunk_data(inputs, batch_size)
261
+ yield from map(self.collate_fn, chunked_data)
262
+
263
+ def _get_chunk_data(self, inputs: Iterable, chunk_size: int):
264
+ """Get batch data from inputs.
265
+
266
+ Args:
267
+ inputs (Iterable): An iterable dataset.
268
+ chunk_size (int): Equivalent to batch size.
269
+
270
+ Yields:
271
+ list: batch data.
272
+ """
273
+ inputs_iter = iter(inputs)
274
+ while True:
275
+ try:
276
+ chunk_data = []
277
+ for _ in range(chunk_size):
278
+ inputs_ = next(inputs_iter)
279
+ if isinstance(inputs_, dict):
280
+ if 'img' in inputs_:
281
+ ori_inputs_ = inputs_['img']
282
+ else:
283
+ ori_inputs_ = inputs_['img_path']
284
+ chunk_data.append(
285
+ (ori_inputs_,
286
+ self.pipeline(copy.deepcopy(inputs_))))
287
+ else:
288
+ chunk_data.append((inputs_, self.pipeline(inputs_)))
289
+ yield chunk_data
290
+ except StopIteration:
291
+ if chunk_data:
292
+ yield chunk_data
293
+ break
294
+
295
+ # TODO: Video and Webcam are currently not supported and
296
+ # may consume too much memory if your input folder has a lot of images.
297
+ # This will be optimized later.
298
+ def __call__(
299
+ self,
300
+ inputs: InputsType,
301
+ batch_size: int = 1,
302
+ return_vis: bool = False,
303
+ show: bool = False,
304
+ wait_time: int = 0,
305
+ no_save_vis: bool = False,
306
+ draw_pred: bool = True,
307
+ pred_score_thr: float = 0.3,
308
+ return_datasamples: bool = False,
309
+ print_result: bool = False,
310
+ no_save_pred: bool = True,
311
+ out_dir: str = '',
312
+ # by open image task
313
+ texts: Optional[Union[str, list]] = None,
314
+ # by open panoptic task
315
+ stuff_texts: Optional[Union[str, list]] = None,
316
+ # by GLIP
317
+ custom_entities: bool = False,
318
+ **kwargs) -> dict:
319
+ """Call the inferencer.
320
+
321
+ Args:
322
+ inputs (InputsType): Inputs for the inferencer.
323
+ batch_size (int): Inference batch size. Defaults to 1.
324
+ show (bool): Whether to display the visualization results in a
325
+ popup window. Defaults to False.
326
+ wait_time (float): The interval of show (s). Defaults to 0.
327
+ no_save_vis (bool): Whether to force not to save prediction
328
+ vis results. Defaults to False.
329
+ draw_pred (bool): Whether to draw predicted bounding boxes.
330
+ Defaults to True.
331
+ pred_score_thr (float): Minimum score of bboxes to draw.
332
+ Defaults to 0.3.
333
+ return_datasamples (bool): Whether to return results as
334
+ :obj:`DetDataSample`. Defaults to False.
335
+ print_result (bool): Whether to print the inference result w/o
336
+ visualization to the console. Defaults to False.
337
+ no_save_pred (bool): Whether to force not to save prediction
338
+ results. Defaults to True.
339
+ out_dir: Dir to save the inference results or
340
+ visualization. If left as empty, no file will be saved.
341
+ Defaults to ''.
342
+ texts (str | list[str]): Text prompts. Defaults to None.
343
+ stuff_texts (str | list[str]): Stuff text prompts of open
344
+ panoptic task. Defaults to None.
345
+ custom_entities (bool): Whether to use custom entities.
346
+ Defaults to False. Only used in GLIP.
347
+ **kwargs: Other keyword arguments passed to :meth:`preprocess`,
348
+ :meth:`forward`, :meth:`visualize` and :meth:`postprocess`.
349
+ Each key in kwargs should be in the corresponding set of
350
+ ``preprocess_kwargs``, ``forward_kwargs``, ``visualize_kwargs``
351
+ and ``postprocess_kwargs``.
352
+
353
+ Returns:
354
+ dict: Inference and visualization results.
355
+ """
356
+ (
357
+ preprocess_kwargs,
358
+ forward_kwargs,
359
+ visualize_kwargs,
360
+ postprocess_kwargs,
361
+ ) = self._dispatch_kwargs(**kwargs)
362
+
363
+ ori_inputs = self._inputs_to_list(inputs)
364
+
365
+ if texts is not None and isinstance(texts, str):
366
+ texts = [texts] * len(ori_inputs)
367
+ if stuff_texts is not None and isinstance(stuff_texts, str):
368
+ stuff_texts = [stuff_texts] * len(ori_inputs)
369
+ if texts is not None:
370
+ assert len(texts) == len(ori_inputs)
371
+ for i in range(len(texts)):
372
+ if isinstance(ori_inputs[i], str):
373
+ ori_inputs[i] = {
374
+ 'text': texts[i],
375
+ 'img_path': ori_inputs[i],
376
+ 'custom_entities': custom_entities
377
+ }
378
+ else:
379
+ ori_inputs[i] = {
380
+ 'text': texts[i],
381
+ 'img': ori_inputs[i],
382
+ 'custom_entities': custom_entities
383
+ }
384
+ if stuff_texts is not None:
385
+ assert len(stuff_texts) == len(ori_inputs)
386
+ for i in range(len(stuff_texts)):
387
+ ori_inputs[i]['stuff_text'] = stuff_texts[i]
388
+
389
+ inputs = self.preprocess(
390
+ ori_inputs, batch_size=batch_size, **preprocess_kwargs)
391
+
392
+ results_dict = {'predictions': [], 'visualization': []}
393
+ for ori_imgs, data in (track(inputs, description='Inference')
394
+ if self.show_progress else inputs):
395
+ preds = self.forward(data, **forward_kwargs)
396
+ visualization = self.visualize(
397
+ ori_imgs,
398
+ preds,
399
+ return_vis=return_vis,
400
+ show=show,
401
+ wait_time=wait_time,
402
+ draw_pred=draw_pred,
403
+ pred_score_thr=pred_score_thr,
404
+ no_save_vis=no_save_vis,
405
+ img_out_dir=out_dir,
406
+ **visualize_kwargs)
407
+ results = self.postprocess(
408
+ preds,
409
+ visualization,
410
+ return_datasamples=return_datasamples,
411
+ print_result=print_result,
412
+ no_save_pred=no_save_pred,
413
+ pred_out_dir=out_dir,
414
+ **postprocess_kwargs)
415
+ results_dict['predictions'].extend(results['predictions'])
416
+ if results['visualization'] is not None:
417
+ results_dict['visualization'].extend(results['visualization'])
418
+ return results_dict
419
+
420
+ def visualize(self,
421
+ inputs: InputsType,
422
+ preds: PredType,
423
+ return_vis: bool = False,
424
+ show: bool = False,
425
+ wait_time: int = 0,
426
+ draw_pred: bool = True,
427
+ pred_score_thr: float = 0.3,
428
+ no_save_vis: bool = False,
429
+ img_out_dir: str = '',
430
+ **kwargs) -> Union[List[np.ndarray], None]:
431
+ """Visualize predictions.
432
+
433
+ Args:
434
+ inputs (List[Union[str, np.ndarray]]): Inputs for the inferencer.
435
+ preds (List[:obj:`DetDataSample`]): Predictions of the model.
436
+ return_vis (bool): Whether to return the visualization result.
437
+ Defaults to False.
438
+ show (bool): Whether to display the image in a popup window.
439
+ Defaults to False.
440
+ wait_time (float): The interval of show (s). Defaults to 0.
441
+ draw_pred (bool): Whether to draw predicted bounding boxes.
442
+ Defaults to True.
443
+ pred_score_thr (float): Minimum score of bboxes to draw.
444
+ Defaults to 0.3.
445
+ no_save_vis (bool): Whether to force not to save prediction
446
+ vis results. Defaults to False.
447
+ img_out_dir (str): Output directory of visualization results.
448
+ If left as empty, no file will be saved. Defaults to ''.
449
+
450
+ Returns:
451
+ List[np.ndarray] or None: Returns visualization results only if
452
+ applicable.
453
+ """
454
+ if no_save_vis is True:
455
+ img_out_dir = ''
456
+
457
+ if not show and img_out_dir == '' and not return_vis:
458
+ return None
459
+
460
+ if self.visualizer is None:
461
+ raise ValueError('Visualization needs the "visualizer" term '
462
+ 'defined in the config, but got None.')
463
+
464
+ results = []
465
+
466
+ for single_input, pred in zip(inputs, preds):
467
+ if isinstance(single_input, str):
468
+ img_bytes = mmengine.fileio.get(single_input)
469
+ img = mmcv.imfrombytes(img_bytes)
470
+ img = img[:, :, ::-1]
471
+ img_name = osp.basename(single_input)
472
+ elif isinstance(single_input, np.ndarray):
473
+ img = single_input.copy()
474
+ img_num = str(self.num_visualized_imgs).zfill(8)
475
+ img_name = f'{img_num}.jpg'
476
+ else:
477
+ raise ValueError('Unsupported input type: '
478
+ f'{type(single_input)}')
479
+
480
+ out_file = osp.join(img_out_dir, 'vis',
481
+ img_name) if img_out_dir != '' else None
482
+
483
+ self.visualizer.add_datasample(
484
+ img_name,
485
+ img,
486
+ pred,
487
+ show=show,
488
+ wait_time=wait_time,
489
+ draw_gt=False,
490
+ draw_pred=draw_pred,
491
+ pred_score_thr=pred_score_thr,
492
+ out_file=out_file,
493
+ )
494
+ results.append(self.visualizer.get_image())
495
+ self.num_visualized_imgs += 1
496
+
497
+ return results
498
+
499
+ def postprocess(
500
+ self,
501
+ preds: PredType,
502
+ visualization: Optional[List[np.ndarray]] = None,
503
+ return_datasamples: bool = False,
504
+ print_result: bool = False,
505
+ no_save_pred: bool = False,
506
+ pred_out_dir: str = '',
507
+ **kwargs,
508
+ ) -> Dict:
509
+ """Process the predictions and visualization results from ``forward``
510
+ and ``visualize``.
511
+
512
+ This method should be responsible for the following tasks:
513
+
514
+ 1. Convert datasamples into a json-serializable dict if needed.
515
+ 2. Pack the predictions and visualization results and return them.
516
+ 3. Dump or log the predictions.
517
+
518
+ Args:
519
+ preds (List[:obj:`DetDataSample`]): Predictions of the model.
520
+ visualization (Optional[np.ndarray]): Visualized predictions.
521
+ return_datasamples (bool): Whether to use Datasample to store
522
+ inference results. If False, dict will be used.
523
+ print_result (bool): Whether to print the inference result w/o
524
+ visualization to the console. Defaults to False.
525
+ no_save_pred (bool): Whether to force not to save prediction
526
+ results. Defaults to False.
527
+ pred_out_dir: Dir to save the inference results w/o
528
+ visualization. If left as empty, no file will be saved.
529
+ Defaults to ''.
530
+
531
+ Returns:
532
+ dict: Inference and visualization results with key ``predictions``
533
+ and ``visualization``.
534
+
535
+ - ``visualization`` (Any): Returned by :meth:`visualize`.
536
+ - ``predictions`` (dict or DataSample): Returned by
537
+ :meth:`forward` and processed in :meth:`postprocess`.
538
+ If ``return_datasamples=False``, it usually should be a
539
+ json-serializable dict containing only basic data elements such
540
+ as strings and numbers.
541
+ """
542
+ if no_save_pred is True:
543
+ pred_out_dir = ''
544
+
545
+ result_dict = {}
546
+ results = preds
547
+ if not return_datasamples:
548
+ results = []
549
+ for pred in preds:
550
+ result = self.pred2dict(pred, pred_out_dir)
551
+ results.append(result)
552
+ elif pred_out_dir != '':
553
+ warnings.warn('Currently does not support saving datasample '
554
+ 'when return_datasamples is set to True. '
555
+ 'Prediction results are not saved!')
556
+ # Add img to the results after printing and dumping
557
+ result_dict['predictions'] = results
558
+ if print_result:
559
+ print(result_dict)
560
+ result_dict['visualization'] = visualization
561
+ return result_dict
562
+
563
+ # TODO: The data format and fields saved in json need further discussion.
564
+ # Maybe should include model name, timestamp, filename, image info etc.
565
+ def pred2dict(self,
566
+ data_sample: DetDataSample,
567
+ pred_out_dir: str = '') -> Dict:
568
+ """Extract elements necessary to represent a prediction into a
569
+ dictionary.
570
+
571
+ It's better to contain only basic data elements such as strings and
572
+ numbers in order to guarantee it's json-serializable.
573
+
574
+ Args:
575
+ data_sample (:obj:`DetDataSample`): Predictions of the model.
576
+ pred_out_dir: Dir to save the inference results w/o
577
+ visualization. If left as empty, no file will be saved.
578
+ Defaults to ''.
579
+
580
+ Returns:
581
+ dict: Prediction results.
582
+ """
583
+ is_save_pred = True
584
+ if pred_out_dir == '':
585
+ is_save_pred = False
586
+
587
+ if is_save_pred and 'img_path' in data_sample:
588
+ img_path = osp.basename(data_sample.img_path)
589
+ img_path = osp.splitext(img_path)[0]
590
+ out_img_path = osp.join(pred_out_dir, 'preds',
591
+ img_path + '_panoptic_seg.png')
592
+ out_json_path = osp.join(pred_out_dir, 'preds', img_path + '.json')
593
+ elif is_save_pred:
594
+ out_img_path = osp.join(
595
+ pred_out_dir, 'preds',
596
+ f'{self.num_predicted_imgs}_panoptic_seg.png')
597
+ out_json_path = osp.join(pred_out_dir, 'preds',
598
+ f'{self.num_predicted_imgs}.json')
599
+ self.num_predicted_imgs += 1
600
+
601
+ result = {}
602
+ if 'pred_instances' in data_sample:
603
+ masks = data_sample.pred_instances.get('masks')
604
+ pred_instances = data_sample.pred_instances.numpy()
605
+ result = {
606
+ 'labels': pred_instances.labels.tolist(),
607
+ 'scores': pred_instances.scores.tolist()
608
+ }
609
+ if 'bboxes' in pred_instances:
610
+ result['bboxes'] = pred_instances.bboxes.tolist()
611
+ if masks is not None:
612
+ if 'bboxes' not in pred_instances or pred_instances.bboxes.sum(
613
+ ) == 0:
614
+ # Fake bbox, such as the SOLO.
615
+ bboxes = mask2bbox(masks.cpu()).numpy().tolist()
616
+ result['bboxes'] = bboxes
617
+ encode_masks = encode_mask_results(pred_instances.masks)
618
+ for encode_mask in encode_masks:
619
+ if isinstance(encode_mask['counts'], bytes):
620
+ encode_mask['counts'] = encode_mask['counts'].decode()
621
+ result['masks'] = encode_masks
622
+
623
+ if 'pred_panoptic_seg' in data_sample:
624
+ if VOID is None:
625
+ raise RuntimeError(
626
+ 'panopticapi is not installed, please install it by: '
627
+ 'pip install git+https://github.com/cocodataset/'
628
+ 'panopticapi.git.')
629
+
630
+ pan = data_sample.pred_panoptic_seg.sem_seg.cpu().numpy()[0]
631
+ pan[pan % INSTANCE_OFFSET == len(
632
+ self.model.dataset_meta['classes'])] = VOID
633
+ pan = id2rgb(pan).astype(np.uint8)
634
+
635
+ if is_save_pred:
636
+ mmcv.imwrite(pan[:, :, ::-1], out_img_path)
637
+ result['panoptic_seg_path'] = out_img_path
638
+ else:
639
+ result['panoptic_seg'] = pan
640
+
641
+ if is_save_pred:
642
+ mmengine.dump(result, out_json_path)
643
+
644
+ return result
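For reference, a minimal sketch of driving this inferencer end to end. The model name follows the metafile convention mentioned in the class docstring and the image path is a placeholder:

from mmdet.apis import DetInferencer

# 'rtmdet-s' is a metafile model name; a local config path plus weights=... works too.
inferencer = DetInferencer(model='rtmdet-s', device='cuda:0')
results = inferencer(
    'demo/demo.jpg',      # placeholder: an image path, a directory or an ndarray
    pred_score_thr=0.3,
    out_dir='outputs/',   # visualizations go to outputs/vis, dumped results to outputs/preds
    no_save_pred=False)
# With return_datasamples=False (the default) each prediction is a plain dict;
# see pred2dict above for the available keys.
print(results['predictions'][0]['labels'][:5])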
mmdet/apis/inference.py ADDED
@@ -0,0 +1,372 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ import copy
3
+ import warnings
4
+ from pathlib import Path
5
+ from typing import Optional, Sequence, Union
6
+
7
+ import numpy as np
8
+ import torch
9
+ import torch.nn as nn
10
+ from mmcv.ops import RoIPool
11
+ from mmcv.transforms import Compose
12
+ from mmengine.config import Config
13
+ from mmengine.dataset import default_collate
14
+ from mmengine.model.utils import revert_sync_batchnorm
15
+ from mmengine.registry import init_default_scope
16
+ from mmengine.runner import load_checkpoint
17
+
18
+ from mmdet.registry import DATASETS
19
+ from mmdet.utils import ConfigType
20
+ from ..evaluation import get_classes
21
+ from ..registry import MODELS
22
+ from ..structures import DetDataSample, SampleList
23
+ from ..utils import get_test_pipeline_cfg
24
+
25
+
26
+ def init_detector(
27
+ config: Union[str, Path, Config],
28
+ checkpoint: Optional[str] = None,
29
+ palette: str = 'none',
30
+ device: str = 'cuda:0',
31
+ cfg_options: Optional[dict] = None,
32
+ ) -> nn.Module:
33
+ """Initialize a detector from config file.
34
+
35
+ Args:
36
+ config (str, :obj:`Path`, or :obj:`mmengine.Config`): Config file path,
37
+ :obj:`Path`, or the config object.
38
+ checkpoint (str, optional): Checkpoint path. If left as None, the model
39
+ will not load any weights.
40
+ palette (str): Color palette used for visualization. If palette
41
+ is stored in checkpoint, use checkpoint's palette first, otherwise
42
+ use externally passed palette. Currently, supports 'coco', 'voc',
43
+ 'citys' and 'random'. Defaults to none.
44
+ device (str): The device where the anchors will be put on.
45
+ Defaults to cuda:0.
46
+ cfg_options (dict, optional): Options to override some settings in
47
+ the used config.
48
+
49
+ Returns:
50
+ nn.Module: The constructed detector.
51
+ """
52
+ if isinstance(config, (str, Path)):
53
+ config = Config.fromfile(config)
54
+ elif not isinstance(config, Config):
55
+ raise TypeError('config must be a filename or Config object, '
56
+ f'but got {type(config)}')
57
+ if cfg_options is not None:
58
+ config.merge_from_dict(cfg_options)
59
+ elif 'init_cfg' in config.model.backbone:
60
+ config.model.backbone.init_cfg = None
61
+
62
+ scope = config.get('default_scope', 'mmdet')
63
+ if scope is not None:
64
+ init_default_scope(config.get('default_scope', 'mmdet'))
65
+
66
+ model = MODELS.build(config.model)
67
+ model = revert_sync_batchnorm(model)
68
+ if checkpoint is None:
69
+ warnings.simplefilter('once')
70
+ warnings.warn('checkpoint is None, use COCO classes by default.')
71
+ model.dataset_meta = {'classes': get_classes('coco')}
72
+ else:
73
+ checkpoint = load_checkpoint(model, checkpoint, map_location='cpu')
74
+ # Weights converted from elsewhere may not have meta fields.
75
+ checkpoint_meta = checkpoint.get('meta', {})
76
+
77
+ # save the dataset_meta in the model for convenience
78
+ if 'dataset_meta' in checkpoint_meta:
79
+ # mmdet 3.x, all keys should be lowercase
80
+ model.dataset_meta = {
81
+ k.lower(): v
82
+ for k, v in checkpoint_meta['dataset_meta'].items()
83
+ }
84
+ elif 'CLASSES' in checkpoint_meta:
85
+ # < mmdet 3.x
86
+ classes = checkpoint_meta['CLASSES']
87
+ model.dataset_meta = {'classes': classes}
88
+ else:
89
+ warnings.simplefilter('once')
90
+ warnings.warn(
91
+ 'dataset_meta or class names are not saved in the '
92
+ 'checkpoint\'s meta data, use COCO classes by default.')
93
+ model.dataset_meta = {'classes': get_classes('coco')}
94
+
95
+ # Priority: args.palette -> config -> checkpoint
96
+ if palette != 'none':
97
+ model.dataset_meta['palette'] = palette
98
+ else:
99
+ test_dataset_cfg = copy.deepcopy(config.test_dataloader.dataset)
100
+ # lazy init. We only need the metainfo.
101
+ test_dataset_cfg['lazy_init'] = True
102
+ metainfo = DATASETS.build(test_dataset_cfg).metainfo
103
+ cfg_palette = metainfo.get('palette', None)
104
+ if cfg_palette is not None:
105
+ model.dataset_meta['palette'] = cfg_palette
106
+ else:
107
+ if 'palette' not in model.dataset_meta:
108
+ warnings.warn(
109
+ 'palette does not exist, random is used by default. '
110
+ 'You can also set the palette to customize.')
111
+ model.dataset_meta['palette'] = 'random'
112
+
113
+ model.cfg = config # save the config in the model for convenience
114
+ model.to(device)
115
+ model.eval()
116
+ return model
117
+
118
+
119
+ ImagesType = Union[str, np.ndarray, Sequence[str], Sequence[np.ndarray]]
120
+
121
+
122
+ def inference_detector(
123
+ model: nn.Module,
124
+ imgs: ImagesType,
125
+ test_pipeline: Optional[Compose] = None,
126
+ text_prompt: Optional[str] = None,
127
+ custom_entities: bool = False,
128
+ ) -> Union[DetDataSample, SampleList]:
129
+ """Inference image(s) with the detector.
130
+
131
+ Args:
132
+ model (nn.Module): The loaded detector.
133
+ imgs (str, ndarray, Sequence[str/ndarray]):
134
+ Either image files or loaded images.
135
+ test_pipeline (:obj:`Compose`): Test pipeline.
136
+
137
+ Returns:
138
+ :obj:`DetDataSample` or list[:obj:`DetDataSample`]:
139
+ If imgs is a list or tuple, the same length list type results
140
+ will be returned, otherwise return the detection results directly.
141
+ """
142
+
143
+ if isinstance(imgs, (list, tuple)):
144
+ is_batch = True
145
+ else:
146
+ imgs = [imgs]
147
+ is_batch = False
148
+
149
+ cfg = model.cfg
150
+
151
+ if test_pipeline is None:
152
+ cfg = cfg.copy()
153
+ test_pipeline = get_test_pipeline_cfg(cfg)
154
+ if isinstance(imgs[0], np.ndarray):
155
+ # Calling this method across libraries will result
156
+ # in module unregistered error if not prefixed with mmdet.
157
+ test_pipeline[0].type = 'mmdet.LoadImageFromNDArray'
158
+
159
+ test_pipeline = Compose(test_pipeline)
160
+
161
+ if model.data_preprocessor.device.type == 'cpu':
162
+ for m in model.modules():
163
+ assert not isinstance(
164
+ m, RoIPool
165
+ ), 'CPU inference with RoIPool is not supported currently.'
166
+
167
+ result_list = []
168
+ for i, img in enumerate(imgs):
169
+ # prepare data
170
+ if isinstance(img, np.ndarray):
171
+ # TODO: remove img_id.
172
+ data_ = dict(img=img, img_id=0)
173
+ else:
174
+ # TODO: remove img_id.
175
+ data_ = dict(img_path=img, img_id=0)
176
+
177
+ if text_prompt:
178
+ data_['text'] = text_prompt
179
+ data_['custom_entities'] = custom_entities
180
+
181
+ # build the data pipeline
182
+ data_ = test_pipeline(data_)
183
+
184
+ data_['inputs'] = [data_['inputs']]
185
+ data_['data_samples'] = [data_['data_samples']]
186
+
187
+ # forward the model
188
+ with torch.no_grad():
189
+ results = model.test_step(data_)[0]
190
+
191
+ result_list.append(results)
192
+
193
+ if not is_batch:
194
+ return result_list[0]
195
+ else:
196
+ return result_list
197
+
198
+
199
+ # TODO: Awaiting refactoring
200
+ async def async_inference_detector(model, imgs):
201
+ """Async inference image(s) with the detector.
202
+
203
+ Args:
204
+ model (nn.Module): The loaded detector.
205
+ img (str | ndarray): Either image files or loaded images.
206
+
207
+ Returns:
208
+ Awaitable detection results.
209
+ """
210
+ if not isinstance(imgs, (list, tuple)):
211
+ imgs = [imgs]
212
+
213
+ cfg = model.cfg
214
+
215
+ if isinstance(imgs[0], np.ndarray):
216
+ cfg = cfg.copy()
217
+ # set loading pipeline type
218
+ cfg.data.test.pipeline[0].type = 'LoadImageFromNDArray'
219
+
220
+ # cfg.data.test.pipeline = replace_ImageToTensor(cfg.data.test.pipeline)
221
+ test_pipeline = Compose(cfg.data.test.pipeline)
222
+
223
+ datas = []
224
+ for img in imgs:
225
+ # prepare data
226
+ if isinstance(img, np.ndarray):
227
+ # directly add img
228
+ data = dict(img=img)
229
+ else:
230
+ # add information into dict
231
+ data = dict(img_info=dict(filename=img), img_prefix=None)
232
+ # build the data pipeline
233
+ data = test_pipeline(data)
234
+ datas.append(data)
235
+
236
+ for m in model.modules():
237
+ assert not isinstance(
238
+ m,
239
+ RoIPool), 'CPU inference with RoIPool is not supported currently.'
240
+
241
+ # We don't restore `torch.is_grad_enabled()` value during concurrent
242
+ # inference since execution can overlap
243
+ torch.set_grad_enabled(False)
244
+ results = await model.aforward_test(datas, rescale=True)
245
+ return results
246
+
247
+
248
+ def build_test_pipeline(cfg: ConfigType) -> ConfigType:
249
+ """Build test_pipeline for mot/vis demo. In mot/vis infer, original
250
+ test_pipeline should remove the "LoadImageFromFile" and
251
+ "LoadTrackAnnotations".
252
+
253
+ Args:
254
+ cfg (ConfigDict): The loaded config.
255
+ Returns:
256
+ ConfigType: new test_pipeline
257
+ """
258
+ # remove the "LoadImageFromFile" and "LoadTrackAnnotations" in pipeline
259
+ transform_broadcaster = cfg.test_dataloader.dataset.pipeline[0].copy()
260
+ for transform in transform_broadcaster['transforms']:
261
+ if transform['type'] == 'Resize':
262
+ transform_broadcaster['transforms'] = transform
263
+ pack_track_inputs = cfg.test_dataloader.dataset.pipeline[-1].copy()
264
+ test_pipeline = Compose([transform_broadcaster, pack_track_inputs])
265
+
266
+ return test_pipeline
267
+
268
+
269
+ def inference_mot(model: nn.Module, img: np.ndarray, frame_id: int,
270
+ video_len: int) -> SampleList:
271
+ """Inference image(s) with the mot model.
272
+
273
+ Args:
274
+ model (nn.Module): The loaded mot model.
275
+ img (np.ndarray): Loaded image.
276
+ frame_id (int): frame id.
277
+ video_len (int): demo video length
278
+ Returns:
279
+ SampleList: The tracking data samples.
280
+ """
281
+ cfg = model.cfg
282
+ data = dict(
283
+ img=[img.astype(np.float32)],
284
+ frame_id=[frame_id],
285
+ ori_shape=[img.shape[:2]],
286
+ img_id=[frame_id + 1],
287
+ ori_video_length=[video_len])
288
+
289
+ test_pipeline = build_test_pipeline(cfg)
290
+ data = test_pipeline(data)
291
+
292
+ if not next(model.parameters()).is_cuda:
293
+ for m in model.modules():
294
+ assert not isinstance(
295
+ m, RoIPool
296
+ ), 'CPU inference with RoIPool is not supported currently.'
297
+
298
+ # forward the model
299
+ with torch.no_grad():
300
+ data = default_collate([data])
301
+ result = model.test_step(data)[0]
302
+ return result
303
+
304
+
305
+ def init_track_model(config: Union[str, Config],
306
+ checkpoint: Optional[str] = None,
307
+ detector: Optional[str] = None,
308
+ reid: Optional[str] = None,
309
+ device: str = 'cuda:0',
310
+ cfg_options: Optional[dict] = None) -> nn.Module:
311
+ """Initialize a model from config file.
312
+
313
+ Args:
314
+ config (str or :obj:`mmengine.Config`): Config file path or the config
315
+ object.
316
+ checkpoint (Optional[str], optional): Checkpoint path. Defaults to
317
+ None.
318
+ detector (Optional[str], optional): Detector checkpoint path, used in
319
+ some tracking algorithms like sort. Defaults to None.
320
+ reid (Optional[str], optional): Reid checkpoint path, used in
321
+ some tracking algorithms like sort. Defaults to None.
322
+ device (str, optional): The device that the model inferences on.
323
+ Defaults to `cuda:0`.
324
+ cfg_options (Optional[dict], optional): Options to override some
325
+ settings in the used config. Defaults to None.
326
+
327
+ Returns:
328
+ nn.Module: The constructed model.
329
+ """
330
+ if isinstance(config, str):
331
+ config = Config.fromfile(config)
332
+ elif not isinstance(config, Config):
333
+ raise TypeError('config must be a filename or Config object, '
334
+ f'but got {type(config)}')
335
+ if cfg_options is not None:
336
+ config.merge_from_dict(cfg_options)
337
+
338
+ model = MODELS.build(config.model)
339
+
340
+ if checkpoint is not None:
341
+ checkpoint = load_checkpoint(model, checkpoint, map_location='cpu')
342
+ # Weights converted from elsewhere may not have meta fields.
343
+ checkpoint_meta = checkpoint.get('meta', {})
344
+ # save the dataset_meta in the model for convenience
345
+ if 'dataset_meta' in checkpoint_meta:
346
+ if 'CLASSES' in checkpoint_meta['dataset_meta']:
347
+ value = checkpoint_meta['dataset_meta'].pop('CLASSES')
348
+ checkpoint_meta['dataset_meta']['classes'] = value
349
+ model.dataset_meta = checkpoint_meta['dataset_meta']
350
+
351
+ if detector is not None:
352
+ assert not (checkpoint and detector), \
353
+ 'Error: checkpoint and detector checkpoint cannot both exist'
354
+ load_checkpoint(model.detector, detector, map_location='cpu')
355
+
356
+ if reid is not None:
357
+ assert not (checkpoint and reid), \
358
+ 'Error: checkpoint and reid checkpoint cannot both exist'
359
+ load_checkpoint(model.reid, reid, map_location='cpu')
360
+
361
+ # Some methods don't load checkpoints or checkpoints don't contain
362
+ # 'dataset_meta'
363
+ # VIS need dataset_meta, MOT don't need dataset_meta
364
+ if not hasattr(model, 'dataset_meta'):
365
+ warnings.warn('dataset_meta or class names are missing, '
366
+ 'use None by default.')
367
+ model.dataset_meta = {'classes': None}
368
+
369
+ model.cfg = config # save the config in the model for convenience
370
+ model.to(device)
371
+ model.eval()
372
+ return model
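A minimal sketch of the functional API defined in this file; the config and checkpoint paths are placeholders:

from mmdet.apis import inference_detector, init_detector

# Placeholder paths: any mmdet 3.x config/checkpoint pair works.
model = init_detector('configs/my_det_config.py', 'work_dirs/my_det_ckpt.pth',
                      palette='coco', device='cuda:0')
result = inference_detector(model, 'demo/demo.jpg')  # single input -> DetDataSample
bboxes = result.pred_instances.bboxes  # (N, 4) boxes in (x1, y1, x2, y2) order
scores = result.pred_instances.scores
labels = result.pred_instances.labels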
mmdet/configs/.DS_Store ADDED
Binary file (8.2 kB). View file
 
mmdet/configs/_base_/datasets/coco_detection.py ADDED
@@ -0,0 +1,104 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from mmcv.transforms import LoadImageFromFile
3
+ from mmengine.dataset.sampler import DefaultSampler
4
+
5
+ from mmdet.datasets import AspectRatioBatchSampler, CocoDataset
6
+ from mmdet.datasets.transforms import (LoadAnnotations, PackDetInputs,
7
+ RandomFlip, Resize)
8
+ from mmdet.evaluation import CocoMetric
9
+
10
+ # dataset settings
11
+ dataset_type = CocoDataset
12
+ data_root = 'data/coco/'
13
+
14
+ # Example to use different file client
15
+ # Method 1: simply set the data root and let the file I/O module
16
+ # automatically infer from prefix (not support LMDB and Memcache yet)
17
+
18
+ # data_root = 's3://openmmlab/datasets/detection/coco/'
19
+
20
+ # Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6
21
+ # backend_args = dict(
22
+ # backend='petrel',
23
+ # path_mapping=dict({
24
+ # './data/': 's3://openmmlab/datasets/detection/',
25
+ # 'data/': 's3://openmmlab/datasets/detection/'
26
+ # }))
27
+ backend_args = None
28
+
29
+ train_pipeline = [
30
+ dict(type=LoadImageFromFile, backend_args=backend_args),
31
+ dict(type=LoadAnnotations, with_bbox=True),
32
+ dict(type=Resize, scale=(1333, 800), keep_ratio=True),
33
+ dict(type=RandomFlip, prob=0.5),
34
+ dict(type=PackDetInputs)
35
+ ]
36
+ test_pipeline = [
37
+ dict(type=LoadImageFromFile, backend_args=backend_args),
38
+ dict(type=Resize, scale=(1333, 800), keep_ratio=True),
39
+ # If you don't have a gt annotation, delete the pipeline
40
+ dict(type=LoadAnnotations, with_bbox=True),
41
+ dict(
42
+ type=PackDetInputs,
43
+ meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
44
+ 'scale_factor'))
45
+ ]
46
+ train_dataloader = dict(
47
+ batch_size=2,
48
+ num_workers=2,
49
+ persistent_workers=True,
50
+ sampler=dict(type=DefaultSampler, shuffle=True),
51
+ batch_sampler=dict(type=AspectRatioBatchSampler),
52
+ dataset=dict(
53
+ type=dataset_type,
54
+ data_root=data_root,
55
+ ann_file='annotations/instances_train2017.json',
56
+ data_prefix=dict(img='train2017/'),
57
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
58
+ pipeline=train_pipeline,
59
+ backend_args=backend_args))
60
+ val_dataloader = dict(
61
+ batch_size=1,
62
+ num_workers=2,
63
+ persistent_workers=True,
64
+ drop_last=False,
65
+ sampler=dict(type=DefaultSampler, shuffle=False),
66
+ dataset=dict(
67
+ type=dataset_type,
68
+ data_root=data_root,
69
+ ann_file='annotations/instances_val2017.json',
70
+ data_prefix=dict(img='val2017/'),
71
+ test_mode=True,
72
+ pipeline=test_pipeline,
73
+ backend_args=backend_args))
74
+ test_dataloader = val_dataloader
75
+
76
+ val_evaluator = dict(
77
+ type=CocoMetric,
78
+ ann_file=data_root + 'annotations/instances_val2017.json',
79
+ metric='bbox',
80
+ format_only=False,
81
+ backend_args=backend_args)
82
+ test_evaluator = val_evaluator
83
+
84
+ # inference on test dataset and
85
+ # format the output results for submission.
86
+ # test_dataloader = dict(
87
+ # batch_size=1,
88
+ # num_workers=2,
89
+ # persistent_workers=True,
90
+ # drop_last=False,
91
+ # sampler=dict(type=DefaultSampler, shuffle=False),
92
+ # dataset=dict(
93
+ # type=dataset_type,
94
+ # data_root=data_root,
95
+ # ann_file=data_root + 'annotations/image_info_test-dev2017.json',
96
+ # data_prefix=dict(img='test2017/'),
97
+ # test_mode=True,
98
+ # pipeline=test_pipeline))
99
+ # test_evaluator = dict(
100
+ # type=CocoMetric,
101
+ # metric='bbox',
102
+ # format_only=True,
103
+ # ann_file=data_root + 'annotations/image_info_test-dev2017.json',
104
+ # outfile_prefix='./work_dirs/coco_detection/test')
mmdet/configs/_base_/datasets/coco_instance.py ADDED
@@ -0,0 +1,106 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from mmcv.transforms.loading import LoadImageFromFile
3
+ from mmengine.dataset.sampler import DefaultSampler
4
+
5
+ from mmdet.datasets.coco import CocoDataset
6
+ from mmdet.datasets.samplers.batch_sampler import AspectRatioBatchSampler
7
+ from mmdet.datasets.transforms.formatting import PackDetInputs
8
+ from mmdet.datasets.transforms.loading import LoadAnnotations
9
+ from mmdet.datasets.transforms.transforms import RandomFlip, Resize
10
+ from mmdet.evaluation.metrics.coco_metric import CocoMetric
11
+
12
+ # dataset settings
13
+ dataset_type = 'CocoDataset'
14
+ data_root = 'data/coco/'
15
+
16
+ # Example to use different file client
17
+ # Method 1: simply set the data root and let the file I/O module
18
+ # automatically infer from prefix (not support LMDB and Memcache yet)
19
+
20
+ # data_root = 's3://openmmlab/datasets/detection/coco/'
21
+
22
+ # Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6
23
+ # backend_args = dict(
24
+ # backend='petrel',
25
+ # path_mapping=dict({
26
+ # './data/': 's3://openmmlab/datasets/detection/',
27
+ # 'data/': 's3://openmmlab/datasets/detection/'
28
+ # }))
29
+ backend_args = None
30
+
31
+ train_pipeline = [
32
+ dict(type=LoadImageFromFile, backend_args=backend_args),
33
+ dict(type=LoadAnnotations, with_bbox=True, with_mask=True),
34
+ dict(type=Resize, scale=(1333, 800), keep_ratio=True),
35
+ dict(type=RandomFlip, prob=0.5),
36
+ dict(type=PackDetInputs)
37
+ ]
38
+ test_pipeline = [
39
+ dict(type=LoadImageFromFile, backend_args=backend_args),
40
+ dict(type=Resize, scale=(1333, 800), keep_ratio=True),
41
+ # If you don't have a gt annotation, delete the pipeline
42
+ dict(type=LoadAnnotations, with_bbox=True, with_mask=True),
43
+ dict(
44
+ type=PackDetInputs,
45
+ meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
46
+ 'scale_factor'))
47
+ ]
48
+ train_dataloader = dict(
49
+ batch_size=2,
50
+ num_workers=2,
51
+ persistent_workers=True,
52
+ sampler=dict(type=DefaultSampler, shuffle=True),
53
+ batch_sampler=dict(type=AspectRatioBatchSampler),
54
+ dataset=dict(
55
+ type=CocoDataset,
56
+ data_root=data_root,
57
+ ann_file='annotations/instances_train2017.json',
58
+ data_prefix=dict(img='train2017/'),
59
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
60
+ pipeline=train_pipeline,
61
+ backend_args=backend_args))
62
+ val_dataloader = dict(
63
+ batch_size=1,
64
+ num_workers=2,
65
+ persistent_workers=True,
66
+ drop_last=False,
67
+ sampler=dict(type=DefaultSampler, shuffle=False),
68
+ dataset=dict(
69
+ type=CocoDataset,
70
+ data_root=data_root,
71
+ ann_file='annotations/instances_val2017.json',
72
+ data_prefix=dict(img='val2017/'),
73
+ test_mode=True,
74
+ pipeline=test_pipeline,
75
+ backend_args=backend_args))
76
+ test_dataloader = val_dataloader
77
+
78
+ val_evaluator = dict(
79
+ type=CocoMetric,
80
+ ann_file=data_root + 'annotations/instances_val2017.json',
81
+ metric=['bbox', 'segm'],
82
+ format_only=False,
83
+ backend_args=backend_args)
84
+ test_evaluator = val_evaluator
85
+
86
+ # inference on test dataset and
87
+ # format the output results for submission.
88
+ # test_dataloader = dict(
89
+ # batch_size=1,
90
+ # num_workers=2,
91
+ # persistent_workers=True,
92
+ # drop_last=False,
93
+ # sampler=dict(type=DefaultSampler, shuffle=False),
94
+ # dataset=dict(
95
+ # type=CocoDataset,
96
+ # data_root=data_root,
97
+ # ann_file=data_root + 'annotations/image_info_test-dev2017.json',
98
+ # data_prefix=dict(img='test2017/'),
99
+ # test_mode=True,
100
+ # pipeline=test_pipeline))
101
+ # test_evaluator = dict(
102
+ # type=CocoMetric,
103
+ # metric=['bbox', 'segm'],
104
+ # format_only=True,
105
+ # ann_file=data_root + 'annotations/image_info_test-dev2017.json',
106
+ # outfile_prefix='./work_dirs/coco_instance/test')
mmdet/configs/_base_/datasets/coco_instance_semantic.py ADDED
@@ -0,0 +1,87 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from mmcv.transforms.loading import LoadImageFromFile
3
+ from mmengine.dataset.sampler import DefaultSampler
4
+
5
+ from mmdet.datasets.coco import CocoDataset
6
+ from mmdet.datasets.samplers.batch_sampler import AspectRatioBatchSampler
7
+ from mmdet.datasets.transforms.formatting import PackDetInputs
8
+ from mmdet.datasets.transforms.loading import LoadAnnotations
9
+ from mmdet.datasets.transforms.transforms import RandomFlip, Resize
10
+ from mmdet.evaluation.metrics.coco_metric import CocoMetric
11
+
12
+ # dataset settings
13
+ dataset_type = 'CocoDataset'
14
+ data_root = 'data/coco/'
15
+
16
+ # Example to use different file client
17
+ # Method 1: simply set the data root and let the file I/O module
18
+ # automatically infer from prefix (not support LMDB and Memcache yet)
19
+
20
+ # data_root = 's3://openmmlab/datasets/detection/coco/'
21
+
22
+ # Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6
23
+ # backend_args = dict(
24
+ # backend='petrel',
25
+ # path_mapping=dict({
26
+ # './data/': 's3://openmmlab/datasets/detection/',
27
+ # 'data/': 's3://openmmlab/datasets/detection/'
28
+ # }))
29
+ backend_args = None
30
+
31
+ train_pipeline = [
32
+ dict(type=LoadImageFromFile, backend_args=backend_args),
33
+ dict(type=LoadAnnotations, with_bbox=True, with_mask=True, with_seg=True),
34
+ dict(type=Resize, scale=(1333, 800), keep_ratio=True),
35
+ dict(type=RandomFlip, prob=0.5),
36
+ dict(type=PackDetInputs)
37
+ ]
38
+ test_pipeline = [
39
+ dict(type=LoadImageFromFile, backend_args=backend_args),
40
+ dict(type=Resize, scale=(1333, 800), keep_ratio=True),
41
+ # If you don't have a gt annotation, delete the pipeline
42
+ dict(type=LoadAnnotations, with_bbox=True, with_mask=True, with_seg=True),
43
+ dict(
44
+ type=PackDetInputs,
45
+ meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
46
+ 'scale_factor'))
47
+ ]
48
+
49
+ train_dataloader = dict(
50
+ batch_size=2,
51
+ num_workers=2,
52
+ persistent_workers=True,
53
+ sampler=dict(type=DefaultSampler, shuffle=True),
54
+ batch_sampler=dict(type=AspectRatioBatchSampler),
55
+ dataset=dict(
56
+ type=CocoDataset,
57
+ data_root=data_root,
58
+ ann_file='annotations/instances_train2017.json',
59
+ data_prefix=dict(img='train2017/', seg='stuffthingmaps/train2017/'),
60
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
61
+ pipeline=train_pipeline,
62
+ backend_args=backend_args))
63
+
64
+ val_dataloader = dict(
65
+ batch_size=1,
66
+ num_workers=2,
67
+ persistent_workers=True,
68
+ drop_last=False,
69
+ sampler=dict(type=DefaultSampler, shuffle=False),
70
+ dataset=dict(
71
+ type=CocoDataset,
72
+ data_root=data_root,
73
+ ann_file='annotations/instances_val2017.json',
74
+ data_prefix=dict(img='val2017/'),
75
+ test_mode=True,
76
+ pipeline=test_pipeline,
77
+ backend_args=backend_args))
78
+
79
+ test_dataloader = val_dataloader
80
+
81
+ val_evaluator = dict(
82
+ type=CocoMetric,
83
+ ann_file=data_root + 'annotations/instances_val2017.json',
84
+ metric=['bbox', 'segm'],
85
+ format_only=False,
86
+ backend_args=backend_args)
87
+ test_evaluator = val_evaluator
mmdet/configs/_base_/datasets/coco_panoptic.py ADDED
@@ -0,0 +1,105 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from mmcv.transforms.loading import LoadImageFromFile
3
+ from mmengine.dataset.sampler import DefaultSampler
4
+
5
+ from mmdet.datasets.coco_panoptic import CocoPanopticDataset
6
+ from mmdet.datasets.samplers.batch_sampler import AspectRatioBatchSampler
7
+ from mmdet.datasets.transforms.formatting import PackDetInputs
8
+ from mmdet.datasets.transforms.loading import LoadPanopticAnnotations
9
+ from mmdet.datasets.transforms.transforms import RandomFlip, Resize
10
+ from mmdet.evaluation.metrics.coco_panoptic_metric import CocoPanopticMetric
11
+
12
+ # dataset settings
13
+ dataset_type = 'CocoPanopticDataset'
14
+ data_root = 'data/coco/'
15
+
16
+ # Example to use different file client
17
+ # Method 1: simply set the data root and let the file I/O module
18
+ # automatically infer from prefix (not support LMDB and Memcache yet)
19
+
20
+ # data_root = 's3://openmmlab/datasets/detection/coco/'
21
+
22
+ # Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6
23
+ # backend_args = dict(
24
+ # backend='petrel',
25
+ # path_mapping=dict({
26
+ # './data/': 's3://openmmlab/datasets/detection/',
27
+ # 'data/': 's3://openmmlab/datasets/detection/'
28
+ # }))
29
+ backend_args = None
30
+
31
+ train_pipeline = [
32
+ dict(type=LoadImageFromFile, backend_args=backend_args),
33
+ dict(type=LoadPanopticAnnotations, backend_args=backend_args),
34
+ dict(type=Resize, scale=(1333, 800), keep_ratio=True),
35
+ dict(type=RandomFlip, prob=0.5),
36
+ dict(type=PackDetInputs)
37
+ ]
38
+ test_pipeline = [
39
+ dict(type=LoadImageFromFile, backend_args=backend_args),
40
+ dict(type=Resize, scale=(1333, 800), keep_ratio=True),
41
+ dict(type=LoadPanopticAnnotations, backend_args=backend_args),
42
+ dict(
43
+ type=PackDetInputs,
44
+ meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
45
+ 'scale_factor'))
46
+ ]
47
+
48
+ train_dataloader = dict(
49
+ batch_size=2,
50
+ num_workers=2,
51
+ persistent_workers=True,
52
+ sampler=dict(type=DefaultSampler, shuffle=True),
53
+ batch_sampler=dict(type=AspectRatioBatchSampler),
54
+ dataset=dict(
55
+ type=CocoPanopticDataset,
56
+ data_root=data_root,
57
+ ann_file='annotations/panoptic_train2017.json',
58
+ data_prefix=dict(
59
+ img='train2017/', seg='annotations/panoptic_train2017/'),
60
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
61
+ pipeline=train_pipeline,
62
+ backend_args=backend_args))
63
+ val_dataloader = dict(
64
+ batch_size=1,
65
+ num_workers=2,
66
+ persistent_workers=True,
67
+ drop_last=False,
68
+ sampler=dict(type=DefaultSampler, shuffle=False),
69
+ dataset=dict(
70
+ type=CocoPanopticDataset,
71
+ data_root=data_root,
72
+ ann_file='annotations/panoptic_val2017.json',
73
+ data_prefix=dict(img='val2017/', seg='annotations/panoptic_val2017/'),
74
+ test_mode=True,
75
+ pipeline=test_pipeline,
76
+ backend_args=backend_args))
77
+ test_dataloader = val_dataloader
78
+
79
+ val_evaluator = dict(
80
+ type=CocoPanopticMetric,
81
+ ann_file=data_root + 'annotations/panoptic_val2017.json',
82
+ seg_prefix=data_root + 'annotations/panoptic_val2017/',
83
+ backend_args=backend_args)
84
+ test_evaluator = val_evaluator
85
+
86
+ # inference on test dataset and
87
+ # format the output results for submission.
88
+ # test_dataloader = dict(
89
+ # batch_size=1,
90
+ # num_workers=1,
91
+ # persistent_workers=True,
92
+ # drop_last=False,
93
+ # sampler=dict(type=DefaultSampler, shuffle=False),
94
+ # dataset=dict(
95
+ # type=CocoPanopticDataset,
96
+ # data_root=data_root,
97
+ # ann_file='annotations/panoptic_image_info_test-dev2017.json',
98
+ # data_prefix=dict(img='test2017/'),
99
+ # test_mode=True,
100
+ # pipeline=test_pipeline))
101
+ # test_evaluator = dict(
102
+ # type=CocoPanopticMetric,
103
+ # format_only=True,
104
+ # ann_file=data_root + 'annotations/panoptic_image_info_test-dev2017.json',
105
+ # outfile_prefix='./work_dirs/coco_panoptic/test')
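The commented-out block above shows how to format panoptic predictions for a COCO test-dev submission. In the pure-Python config style, a derived config could apply the same switch roughly as follows (a sketch only, not part of this commit):

```python
# Hypothetical derived config that reuses the panoptic base above and turns on
# format-only evaluation for a test-dev submission.
from mmengine.config import read_base

with read_base():
    from .._base_.datasets.coco_panoptic import *  # noqa: F401,F403

test_dataloader.update(
    dict(dataset=dict(
        ann_file='annotations/panoptic_image_info_test-dev2017.json',
        data_prefix=dict(img='test2017/'),
        test_mode=True)))
test_evaluator = dict(
    type=CocoPanopticMetric,
    format_only=True,
    ann_file=data_root + 'annotations/panoptic_image_info_test-dev2017.json',
    outfile_prefix='./work_dirs/coco_panoptic/test')
```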
mmdet/configs/_base_/datasets/mot_challenge.py ADDED
@@ -0,0 +1,101 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from mmcv.transforms import (LoadImageFromFile, RandomResize,
3
+ TransformBroadcaster)
4
+
5
+ from mmdet.datasets import MOTChallengeDataset
6
+ from mmdet.datasets.samplers import TrackImgSampler
7
+ from mmdet.datasets.transforms import (LoadTrackAnnotations, PackTrackInputs,
8
+ PhotoMetricDistortion, RandomCrop,
9
+ RandomFlip, Resize,
10
+ UniformRefFrameSample)
11
+ from mmdet.evaluation import MOTChallengeMetric
12
+
13
+ # dataset settings
14
+ dataset_type = MOTChallengeDataset
15
+ data_root = 'data/MOT17/'
16
+ img_scale = (1088, 1088)
17
+
18
+ backend_args = None
19
+ # data pipeline
20
+ train_pipeline = [
21
+ dict(
22
+ type=UniformRefFrameSample,
23
+ num_ref_imgs=1,
24
+ frame_range=10,
25
+ filter_key_img=True),
26
+ dict(
27
+ type=TransformBroadcaster,
28
+ share_random_params=True,
29
+ transforms=[
30
+ dict(type=LoadImageFromFile, backend_args=backend_args),
31
+ dict(type=LoadTrackAnnotations),
32
+ dict(
33
+ type=RandomResize,
34
+ scale=img_scale,
35
+ ratio_range=(0.8, 1.2),
36
+ keep_ratio=True,
37
+ clip_object_border=False),
38
+ dict(type=PhotoMetricDistortion)
39
+ ]),
40
+ dict(
41
+ type=TransformBroadcaster,
42
+ # different cropped positions for different frames
43
+ share_random_params=False,
44
+ transforms=[
45
+ dict(type=RandomCrop, crop_size=img_scale, bbox_clip_border=False)
46
+ ]),
47
+ dict(
48
+ type=TransformBroadcaster,
49
+ share_random_params=True,
50
+ transforms=[
51
+ dict(type=RandomFlip, prob=0.5),
52
+ ]),
53
+ dict(type=PackTrackInputs)
54
+ ]
55
+
56
+ test_pipeline = [
57
+ dict(
58
+ type=TransformBroadcaster,
59
+ transforms=[
60
+ dict(type=LoadImageFromFile, backend_args=backend_args),
61
+ dict(type=Resize, scale=img_scale, keep_ratio=True),
62
+ dict(type=LoadTrackAnnotations)
63
+ ]),
64
+ dict(type=PackTrackInputs)
65
+ ]
66
+
67
+ # dataloader
68
+ train_dataloader = dict(
69
+ batch_size=2,
70
+ num_workers=2,
71
+ persistent_workers=True,
72
+ sampler=dict(type=TrackImgSampler), # image-based sampling
73
+ dataset=dict(
74
+ type=dataset_type,
75
+ data_root=data_root,
76
+ visibility_thr=-1,
77
+ ann_file='annotations/half-train_cocoformat.json',
78
+ data_prefix=dict(img_path='train'),
79
+ metainfo=dict(classes=('pedestrian', )),
80
+ pipeline=train_pipeline))
81
+ val_dataloader = dict(
82
+ batch_size=1,
83
+ num_workers=2,
84
+ persistent_workers=True,
85
+ # Now we support two ways to test, image_based and video_based
86
+ # if you want to use video_based sampling, you can use as follows
87
+ # sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
88
+ sampler=dict(type=TrackImgSampler), # image-based sampling
89
+ dataset=dict(
90
+ type=dataset_type,
91
+ data_root=data_root,
92
+ ann_file='annotations/half-val_cocoformat.json',
93
+ data_prefix=dict(img_path='train'),
94
+ test_mode=True,
95
+ pipeline=test_pipeline))
96
+ test_dataloader = val_dataloader
97
+
98
+ # evaluator
99
+ val_evaluator = dict(
100
+ type=MOTChallengeMetric, metric=['HOTA', 'CLEAR', 'Identity'])
101
+ test_evaluator = val_evaluator
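The inline comment in `val_dataloader` above notes that evaluation can also run with video-based sampling. A minimal sketch of that override, assuming the standard `DefaultSampler` from mmengine:

```python
# Hypothetical override switching MOT evaluation from image-based sampling
# (TrackImgSampler) to video-based sampling, as the inline comment suggests.
from mmengine.config import read_base
from mmengine.dataset.sampler import DefaultSampler

with read_base():
    from .._base_.datasets.mot_challenge import *  # noqa: F401,F403

val_dataloader.update(
    dict(sampler=dict(type=DefaultSampler, shuffle=False, round_up=False)))
test_dataloader = val_dataloader
```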
mmdet/configs/_base_/default_runtime.py ADDED
@@ -0,0 +1,33 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from mmengine.hooks import (CheckpointHook, DistSamplerSeedHook, IterTimerHook,
3
+ LoggerHook, ParamSchedulerHook)
4
+ from mmengine.runner import LogProcessor
5
+ from mmengine.visualization import LocalVisBackend
6
+
7
+ from mmdet.engine.hooks import DetVisualizationHook
8
+ from mmdet.visualization import DetLocalVisualizer
9
+
10
+ default_scope = None
11
+
12
+ default_hooks = dict(
13
+ timer=dict(type=IterTimerHook),
14
+ logger=dict(type=LoggerHook, interval=50),
15
+ param_scheduler=dict(type=ParamSchedulerHook),
16
+ checkpoint=dict(type=CheckpointHook, interval=1),
17
+ sampler_seed=dict(type=DistSamplerSeedHook),
18
+ visualization=dict(type=DetVisualizationHook))
19
+
20
+ env_cfg = dict(
21
+ cudnn_benchmark=False,
22
+ mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
23
+ dist_cfg=dict(backend='nccl'),
24
+ )
25
+
26
+ vis_backends = [dict(type=LocalVisBackend)]
27
+ visualizer = dict(
28
+ type=DetLocalVisualizer, vis_backends=vis_backends, name='visualizer')
29
+ log_processor = dict(type=LogProcessor, window_size=50, by_epoch=True)
30
+
31
+ log_level = 'INFO'
32
+ load_from = None
33
+ resume = False
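These runtime defaults are shared by every config in the tree. A small sketch of a derived config that also logs to TensorBoard (using mmengine's `TensorboardVisBackend`, which is not configured in this commit) and keeps only the two most recent checkpoints, following the `default_hooks.update(...)` pattern used elsewhere in these files:

```python
# Sketch of a derived config extending the runtime defaults above.
from mmengine.config import read_base
from mmengine.visualization import TensorboardVisBackend

with read_base():
    from .._base_.default_runtime import *  # noqa: F401,F403

vis_backends.append(dict(type=TensorboardVisBackend))
visualizer.update(dict(vis_backends=vis_backends))
default_hooks.update(dict(checkpoint=dict(interval=1, max_keep_ckpts=2)))
```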
mmdet/configs/_base_/models/cascade_mask_rcnn_r50_fpn.py ADDED
@@ -0,0 +1,220 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from mmcv.ops import RoIAlign, nms
3
+ from torch.nn import BatchNorm2d
4
+
5
+ from mmdet.models.backbones.resnet import ResNet
6
+ from mmdet.models.data_preprocessors.data_preprocessor import \
7
+ DetDataPreprocessor
8
+ from mmdet.models.dense_heads.rpn_head import RPNHead
9
+ from mmdet.models.detectors.cascade_rcnn import CascadeRCNN
10
+ from mmdet.models.losses.cross_entropy_loss import CrossEntropyLoss
11
+ from mmdet.models.losses.smooth_l1_loss import SmoothL1Loss
12
+ from mmdet.models.necks.fpn import FPN
13
+ from mmdet.models.roi_heads.bbox_heads.convfc_bbox_head import \
14
+ Shared2FCBBoxHead
15
+ from mmdet.models.roi_heads.cascade_roi_head import CascadeRoIHead
16
+ from mmdet.models.roi_heads.mask_heads.fcn_mask_head import FCNMaskHead
17
+ from mmdet.models.roi_heads.roi_extractors.single_level_roi_extractor import \
18
+ SingleRoIExtractor
19
+ from mmdet.models.task_modules.assigners.max_iou_assigner import MaxIoUAssigner
20
+ from mmdet.models.task_modules.coders.delta_xywh_bbox_coder import \
21
+ DeltaXYWHBBoxCoder
22
+ from mmdet.models.task_modules.prior_generators.anchor_generator import \
23
+ AnchorGenerator
24
+ from mmdet.models.task_modules.samplers.random_sampler import RandomSampler
25
+
26
+ # model settings
27
+ model = dict(
28
+ type=CascadeRCNN,
29
+ data_preprocessor=dict(
30
+ type=DetDataPreprocessor,
31
+ mean=[123.675, 116.28, 103.53],
32
+ std=[58.395, 57.12, 57.375],
33
+ bgr_to_rgb=True,
34
+ pad_mask=True,
35
+ pad_size_divisor=32),
36
+ backbone=dict(
37
+ type=ResNet,
38
+ depth=50,
39
+ num_stages=4,
40
+ out_indices=(0, 1, 2, 3),
41
+ frozen_stages=1,
42
+ norm_cfg=dict(type=BatchNorm2d, requires_grad=True),
43
+ norm_eval=True,
44
+ style='pytorch',
45
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
46
+ neck=dict(
47
+ type=FPN,
48
+ in_channels=[256, 512, 1024, 2048],
49
+ out_channels=256,
50
+ num_outs=5),
51
+ rpn_head=dict(
52
+ type=RPNHead,
53
+ in_channels=256,
54
+ feat_channels=256,
55
+ anchor_generator=dict(
56
+ type=AnchorGenerator,
57
+ scales=[8],
58
+ ratios=[0.5, 1.0, 2.0],
59
+ strides=[4, 8, 16, 32, 64]),
60
+ bbox_coder=dict(
61
+ type=DeltaXYWHBBoxCoder,
62
+ target_means=[.0, .0, .0, .0],
63
+ target_stds=[1.0, 1.0, 1.0, 1.0]),
64
+ loss_cls=dict(
65
+ type=CrossEntropyLoss, use_sigmoid=True, loss_weight=1.0),
66
+ loss_bbox=dict(type=SmoothL1Loss, beta=1.0 / 9.0, loss_weight=1.0)),
67
+ roi_head=dict(
68
+ type=CascadeRoIHead,
69
+ num_stages=3,
70
+ stage_loss_weights=[1, 0.5, 0.25],
71
+ bbox_roi_extractor=dict(
72
+ type=SingleRoIExtractor,
73
+ roi_layer=dict(type=RoIAlign, output_size=7, sampling_ratio=0),
74
+ out_channels=256,
75
+ featmap_strides=[4, 8, 16, 32]),
76
+ bbox_head=[
77
+ dict(
78
+ type=Shared2FCBBoxHead,
79
+ in_channels=256,
80
+ fc_out_channels=1024,
81
+ roi_feat_size=7,
82
+ num_classes=80,
83
+ bbox_coder=dict(
84
+ type=DeltaXYWHBBoxCoder,
85
+ target_means=[0., 0., 0., 0.],
86
+ target_stds=[0.1, 0.1, 0.2, 0.2]),
87
+ reg_class_agnostic=True,
88
+ loss_cls=dict(
89
+ type=CrossEntropyLoss, use_sigmoid=False, loss_weight=1.0),
90
+ loss_bbox=dict(type=SmoothL1Loss, beta=1.0, loss_weight=1.0)),
91
+ dict(
92
+ type=Shared2FCBBoxHead,
93
+ in_channels=256,
94
+ fc_out_channels=1024,
95
+ roi_feat_size=7,
96
+ num_classes=80,
97
+ bbox_coder=dict(
98
+ type=DeltaXYWHBBoxCoder,
99
+ target_means=[0., 0., 0., 0.],
100
+ target_stds=[0.05, 0.05, 0.1, 0.1]),
101
+ reg_class_agnostic=True,
102
+ loss_cls=dict(
103
+ type=CrossEntropyLoss, use_sigmoid=False, loss_weight=1.0),
104
+ loss_bbox=dict(type=SmoothL1Loss, beta=1.0, loss_weight=1.0)),
105
+ dict(
106
+ type=Shared2FCBBoxHead,
107
+ in_channels=256,
108
+ fc_out_channels=1024,
109
+ roi_feat_size=7,
110
+ num_classes=80,
111
+ bbox_coder=dict(
112
+ type=DeltaXYWHBBoxCoder,
113
+ target_means=[0., 0., 0., 0.],
114
+ target_stds=[0.033, 0.033, 0.067, 0.067]),
115
+ reg_class_agnostic=True,
116
+ loss_cls=dict(
117
+ type=CrossEntropyLoss, use_sigmoid=False, loss_weight=1.0),
118
+ loss_bbox=dict(type=SmoothL1Loss, beta=1.0, loss_weight=1.0))
119
+ ],
120
+ mask_roi_extractor=dict(
121
+ type=SingleRoIExtractor,
122
+ roi_layer=dict(type=RoIAlign, output_size=14, sampling_ratio=0),
123
+ out_channels=256,
124
+ featmap_strides=[4, 8, 16, 32]),
125
+ mask_head=dict(
126
+ type=FCNMaskHead,
127
+ num_convs=4,
128
+ in_channels=256,
129
+ conv_out_channels=256,
130
+ num_classes=80,
131
+ loss_mask=dict(
132
+ type=CrossEntropyLoss, use_mask=True, loss_weight=1.0))),
133
+ # model training and testing settings
134
+ train_cfg=dict(
135
+ rpn=dict(
136
+ assigner=dict(
137
+ type=MaxIoUAssigner,
138
+ pos_iou_thr=0.7,
139
+ neg_iou_thr=0.3,
140
+ min_pos_iou=0.3,
141
+ match_low_quality=True,
142
+ ignore_iof_thr=-1),
143
+ sampler=dict(
144
+ type=RandomSampler,
145
+ num=256,
146
+ pos_fraction=0.5,
147
+ neg_pos_ub=-1,
148
+ add_gt_as_proposals=False),
149
+ allowed_border=0,
150
+ pos_weight=-1,
151
+ debug=False),
152
+ rpn_proposal=dict(
153
+ nms_pre=2000,
154
+ max_per_img=2000,
155
+ nms=dict(type=nms, iou_threshold=0.7),
156
+ min_bbox_size=0),
157
+ rcnn=[
158
+ dict(
159
+ assigner=dict(
160
+ type=MaxIoUAssigner,
161
+ pos_iou_thr=0.5,
162
+ neg_iou_thr=0.5,
163
+ min_pos_iou=0.5,
164
+ match_low_quality=False,
165
+ ignore_iof_thr=-1),
166
+ sampler=dict(
167
+ type=RandomSampler,
168
+ num=512,
169
+ pos_fraction=0.25,
170
+ neg_pos_ub=-1,
171
+ add_gt_as_proposals=True),
172
+ mask_size=28,
173
+ pos_weight=-1,
174
+ debug=False),
175
+ dict(
176
+ assigner=dict(
177
+ type=MaxIoUAssigner,
178
+ pos_iou_thr=0.6,
179
+ neg_iou_thr=0.6,
180
+ min_pos_iou=0.6,
181
+ match_low_quality=False,
182
+ ignore_iof_thr=-1),
183
+ sampler=dict(
184
+ type=RandomSampler,
185
+ num=512,
186
+ pos_fraction=0.25,
187
+ neg_pos_ub=-1,
188
+ add_gt_as_proposals=True),
189
+ mask_size=28,
190
+ pos_weight=-1,
191
+ debug=False),
192
+ dict(
193
+ assigner=dict(
194
+ type=MaxIoUAssigner,
195
+ pos_iou_thr=0.7,
196
+ neg_iou_thr=0.7,
197
+ min_pos_iou=0.7,
198
+ match_low_quality=False,
199
+ ignore_iof_thr=-1),
200
+ sampler=dict(
201
+ type=RandomSampler,
202
+ num=512,
203
+ pos_fraction=0.25,
204
+ neg_pos_ub=-1,
205
+ add_gt_as_proposals=True),
206
+ mask_size=28,
207
+ pos_weight=-1,
208
+ debug=False)
209
+ ]),
210
+ test_cfg=dict(
211
+ rpn=dict(
212
+ nms_pre=1000,
213
+ max_per_img=1000,
214
+ nms=dict(type=nms, iou_threshold=0.7),
215
+ min_bbox_size=0),
216
+ rcnn=dict(
217
+ score_thr=0.05,
218
+ nms=dict(type=nms, iou_threshold=0.5),
219
+ max_per_img=100,
220
+ mask_thr_binary=0.5)))
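The three `bbox_head` stages above share one structure and differ only in their regression `target_stds` and positive-IoU thresholds, so dataset-specific overrides have to touch every stage. A hedged sketch (the 3-class value is purely illustrative):

```python
# Hypothetical derived config adapting the cascade model above to a 3-class
# dataset: each cascade stage and the mask head carry their own num_classes.
from mmengine.config import read_base

with read_base():
    from .._base_.models.cascade_mask_rcnn_r50_fpn import *  # noqa: F401,F403

num_classes = 3  # illustration value
for bbox_head in model['roi_head']['bbox_head']:
    bbox_head['num_classes'] = num_classes
model['roi_head']['mask_head']['num_classes'] = num_classes
```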
mmdet/configs/_base_/models/cascade_rcnn_r50_fpn.py ADDED
@@ -0,0 +1,201 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from mmcv.ops import RoIAlign, nms
3
+ from torch.nn import BatchNorm2d
4
+
5
+ from mmdet.models.backbones.resnet import ResNet
6
+ from mmdet.models.data_preprocessors.data_preprocessor import \
7
+ DetDataPreprocessor
8
+ from mmdet.models.dense_heads.rpn_head import RPNHead
9
+ from mmdet.models.detectors.cascade_rcnn import CascadeRCNN
10
+ from mmdet.models.losses.cross_entropy_loss import CrossEntropyLoss
11
+ from mmdet.models.losses.smooth_l1_loss import SmoothL1Loss
12
+ from mmdet.models.necks.fpn import FPN
13
+ from mmdet.models.roi_heads.bbox_heads.convfc_bbox_head import \
14
+ Shared2FCBBoxHead
15
+ from mmdet.models.roi_heads.cascade_roi_head import CascadeRoIHead
16
+ from mmdet.models.roi_heads.roi_extractors.single_level_roi_extractor import \
17
+ SingleRoIExtractor
18
+ from mmdet.models.task_modules.assigners.max_iou_assigner import MaxIoUAssigner
19
+ from mmdet.models.task_modules.coders.delta_xywh_bbox_coder import \
20
+ DeltaXYWHBBoxCoder
21
+ from mmdet.models.task_modules.prior_generators.anchor_generator import \
22
+ AnchorGenerator
23
+ from mmdet.models.task_modules.samplers.random_sampler import RandomSampler
24
+
25
+ # model settings
26
+ model = dict(
27
+ type=CascadeRCNN,
28
+ data_preprocessor=dict(
29
+ type=DetDataPreprocessor,
30
+ mean=[123.675, 116.28, 103.53],
31
+ std=[58.395, 57.12, 57.375],
32
+ bgr_to_rgb=True,
33
+ pad_size_divisor=32),
34
+ backbone=dict(
35
+ type=ResNet,
36
+ depth=50,
37
+ num_stages=4,
38
+ out_indices=(0, 1, 2, 3),
39
+ frozen_stages=1,
40
+ norm_cfg=dict(type=BatchNorm2d, requires_grad=True),
41
+ norm_eval=True,
42
+ style='pytorch',
43
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
44
+ neck=dict(
45
+ type=FPN,
46
+ in_channels=[256, 512, 1024, 2048],
47
+ out_channels=256,
48
+ num_outs=5),
49
+ rpn_head=dict(
50
+ type=RPNHead,
51
+ in_channels=256,
52
+ feat_channels=256,
53
+ anchor_generator=dict(
54
+ type=AnchorGenerator,
55
+ scales=[8],
56
+ ratios=[0.5, 1.0, 2.0],
57
+ strides=[4, 8, 16, 32, 64]),
58
+ bbox_coder=dict(
59
+ type=DeltaXYWHBBoxCoder,
60
+ target_means=[.0, .0, .0, .0],
61
+ target_stds=[1.0, 1.0, 1.0, 1.0]),
62
+ loss_cls=dict(
63
+ type=CrossEntropyLoss, use_sigmoid=True, loss_weight=1.0),
64
+ loss_bbox=dict(type=SmoothL1Loss, beta=1.0 / 9.0, loss_weight=1.0)),
65
+ roi_head=dict(
66
+ type=CascadeRoIHead,
67
+ num_stages=3,
68
+ stage_loss_weights=[1, 0.5, 0.25],
69
+ bbox_roi_extractor=dict(
70
+ type=SingleRoIExtractor,
71
+ roi_layer=dict(type=RoIAlign, output_size=7, sampling_ratio=0),
72
+ out_channels=256,
73
+ featmap_strides=[4, 8, 16, 32]),
74
+ bbox_head=[
75
+ dict(
76
+ type=Shared2FCBBoxHead,
77
+ in_channels=256,
78
+ fc_out_channels=1024,
79
+ roi_feat_size=7,
80
+ num_classes=80,
81
+ bbox_coder=dict(
82
+ type=DeltaXYWHBBoxCoder,
83
+ target_means=[0., 0., 0., 0.],
84
+ target_stds=[0.1, 0.1, 0.2, 0.2]),
85
+ reg_class_agnostic=True,
86
+ loss_cls=dict(
87
+ type=CrossEntropyLoss, use_sigmoid=False, loss_weight=1.0),
88
+ loss_bbox=dict(type=SmoothL1Loss, beta=1.0, loss_weight=1.0)),
89
+ dict(
90
+ type=Shared2FCBBoxHead,
91
+ in_channels=256,
92
+ fc_out_channels=1024,
93
+ roi_feat_size=7,
94
+ num_classes=80,
95
+ bbox_coder=dict(
96
+ type=DeltaXYWHBBoxCoder,
97
+ target_means=[0., 0., 0., 0.],
98
+ target_stds=[0.05, 0.05, 0.1, 0.1]),
99
+ reg_class_agnostic=True,
100
+ loss_cls=dict(
101
+ type=CrossEntropyLoss, use_sigmoid=False, loss_weight=1.0),
102
+ loss_bbox=dict(type=SmoothL1Loss, beta=1.0, loss_weight=1.0)),
103
+ dict(
104
+ type=Shared2FCBBoxHead,
105
+ in_channels=256,
106
+ fc_out_channels=1024,
107
+ roi_feat_size=7,
108
+ num_classes=80,
109
+ bbox_coder=dict(
110
+ type=DeltaXYWHBBoxCoder,
111
+ target_means=[0., 0., 0., 0.],
112
+ target_stds=[0.033, 0.033, 0.067, 0.067]),
113
+ reg_class_agnostic=True,
114
+ loss_cls=dict(
115
+ type=CrossEntropyLoss, use_sigmoid=False, loss_weight=1.0),
116
+ loss_bbox=dict(type=SmoothL1Loss, beta=1.0, loss_weight=1.0))
117
+ ]),
118
+ # model training and testing settings
119
+ train_cfg=dict(
120
+ rpn=dict(
121
+ assigner=dict(
122
+ type=MaxIoUAssigner,
123
+ pos_iou_thr=0.7,
124
+ neg_iou_thr=0.3,
125
+ min_pos_iou=0.3,
126
+ match_low_quality=True,
127
+ ignore_iof_thr=-1),
128
+ sampler=dict(
129
+ type=RandomSampler,
130
+ num=256,
131
+ pos_fraction=0.5,
132
+ neg_pos_ub=-1,
133
+ add_gt_as_proposals=False),
134
+ allowed_border=0,
135
+ pos_weight=-1,
136
+ debug=False),
137
+ rpn_proposal=dict(
138
+ nms_pre=2000,
139
+ max_per_img=2000,
140
+ nms=dict(type=nms, iou_threshold=0.7),
141
+ min_bbox_size=0),
142
+ rcnn=[
143
+ dict(
144
+ assigner=dict(
145
+ type=MaxIoUAssigner,
146
+ pos_iou_thr=0.5,
147
+ neg_iou_thr=0.5,
148
+ min_pos_iou=0.5,
149
+ match_low_quality=False,
150
+ ignore_iof_thr=-1),
151
+ sampler=dict(
152
+ type=RandomSampler,
153
+ num=512,
154
+ pos_fraction=0.25,
155
+ neg_pos_ub=-1,
156
+ add_gt_as_proposals=True),
157
+ pos_weight=-1,
158
+ debug=False),
159
+ dict(
160
+ assigner=dict(
161
+ type=MaxIoUAssigner,
162
+ pos_iou_thr=0.6,
163
+ neg_iou_thr=0.6,
164
+ min_pos_iou=0.6,
165
+ match_low_quality=False,
166
+ ignore_iof_thr=-1),
167
+ sampler=dict(
168
+ type=RandomSampler,
169
+ num=512,
170
+ pos_fraction=0.25,
171
+ neg_pos_ub=-1,
172
+ add_gt_as_proposals=True),
173
+ pos_weight=-1,
174
+ debug=False),
175
+ dict(
176
+ assigner=dict(
177
+ type=MaxIoUAssigner,
178
+ pos_iou_thr=0.7,
179
+ neg_iou_thr=0.7,
180
+ min_pos_iou=0.7,
181
+ match_low_quality=False,
182
+ ignore_iof_thr=-1),
183
+ sampler=dict(
184
+ type=RandomSampler,
185
+ num=512,
186
+ pos_fraction=0.25,
187
+ neg_pos_ub=-1,
188
+ add_gt_as_proposals=True),
189
+ pos_weight=-1,
190
+ debug=False)
191
+ ]),
192
+ test_cfg=dict(
193
+ rpn=dict(
194
+ nms_pre=1000,
195
+ max_per_img=1000,
196
+ nms=dict(type=nms, iou_threshold=0.7),
197
+ min_bbox_size=0),
198
+ rcnn=dict(
199
+ score_thr=0.05,
200
+ nms=dict(type=nms, iou_threshold=0.5),
201
+ max_per_img=100)))
mmdet/configs/_base_/models/faster_rcnn_r50_fpn.py ADDED
@@ -0,0 +1,138 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from mmcv.ops import RoIAlign, nms
3
+ from torch.nn import BatchNorm2d
4
+
5
+ from mmdet.models.backbones.resnet import ResNet
6
+ from mmdet.models.data_preprocessors.data_preprocessor import \
7
+ DetDataPreprocessor
8
+ from mmdet.models.dense_heads.rpn_head import RPNHead
9
+ from mmdet.models.detectors.faster_rcnn import FasterRCNN
10
+ from mmdet.models.losses.cross_entropy_loss import CrossEntropyLoss
11
+ from mmdet.models.losses.smooth_l1_loss import L1Loss
12
+ from mmdet.models.necks.fpn import FPN
13
+ from mmdet.models.roi_heads.bbox_heads.convfc_bbox_head import \
14
+ Shared2FCBBoxHead
15
+ from mmdet.models.roi_heads.roi_extractors.single_level_roi_extractor import \
16
+ SingleRoIExtractor
17
+ from mmdet.models.roi_heads.standard_roi_head import StandardRoIHead
18
+ from mmdet.models.task_modules.assigners.max_iou_assigner import MaxIoUAssigner
19
+ from mmdet.models.task_modules.coders.delta_xywh_bbox_coder import \
20
+ DeltaXYWHBBoxCoder
21
+ from mmdet.models.task_modules.prior_generators.anchor_generator import \
22
+ AnchorGenerator
23
+ from mmdet.models.task_modules.samplers.random_sampler import RandomSampler
24
+
25
+ # model settings
26
+ model = dict(
27
+ type=FasterRCNN,
28
+ data_preprocessor=dict(
29
+ type=DetDataPreprocessor,
30
+ mean=[123.675, 116.28, 103.53],
31
+ std=[58.395, 57.12, 57.375],
32
+ bgr_to_rgb=True,
33
+ pad_size_divisor=32),
34
+ backbone=dict(
35
+ type=ResNet,
36
+ depth=50,
37
+ num_stages=4,
38
+ out_indices=(0, 1, 2, 3),
39
+ frozen_stages=1,
40
+ norm_cfg=dict(type=BatchNorm2d, requires_grad=True),
41
+ norm_eval=True,
42
+ style='pytorch',
43
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
44
+ neck=dict(
45
+ type=FPN,
46
+ in_channels=[256, 512, 1024, 2048],
47
+ out_channels=256,
48
+ num_outs=5),
49
+ rpn_head=dict(
50
+ type=RPNHead,
51
+ in_channels=256,
52
+ feat_channels=256,
53
+ anchor_generator=dict(
54
+ type=AnchorGenerator,
55
+ scales=[8],
56
+ ratios=[0.5, 1.0, 2.0],
57
+ strides=[4, 8, 16, 32, 64]),
58
+ bbox_coder=dict(
59
+ type=DeltaXYWHBBoxCoder,
60
+ target_means=[.0, .0, .0, .0],
61
+ target_stds=[1.0, 1.0, 1.0, 1.0]),
62
+ loss_cls=dict(
63
+ type=CrossEntropyLoss, use_sigmoid=True, loss_weight=1.0),
64
+ loss_bbox=dict(type=L1Loss, loss_weight=1.0)),
65
+ roi_head=dict(
66
+ type=StandardRoIHead,
67
+ bbox_roi_extractor=dict(
68
+ type=SingleRoIExtractor,
69
+ roi_layer=dict(type=RoIAlign, output_size=7, sampling_ratio=0),
70
+ out_channels=256,
71
+ featmap_strides=[4, 8, 16, 32]),
72
+ bbox_head=dict(
73
+ type=Shared2FCBBoxHead,
74
+ in_channels=256,
75
+ fc_out_channels=1024,
76
+ roi_feat_size=7,
77
+ num_classes=80,
78
+ bbox_coder=dict(
79
+ type=DeltaXYWHBBoxCoder,
80
+ target_means=[0., 0., 0., 0.],
81
+ target_stds=[0.1, 0.1, 0.2, 0.2]),
82
+ reg_class_agnostic=False,
83
+ loss_cls=dict(
84
+ type=CrossEntropyLoss, use_sigmoid=False, loss_weight=1.0),
85
+ loss_bbox=dict(type=L1Loss, loss_weight=1.0))),
86
+ # model training and testing settings
87
+ train_cfg=dict(
88
+ rpn=dict(
89
+ assigner=dict(
90
+ type=MaxIoUAssigner,
91
+ pos_iou_thr=0.7,
92
+ neg_iou_thr=0.3,
93
+ min_pos_iou=0.3,
94
+ match_low_quality=True,
95
+ ignore_iof_thr=-1),
96
+ sampler=dict(
97
+ type=RandomSampler,
98
+ num=256,
99
+ pos_fraction=0.5,
100
+ neg_pos_ub=-1,
101
+ add_gt_as_proposals=False),
102
+ allowed_border=-1,
103
+ pos_weight=-1,
104
+ debug=False),
105
+ rpn_proposal=dict(
106
+ nms_pre=2000,
107
+ max_per_img=1000,
108
+ nms=dict(type=nms, iou_threshold=0.7),
109
+ min_bbox_size=0),
110
+ rcnn=dict(
111
+ assigner=dict(
112
+ type=MaxIoUAssigner,
113
+ pos_iou_thr=0.5,
114
+ neg_iou_thr=0.5,
115
+ min_pos_iou=0.5,
116
+ match_low_quality=False,
117
+ ignore_iof_thr=-1),
118
+ sampler=dict(
119
+ type=RandomSampler,
120
+ num=512,
121
+ pos_fraction=0.25,
122
+ neg_pos_ub=-1,
123
+ add_gt_as_proposals=True),
124
+ pos_weight=-1,
125
+ debug=False)),
126
+ test_cfg=dict(
127
+ rpn=dict(
128
+ nms_pre=1000,
129
+ max_per_img=1000,
130
+ nms=dict(type=nms, iou_threshold=0.7),
131
+ min_bbox_size=0),
132
+ rcnn=dict(
133
+ score_thr=0.05,
134
+ nms=dict(type=nms, iou_threshold=0.5),
135
+ max_per_img=100)
136
+ # soft-nms is also supported for rcnn testing
137
+ # e.g., nms=dict(type='soft_nms', iou_threshold=0.5, min_score=0.05)
138
+ ))
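Both the RPN and the RoI head above regress boxes through `DeltaXYWHBBoxCoder`. The following plain-Python sketch (not the mmdet implementation) shows the encoding that the configured `target_means`/`target_stds` act on:

```python
# Minimal sketch of delta-xywh box encoding: a ground-truth box is expressed
# as normalized center/size offsets relative to a proposal box.
import math

def encode(proposal, gt, means=(0., 0., 0., 0.), stds=(0.1, 0.1, 0.2, 0.2)):
    """proposal / gt are (x1, y1, x2, y2) boxes."""
    px, py = (proposal[0] + proposal[2]) / 2, (proposal[1] + proposal[3]) / 2
    pw, ph = proposal[2] - proposal[0], proposal[3] - proposal[1]
    gx, gy = (gt[0] + gt[2]) / 2, (gt[1] + gt[3]) / 2
    gw, gh = gt[2] - gt[0], gt[3] - gt[1]
    deltas = ((gx - px) / pw, (gy - py) / ph,
              math.log(gw / pw), math.log(gh / ph))
    return [(d - m) / s for d, m, s in zip(deltas, means, stds)]

print(encode((100, 100, 200, 200), (110, 95, 215, 205)))
```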
mmdet/configs/_base_/models/mask_rcnn_r50_caffe_c4.py ADDED
@@ -0,0 +1,158 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from mmcv.ops import RoIAlign, nms
3
+ from mmengine.model.weight_init import PretrainedInit
4
+ from torch.nn import BatchNorm2d
5
+
6
+ from mmdet.models.backbones.resnet import ResNet
7
+ from mmdet.models.data_preprocessors.data_preprocessor import \
8
+ DetDataPreprocessor
9
+ from mmdet.models.dense_heads.rpn_head import RPNHead
10
+ from mmdet.models.detectors.mask_rcnn import MaskRCNN
11
+ from mmdet.models.layers import ResLayer
12
+ from mmdet.models.losses.cross_entropy_loss import CrossEntropyLoss
13
+ from mmdet.models.losses.smooth_l1_loss import L1Loss
14
+ from mmdet.models.roi_heads.bbox_heads.bbox_head import BBoxHead
15
+ from mmdet.models.roi_heads.mask_heads.fcn_mask_head import FCNMaskHead
16
+ from mmdet.models.roi_heads.roi_extractors.single_level_roi_extractor import \
17
+ SingleRoIExtractor
18
+ from mmdet.models.roi_heads.standard_roi_head import StandardRoIHead
19
+ from mmdet.models.task_modules.assigners.max_iou_assigner import MaxIoUAssigner
20
+ from mmdet.models.task_modules.coders.delta_xywh_bbox_coder import \
21
+ DeltaXYWHBBoxCoder
22
+ from mmdet.models.task_modules.prior_generators.anchor_generator import \
23
+ AnchorGenerator
24
+ from mmdet.models.task_modules.samplers.random_sampler import RandomSampler
25
+
26
+ # model settings
27
+ norm_cfg = dict(type=BatchNorm2d, requires_grad=False)
28
+ # model settings
29
+ model = dict(
30
+ type=MaskRCNN,
31
+ data_preprocessor=dict(
32
+ type=DetDataPreprocessor,
33
+ mean=[103.530, 116.280, 123.675],
34
+ std=[1.0, 1.0, 1.0],
35
+ bgr_to_rgb=False,
36
+ pad_mask=True,
37
+ pad_size_divisor=32),
38
+ backbone=dict(
39
+ type=ResNet,
40
+ depth=50,
41
+ num_stages=3,
42
+ strides=(1, 2, 2),
43
+ dilations=(1, 1, 1),
44
+ out_indices=(2, ),
45
+ frozen_stages=1,
46
+ norm_cfg=dict(type=BatchNorm2d, requires_grad=True),
47
+ norm_eval=True,
48
+ style='caffe',
49
+ init_cfg=dict(
50
+ type=PretrainedInit,
51
+ checkpoint='open-mmlab://detectron2/resnet50_caffe')),
52
+ rpn_head=dict(
53
+ type=RPNHead,
54
+ in_channels=1024,
55
+ feat_channels=1024,
56
+ anchor_generator=dict(
57
+ type=AnchorGenerator,
58
+ scales=[2, 4, 8, 16, 32],
59
+ ratios=[0.5, 1.0, 2.0],
60
+ strides=[16]),
61
+ bbox_coder=dict(
62
+ type=DeltaXYWHBBoxCoder,
63
+ target_means=[.0, .0, .0, .0],
64
+ target_stds=[1.0, 1.0, 1.0, 1.0]),
65
+ loss_cls=dict(
66
+ type=CrossEntropyLoss, use_sigmoid=True, loss_weight=1.0),
67
+ loss_bbox=dict(type=L1Loss, loss_weight=1.0)),
68
+ roi_head=dict(
69
+ type=StandardRoIHead,
70
+ shared_head=dict(
71
+ type=ResLayer,
72
+ depth=50,
73
+ stage=3,
74
+ stride=2,
75
+ dilation=1,
76
+ style='caffe',
77
+ norm_cfg=norm_cfg,
78
+ norm_eval=True),
79
+ bbox_roi_extractor=dict(
80
+ type=SingleRoIExtractor,
81
+ roi_layer=dict(type=RoIAlign, output_size=14, sampling_ratio=0),
82
+ out_channels=1024,
83
+ featmap_strides=[16]),
84
+ bbox_head=dict(
85
+ type=BBoxHead,
86
+ with_avg_pool=True,
87
+ roi_feat_size=7,
88
+ in_channels=2048,
89
+ num_classes=80,
90
+ bbox_coder=dict(
91
+ type=DeltaXYWHBBoxCoder,
92
+ target_means=[0., 0., 0., 0.],
93
+ target_stds=[0.1, 0.1, 0.2, 0.2]),
94
+ reg_class_agnostic=False,
95
+ loss_cls=dict(
96
+ type=CrossEntropyLoss, use_sigmoid=False, loss_weight=1.0),
97
+ loss_bbox=dict(type=L1Loss, loss_weight=1.0)),
98
+ mask_roi_extractor=None,
99
+ mask_head=dict(
100
+ type=FCNMaskHead,
101
+ num_convs=0,
102
+ in_channels=2048,
103
+ conv_out_channels=256,
104
+ num_classes=80,
105
+ loss_mask=dict(
106
+ type=CrossEntropyLoss, use_mask=True, loss_weight=1.0))),
107
+ # model training and testing settings
108
+ train_cfg=dict(
109
+ rpn=dict(
110
+ assigner=dict(
111
+ type=MaxIoUAssigner,
112
+ pos_iou_thr=0.7,
113
+ neg_iou_thr=0.3,
114
+ min_pos_iou=0.3,
115
+ match_low_quality=True,
116
+ ignore_iof_thr=-1),
117
+ sampler=dict(
118
+ type=RandomSampler,
119
+ num=256,
120
+ pos_fraction=0.5,
121
+ neg_pos_ub=-1,
122
+ add_gt_as_proposals=False),
123
+ allowed_border=0,
124
+ pos_weight=-1,
125
+ debug=False),
126
+ rpn_proposal=dict(
127
+ nms_pre=12000,
128
+ max_per_img=2000,
129
+ nms=dict(type=nms, iou_threshold=0.7),
130
+ min_bbox_size=0),
131
+ rcnn=dict(
132
+ assigner=dict(
133
+ type=MaxIoUAssigner,
134
+ pos_iou_thr=0.5,
135
+ neg_iou_thr=0.5,
136
+ min_pos_iou=0.5,
137
+ match_low_quality=False,
138
+ ignore_iof_thr=-1),
139
+ sampler=dict(
140
+ type=RandomSampler,
141
+ num=512,
142
+ pos_fraction=0.25,
143
+ neg_pos_ub=-1,
144
+ add_gt_as_proposals=True),
145
+ mask_size=14,
146
+ pos_weight=-1,
147
+ debug=False)),
148
+ test_cfg=dict(
149
+ rpn=dict(
150
+ nms_pre=6000,
151
+ max_per_img=1000,
152
+ nms=dict(type=nms, iou_threshold=0.7),
153
+ min_bbox_size=0),
154
+ rcnn=dict(
155
+ score_thr=0.05,
156
+ nms=dict(type=nms, iou_threshold=0.5),
157
+ max_per_img=100,
158
+ mask_thr_binary=0.5)))
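The caffe-style `data_preprocessor` above keeps the BGR channel order (`bgr_to_rgb=False`) and only subtracts per-channel means, since `std=[1.0, 1.0, 1.0]`. A small NumPy sketch of that normalization on a stand-in image:

```python
# Sketch of the caffe-style normalization configured above, applied to a
# random array standing in for a BGR image.
import numpy as np

img_bgr = np.random.randint(0, 256, (480, 640, 3)).astype(np.float32)
mean = np.array([103.530, 116.280, 123.675], dtype=np.float32)  # BGR means
std = np.array([1.0, 1.0, 1.0], dtype=np.float32)               # no scaling
normalized = (img_bgr - mean) / std
print(normalized.mean(axis=(0, 1)))  # roughly centered around zero
```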
mmdet/configs/_base_/models/mask_rcnn_r50_fpn.py ADDED
@@ -0,0 +1,154 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from mmcv.ops import RoIAlign, nms
3
+ from mmengine.model.weight_init import PretrainedInit
4
+ from torch.nn import BatchNorm2d
5
+
6
+ from mmdet.models.backbones.resnet import ResNet
7
+ from mmdet.models.data_preprocessors.data_preprocessor import \
8
+ DetDataPreprocessor
9
+ from mmdet.models.dense_heads.rpn_head import RPNHead
10
+ from mmdet.models.detectors.mask_rcnn import MaskRCNN
11
+ from mmdet.models.losses.cross_entropy_loss import CrossEntropyLoss
12
+ from mmdet.models.losses.smooth_l1_loss import L1Loss
13
+ from mmdet.models.necks.fpn import FPN
14
+ from mmdet.models.roi_heads.bbox_heads.convfc_bbox_head import \
15
+ Shared2FCBBoxHead
16
+ from mmdet.models.roi_heads.mask_heads.fcn_mask_head import FCNMaskHead
17
+ from mmdet.models.roi_heads.roi_extractors.single_level_roi_extractor import \
18
+ SingleRoIExtractor
19
+ from mmdet.models.roi_heads.standard_roi_head import StandardRoIHead
20
+ from mmdet.models.task_modules.assigners.max_iou_assigner import MaxIoUAssigner
21
+ from mmdet.models.task_modules.coders.delta_xywh_bbox_coder import \
22
+ DeltaXYWHBBoxCoder
23
+ from mmdet.models.task_modules.prior_generators.anchor_generator import \
24
+ AnchorGenerator
25
+ from mmdet.models.task_modules.samplers.random_sampler import RandomSampler
26
+
27
+ # model settings
28
+ model = dict(
29
+ type=MaskRCNN,
30
+ data_preprocessor=dict(
31
+ type=DetDataPreprocessor,
32
+ mean=[123.675, 116.28, 103.53],
33
+ std=[58.395, 57.12, 57.375],
34
+ bgr_to_rgb=True,
35
+ pad_mask=True,
36
+ pad_size_divisor=32),
37
+ backbone=dict(
38
+ type=ResNet,
39
+ depth=50,
40
+ num_stages=4,
41
+ out_indices=(0, 1, 2, 3),
42
+ frozen_stages=1,
43
+ norm_cfg=dict(type=BatchNorm2d, requires_grad=True),
44
+ norm_eval=True,
45
+ style='pytorch',
46
+ init_cfg=dict(
47
+ type=PretrainedInit, checkpoint='torchvision://resnet50')),
48
+ neck=dict(
49
+ type=FPN,
50
+ in_channels=[256, 512, 1024, 2048],
51
+ out_channels=256,
52
+ num_outs=5),
53
+ rpn_head=dict(
54
+ type=RPNHead,
55
+ in_channels=256,
56
+ feat_channels=256,
57
+ anchor_generator=dict(
58
+ type=AnchorGenerator,
59
+ scales=[8],
60
+ ratios=[0.5, 1.0, 2.0],
61
+ strides=[4, 8, 16, 32, 64]),
62
+ bbox_coder=dict(
63
+ type=DeltaXYWHBBoxCoder,
64
+ target_means=[.0, .0, .0, .0],
65
+ target_stds=[1.0, 1.0, 1.0, 1.0]),
66
+ loss_cls=dict(
67
+ type=CrossEntropyLoss, use_sigmoid=True, loss_weight=1.0),
68
+ loss_bbox=dict(type=L1Loss, loss_weight=1.0)),
69
+ roi_head=dict(
70
+ type=StandardRoIHead,
71
+ bbox_roi_extractor=dict(
72
+ type=SingleRoIExtractor,
73
+ roi_layer=dict(type=RoIAlign, output_size=7, sampling_ratio=0),
74
+ out_channels=256,
75
+ featmap_strides=[4, 8, 16, 32]),
76
+ bbox_head=dict(
77
+ type=Shared2FCBBoxHead,
78
+ in_channels=256,
79
+ fc_out_channels=1024,
80
+ roi_feat_size=7,
81
+ num_classes=80,
82
+ bbox_coder=dict(
83
+ type=DeltaXYWHBBoxCoder,
84
+ target_means=[0., 0., 0., 0.],
85
+ target_stds=[0.1, 0.1, 0.2, 0.2]),
86
+ reg_class_agnostic=False,
87
+ loss_cls=dict(
88
+ type=CrossEntropyLoss, use_sigmoid=False, loss_weight=1.0),
89
+ loss_bbox=dict(type=L1Loss, loss_weight=1.0)),
90
+ mask_roi_extractor=dict(
91
+ type=SingleRoIExtractor,
92
+ roi_layer=dict(type=RoIAlign, output_size=14, sampling_ratio=0),
93
+ out_channels=256,
94
+ featmap_strides=[4, 8, 16, 32]),
95
+ mask_head=dict(
96
+ type=FCNMaskHead,
97
+ num_convs=4,
98
+ in_channels=256,
99
+ conv_out_channels=256,
100
+ num_classes=80,
101
+ loss_mask=dict(
102
+ type=CrossEntropyLoss, use_mask=True, loss_weight=1.0))),
103
+ # model training and testing settings
104
+ train_cfg=dict(
105
+ rpn=dict(
106
+ assigner=dict(
107
+ type=MaxIoUAssigner,
108
+ pos_iou_thr=0.7,
109
+ neg_iou_thr=0.3,
110
+ min_pos_iou=0.3,
111
+ match_low_quality=True,
112
+ ignore_iof_thr=-1),
113
+ sampler=dict(
114
+ type=RandomSampler,
115
+ num=256,
116
+ pos_fraction=0.5,
117
+ neg_pos_ub=-1,
118
+ add_gt_as_proposals=False),
119
+ allowed_border=-1,
120
+ pos_weight=-1,
121
+ debug=False),
122
+ rpn_proposal=dict(
123
+ nms_pre=2000,
124
+ max_per_img=1000,
125
+ nms=dict(type=nms, iou_threshold=0.7),
126
+ min_bbox_size=0),
127
+ rcnn=dict(
128
+ assigner=dict(
129
+ type=MaxIoUAssigner,
130
+ pos_iou_thr=0.5,
131
+ neg_iou_thr=0.5,
132
+ min_pos_iou=0.5,
133
+ match_low_quality=True,
134
+ ignore_iof_thr=-1),
135
+ sampler=dict(
136
+ type=RandomSampler,
137
+ num=512,
138
+ pos_fraction=0.25,
139
+ neg_pos_ub=-1,
140
+ add_gt_as_proposals=True),
141
+ mask_size=28,
142
+ pos_weight=-1,
143
+ debug=False)),
144
+ test_cfg=dict(
145
+ rpn=dict(
146
+ nms_pre=1000,
147
+ max_per_img=1000,
148
+ nms=dict(type=nms, iou_threshold=0.7),
149
+ min_bbox_size=0),
150
+ rcnn=dict(
151
+ score_thr=0.05,
152
+ nms=dict(type=nms, iou_threshold=0.5),
153
+ max_per_img=100,
154
+ mask_thr_binary=0.5)))
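`pad_size_divisor=32` in the preprocessor above pads every batch so the FPN strides divide the input evenly. A quick sketch of the resulting padded shape:

```python
# Compute the padded input shape enforced by pad_size_divisor=32.
import math

def padded_shape(h, w, divisor=32):
    return math.ceil(h / divisor) * divisor, math.ceil(w / divisor) * divisor

print(padded_shape(800, 1333))  # -> (800, 1344)
```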
mmdet/configs/_base_/models/retinanet_r50_fpn.py ADDED
@@ -0,0 +1,77 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from mmcv.ops import nms
3
+ from torch.nn import BatchNorm2d
4
+
5
+ from mmdet.models import (FPN, DetDataPreprocessor, FocalLoss, L1Loss, ResNet,
6
+ RetinaHead, RetinaNet)
7
+ from mmdet.models.task_modules import (AnchorGenerator, DeltaXYWHBBoxCoder,
8
+ MaxIoUAssigner, PseudoSampler)
9
+
10
+ # model settings
11
+ model = dict(
12
+ type=RetinaNet,
13
+ data_preprocessor=dict(
14
+ type=DetDataPreprocessor,
15
+ mean=[123.675, 116.28, 103.53],
16
+ std=[58.395, 57.12, 57.375],
17
+ bgr_to_rgb=True,
18
+ pad_size_divisor=32),
19
+ backbone=dict(
20
+ type=ResNet,
21
+ depth=50,
22
+ num_stages=4,
23
+ out_indices=(0, 1, 2, 3),
24
+ frozen_stages=1,
25
+ norm_cfg=dict(type=BatchNorm2d, requires_grad=True),
26
+ norm_eval=True,
27
+ style='pytorch',
28
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
29
+ neck=dict(
30
+ type=FPN,
31
+ in_channels=[256, 512, 1024, 2048],
32
+ out_channels=256,
33
+ start_level=1,
34
+ add_extra_convs='on_input',
35
+ num_outs=5),
36
+ bbox_head=dict(
37
+ type=RetinaHead,
38
+ num_classes=80,
39
+ in_channels=256,
40
+ stacked_convs=4,
41
+ feat_channels=256,
42
+ anchor_generator=dict(
43
+ type=AnchorGenerator,
44
+ octave_base_scale=4,
45
+ scales_per_octave=3,
46
+ ratios=[0.5, 1.0, 2.0],
47
+ strides=[8, 16, 32, 64, 128]),
48
+ bbox_coder=dict(
49
+ type=DeltaXYWHBBoxCoder,
50
+ target_means=[.0, .0, .0, .0],
51
+ target_stds=[1.0, 1.0, 1.0, 1.0]),
52
+ loss_cls=dict(
53
+ type=FocalLoss,
54
+ use_sigmoid=True,
55
+ gamma=2.0,
56
+ alpha=0.25,
57
+ loss_weight=1.0),
58
+ loss_bbox=dict(type=L1Loss, loss_weight=1.0)),
59
+ # model training and testing settings
60
+ train_cfg=dict(
61
+ assigner=dict(
62
+ type=MaxIoUAssigner,
63
+ pos_iou_thr=0.5,
64
+ neg_iou_thr=0.4,
65
+ min_pos_iou=0,
66
+ ignore_iof_thr=-1),
67
+ sampler=dict(
68
+ type=PseudoSampler), # Focal loss should use PseudoSampler
69
+ allowed_border=-1,
70
+ pos_weight=-1,
71
+ debug=False),
72
+ test_cfg=dict(
73
+ nms_pre=1000,
74
+ min_bbox_size=0,
75
+ score_thr=0.05,
76
+ nms=dict(type=nms, iou_threshold=0.5),
77
+ max_per_img=100))
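The classification branch above uses sigmoid focal loss with `gamma=2.0` and `alpha=0.25`. A plain-Python sketch of that formula (not the mmdet implementation) makes the down-weighting of easy examples concrete:

```python
# Focal loss for a single binary prediction: FL = -alpha_t * (1 - p_t)^gamma * log(p_t)
import math

def focal_loss(p, target, gamma=2.0, alpha=0.25):
    """p: predicted probability of the positive class; target: 0 or 1."""
    p_t = p if target == 1 else 1 - p
    alpha_t = alpha if target == 1 else 1 - alpha
    return -alpha_t * (1 - p_t) ** gamma * math.log(p_t)

print(focal_loss(0.9, 1))  # easy positive -> tiny loss
print(focal_loss(0.1, 1))  # hard positive -> much larger loss
```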
mmdet/configs/_base_/schedules/schedule_1x.py ADDED
@@ -0,0 +1,33 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from mmengine.optim.optimizer.optimizer_wrapper import OptimWrapper
3
+ from mmengine.optim.scheduler.lr_scheduler import LinearLR, MultiStepLR
4
+ from mmengine.runner.loops import EpochBasedTrainLoop, TestLoop, ValLoop
5
+ from torch.optim.sgd import SGD
6
+
7
+ # training schedule for 1x
8
+ train_cfg = dict(type=EpochBasedTrainLoop, max_epochs=12, val_interval=1)
9
+ val_cfg = dict(type=ValLoop)
10
+ test_cfg = dict(type=TestLoop)
11
+
12
+ # learning rate
13
+ param_scheduler = [
14
+ dict(type=LinearLR, start_factor=0.001, by_epoch=False, begin=0, end=500),
15
+ dict(
16
+ type=MultiStepLR,
17
+ begin=0,
18
+ end=12,
19
+ by_epoch=True,
20
+ milestones=[8, 11],
21
+ gamma=0.1)
22
+ ]
23
+
24
+ # optimizer
25
+ optim_wrapper = dict(
26
+ type=OptimWrapper,
27
+ optimizer=dict(type=SGD, lr=0.02, momentum=0.9, weight_decay=0.0001))
28
+
29
+ # Default setting for scaling LR automatically
30
+ # - `enable` means enable scaling LR automatically
31
+ # or not by default.
32
+ # - `base_batch_size` = (8 GPUs) x (2 samples per GPU).
33
+ auto_scale_lr = dict(enable=False, base_batch_size=16)
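The 1x schedule above combines a 500-iteration linear warmup with step decays at epochs 8 and 11. A rough sketch of the resulting LR curve, assuming a hypothetical 7330 iterations per epoch (roughly COCO at total batch size 16); this approximates, rather than reproduces, mmengine's schedulers:

```python
# Approximate LR produced by LinearLR(start_factor=0.001, end=500 iters)
# followed by MultiStepLR(milestones=[8, 11], gamma=0.1) on a 0.02 base LR.
def lr_at(epoch, iter_in_epoch, base_lr=0.02, iters_per_epoch=7330):
    it = epoch * iters_per_epoch + iter_in_epoch
    warmup = min(it / 500, 1.0)
    lr = base_lr * (0.001 + (1 - 0.001) * warmup)
    for milestone in (8, 11):
        if epoch >= milestone:
            lr *= 0.1
    return lr

print(lr_at(0, 0), lr_at(0, 500), lr_at(8, 0), lr_at(11, 0))
```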
mmdet/configs/_base_/schedules/schedule_2x.py ADDED
@@ -0,0 +1,33 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from mmengine.optim.optimizer.optimizer_wrapper import OptimWrapper
3
+ from mmengine.optim.scheduler.lr_scheduler import LinearLR, MultiStepLR
4
+ from mmengine.runner.loops import EpochBasedTrainLoop, TestLoop, ValLoop
5
+ from torch.optim.sgd import SGD
6
+
7
+ # training schedule for 2x
8
+ train_cfg = dict(type=EpochBasedTrainLoop, max_epochs=24, val_interval=1)
9
+ val_cfg = dict(type=ValLoop)
10
+ test_cfg = dict(type=TestLoop)
11
+
12
+ # learning rate
13
+ param_scheduler = [
14
+ dict(type=LinearLR, start_factor=0.001, by_epoch=False, begin=0, end=500),
15
+ dict(
16
+ type=MultiStepLR,
17
+ begin=0,
18
+ end=24,
19
+ by_epoch=True,
20
+ milestones=[16, 22],
21
+ gamma=0.1)
22
+ ]
23
+
24
+ # optimizer
25
+ optim_wrapper = dict(
26
+ type=OptimWrapper,
27
+ optimizer=dict(type=SGD, lr=0.02, momentum=0.9, weight_decay=0.0001))
28
+
29
+ # Default setting for scaling LR automatically
30
+ # - `enable` means enable scaling LR automatically
31
+ # or not by default.
32
+ # - `base_batch_size` = (8 GPUs) x (2 samples per GPU).
33
+ auto_scale_lr = dict(enable=False, base_batch_size=16)
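`auto_scale_lr` in both schedules implements the linear scaling rule: when enabled, the 0.02 base LR (tuned for 8 GPUs x 2 images each, i.e. `base_batch_size=16`) is rescaled by the actual total batch size. A sketch of the arithmetic:

```python
# Linear LR scaling: lr = base_lr * (total batch size / base_batch_size).
def scaled_lr(base_lr=0.02, base_batch_size=16, num_gpus=8, samples_per_gpu=2):
    return base_lr * (num_gpus * samples_per_gpu) / base_batch_size

print(scaled_lr(num_gpus=4))   # 0.01 when training on 4 GPUs
print(scaled_lr(num_gpus=16))  # 0.04 when training on 16 GPUs
```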
mmdet/configs/cascade_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco.py ADDED
@@ -0,0 +1,13 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+
3
+ # Please refer to https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta for more details. # noqa
4
+ # mmcv >= 2.0.1
5
+ # mmengine >= 0.8.0
6
+
7
+ from mmengine.config import read_base
8
+
9
+ with read_base():
10
+ from .._base_.datasets.coco_instance import *
11
+ from .._base_.default_runtime import *
12
+ from .._base_.models.cascade_mask_rcnn_r50_fpn import *
13
+ from .._base_.schedules.schedule_1x import *
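This file only composes the four bases through `read_base`; it is consumed like any other MMDetection config, typically via `python tools/train.py <config>`. A sketch of driving it programmatically with mmengine instead, assuming `Config.fromfile` can parse the pure-Python config style described in the linked tutorial (the work directory is illustrative):

```python
# Sketch: load the composed config above and launch training with mmengine.
from mmengine.config import Config
from mmengine.runner import Runner

cfg = Config.fromfile(
    'mmdet/configs/cascade_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco.py')
cfg.work_dir = './work_dirs/cascade_mask_rcnn_r50_fpn_1x_coco'
runner = Runner.from_cfg(cfg)
runner.train()
```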
mmdet/configs/cascade_rcnn/cascade_rcnn_r50_fpn_1x_coco.py ADDED
@@ -0,0 +1,13 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+
3
+ # Please refer to https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta for more details. # noqa
4
+ # mmcv >= 2.0.1
5
+ # mmengine >= 0.8.0
6
+
7
+ from mmengine.config import read_base
8
+
9
+ with read_base():
10
+ from .._base_.datasets.coco_detection import *
11
+ from .._base_.default_runtime import *
12
+ from .._base_.models.cascade_rcnn_r50_fpn import *
13
+ from .._base_.schedules.schedule_1x import *
mmdet/configs/common/lsj_100e_coco_detection.py ADDED
@@ -0,0 +1,134 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+
3
+ # Please refer to https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta for more details. # noqa
4
+ # mmcv >= 2.0.1
5
+ # mmengine >= 0.8.0
6
+
7
+ from mmengine.config import read_base
8
+
9
+ with read_base():
10
+ from .._base_.default_runtime import *
11
+
12
+ from mmengine.dataset.sampler import DefaultSampler
13
+ from mmengine.optim import OptimWrapper
14
+ from mmengine.optim.scheduler.lr_scheduler import LinearLR, MultiStepLR
15
+ from mmengine.runner.loops import EpochBasedTrainLoop, TestLoop, ValLoop
16
+ from torch.optim import SGD
17
+
18
+ from mmdet.datasets import CocoDataset, RepeatDataset
19
+ from mmdet.datasets.transforms.formatting import PackDetInputs
20
+ from mmdet.datasets.transforms.loading import (FilterAnnotations,
21
+ LoadAnnotations,
22
+ LoadImageFromFile)
23
+ from mmdet.datasets.transforms.transforms import (CachedMixUp, CachedMosaic,
24
+ Pad, RandomCrop, RandomFlip,
25
+ RandomResize, Resize)
26
+ from mmdet.evaluation import CocoMetric
27
+
28
+ # dataset settings
29
+ dataset_type = CocoDataset
30
+ data_root = 'data/coco/'
31
+ image_size = (1024, 1024)
32
+
33
+ backend_args = None
34
+
35
+ train_pipeline = [
36
+ dict(type=LoadImageFromFile, backend_args=backend_args),
37
+ dict(type=LoadAnnotations, with_bbox=True, with_mask=True),
38
+ dict(
39
+ type=RandomResize,
40
+ scale=image_size,
41
+ ratio_range=(0.1, 2.0),
42
+ keep_ratio=True),
43
+ dict(
44
+ type=RandomCrop,
45
+ crop_type='absolute_range',
46
+ crop_size=image_size,
47
+ recompute_bbox=True,
48
+ allow_negative_crop=True),
49
+ dict(type=FilterAnnotations, min_gt_bbox_wh=(1e-2, 1e-2)),
50
+ dict(type=RandomFlip, prob=0.5),
51
+ dict(type=PackDetInputs)
52
+ ]
53
+ test_pipeline = [
54
+ dict(type=LoadImageFromFile, backend_args=backend_args),
55
+ dict(type=Resize, scale=(1333, 800), keep_ratio=True),
56
+ dict(type=LoadAnnotations, with_bbox=True),
57
+ dict(
58
+ type=PackDetInputs,
59
+ meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
60
+ 'scale_factor'))
61
+ ]
62
+
63
+ # Use RepeatDataset to speed up training
64
+ train_dataloader = dict(
65
+ batch_size=2,
66
+ num_workers=2,
67
+ persistent_workers=True,
68
+ sampler=dict(type=DefaultSampler, shuffle=True),
69
+ dataset=dict(
70
+ type=RepeatDataset,
71
+ times=4, # set this anywhere from 2 to 16 for 50e - 400e training.
72
+ dataset=dict(
73
+ type=dataset_type,
74
+ data_root=data_root,
75
+ ann_file='annotations/instances_train2017.json',
76
+ data_prefix=dict(img='train2017/'),
77
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
78
+ pipeline=train_pipeline,
79
+ backend_args=backend_args)))
80
+ val_dataloader = dict(
81
+ batch_size=1,
82
+ num_workers=2,
83
+ persistent_workers=True,
84
+ drop_last=False,
85
+ sampler=dict(type=DefaultSampler, shuffle=False),
86
+ dataset=dict(
87
+ type=dataset_type,
88
+ data_root=data_root,
89
+ ann_file='annotations/instances_val2017.json',
90
+ data_prefix=dict(img='val2017/'),
91
+ test_mode=True,
92
+ pipeline=test_pipeline,
93
+ backend_args=backend_args))
94
+ test_dataloader = val_dataloader
95
+
96
+ val_evaluator = dict(
97
+ type=CocoMetric,
98
+ ann_file=data_root + 'annotations/instances_val2017.json',
99
+ metric=['bbox', 'segm'],
100
+ format_only=False,
101
+ backend_args=backend_args)
102
+ test_evaluator = val_evaluator
103
+
104
+ max_epochs = 25
105
+
106
+ train_cfg = dict(
107
+ type=EpochBasedTrainLoop, max_epochs=max_epochs, val_interval=5)
108
+ val_cfg = dict(type=ValLoop)
109
+ test_cfg = dict(type=TestLoop)
110
+
111
+ # optimizer assumes bs=64
112
+ optim_wrapper = dict(
113
+ type=OptimWrapper,
114
+ optimizer=dict(type=SGD, lr=0.1, momentum=0.9, weight_decay=0.00004))
115
+
116
+ # learning rate
117
+ param_scheduler = [
118
+ dict(type=LinearLR, start_factor=0.067, by_epoch=False, begin=0, end=500),
119
+ dict(
120
+ type=MultiStepLR,
121
+ begin=0,
122
+ end=max_epochs,
123
+ by_epoch=True,
124
+ milestones=[22, 24],
125
+ gamma=0.1)
126
+ ]
127
+
128
+ # only keep latest 2 checkpoints
129
+ default_hooks.update(dict(checkpoint=dict(max_keep_ckpts=2)))
130
+
131
+ # NOTE: `auto_scale_lr` is for automatically scaling LR,
132
+ # USER SHOULD NOT CHANGE ITS VALUES.
133
+ # base_batch_size = (32 GPUs) x (2 samples per GPU)
134
+ auto_scale_lr = dict(base_batch_size=64)
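The "100e" in the file name comes from `RepeatDataset`: 25 real epochs over a dataset repeated 4 times. Changing `times` rescales the effective schedule, which is exactly what the 200e variants below do. The arithmetic:

```python
# Effective epochs under the LSJ schedules: RepeatDataset(times) x 25 epochs.
def effective_epochs(times, max_epochs=25):
    return times * max_epochs

for times in (2, 4, 8, 16):
    print(f'times={times}: {effective_epochs(times)}e')
# times=2: 50e, times=4: 100e, times=8: 200e, times=16: 400e
```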
mmdet/configs/common/lsj_100e_coco_instance.py ADDED
@@ -0,0 +1,134 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+
3
+ # Please refer to https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta for more details. # noqa
4
+ # mmcv >= 2.0.1
5
+ # mmengine >= 0.8.0
6
+
7
+ from mmengine.config import read_base
8
+
9
+ with read_base():
10
+ from .._base_.default_runtime import *
11
+
12
+ from mmengine.dataset.sampler import DefaultSampler
13
+ from mmengine.optim import OptimWrapper
14
+ from mmengine.optim.scheduler.lr_scheduler import LinearLR, MultiStepLR
15
+ from mmengine.runner.loops import EpochBasedTrainLoop, TestLoop, ValLoop
16
+ from torch.optim import SGD
17
+
18
+ from mmdet.datasets import CocoDataset, RepeatDataset
19
+ from mmdet.datasets.transforms.formatting import PackDetInputs
20
+ from mmdet.datasets.transforms.loading import (FilterAnnotations,
21
+ LoadAnnotations,
22
+ LoadImageFromFile)
23
+ from mmdet.datasets.transforms.transforms import (CachedMixUp, CachedMosaic,
24
+ Pad, RandomCrop, RandomFlip,
25
+ RandomResize, Resize)
26
+ from mmdet.evaluation import CocoMetric
27
+
28
+ # dataset settings
29
+ dataset_type = CocoDataset
30
+ data_root = 'data/coco/'
31
+ image_size = (1024, 1024)
32
+
33
+ backend_args = None
34
+
35
+ train_pipeline = [
36
+ dict(type=LoadImageFromFile, backend_args=backend_args),
37
+ dict(type=LoadAnnotations, with_bbox=True, with_mask=True),
38
+ dict(
39
+ type=RandomResize,
40
+ scale=image_size,
41
+ ratio_range=(0.1, 2.0),
42
+ keep_ratio=True),
43
+ dict(
44
+ type=RandomCrop,
45
+ crop_type='absolute_range',
46
+ crop_size=image_size,
47
+ recompute_bbox=True,
48
+ allow_negative_crop=True),
49
+ dict(type=FilterAnnotations, min_gt_bbox_wh=(1e-2, 1e-2)),
50
+ dict(type=RandomFlip, prob=0.5),
51
+ dict(type=PackDetInputs)
52
+ ]
53
+ test_pipeline = [
54
+ dict(type=LoadImageFromFile, backend_args=backend_args),
55
+ dict(type=Resize, scale=(1333, 800), keep_ratio=True),
56
+ dict(type=LoadAnnotations, with_bbox=True, with_mask=True),
57
+ dict(
58
+ type=PackDetInputs,
59
+ meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
60
+ 'scale_factor'))
61
+ ]
62
+
63
+ # Use RepeatDataset to speed up training
64
+ train_dataloader = dict(
65
+ batch_size=2,
66
+ num_workers=2,
67
+ persistent_workers=True,
68
+ sampler=dict(type=DefaultSampler, shuffle=True),
69
+ dataset=dict(
70
+ type=RepeatDataset,
71
+ times=4, # simply change this from 2 to 16 for 50e - 400e training.
72
+ dataset=dict(
73
+ type=dataset_type,
74
+ data_root=data_root,
75
+ ann_file='annotations/instances_train2017.json',
76
+ data_prefix=dict(img='train2017/'),
77
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
78
+ pipeline=train_pipeline,
79
+ backend_args=backend_args)))
80
+ val_dataloader = dict(
81
+ batch_size=1,
82
+ num_workers=2,
83
+ persistent_workers=True,
84
+ drop_last=False,
85
+ sampler=dict(type=DefaultSampler, shuffle=False),
86
+ dataset=dict(
87
+ type=dataset_type,
88
+ data_root=data_root,
89
+ ann_file='annotations/instances_val2017.json',
90
+ data_prefix=dict(img='val2017/'),
91
+ test_mode=True,
92
+ pipeline=test_pipeline,
93
+ backend_args=backend_args))
94
+ test_dataloader = val_dataloader
95
+
96
+ val_evaluator = dict(
97
+ type=CocoMetric,
98
+ ann_file=data_root + 'annotations/instances_val2017.json',
99
+ metric=['bbox', 'segm'],
100
+ format_only=False,
101
+ backend_args=backend_args)
102
+ test_evaluator = val_evaluator
103
+
104
+ max_epochs = 25
105
+
106
+ train_cfg = dict(
107
+ type=EpochBasedTrainLoop, max_epochs=max_epochs, val_interval=5)
108
+ val_cfg = dict(type=ValLoop)
109
+ test_cfg = dict(type=TestLoop)
110
+
111
+ # optimizer assumes bs=64
112
+ optim_wrapper = dict(
113
+ type=OptimWrapper,
114
+ optimizer=dict(type=SGD, lr=0.1, momentum=0.9, weight_decay=0.00004))
115
+
116
+ # learning rate
117
+ param_scheduler = [
118
+ dict(type=LinearLR, start_factor=0.067, by_epoch=False, begin=0, end=500),
119
+ dict(
120
+ type=MultiStepLR,
121
+ begin=0,
122
+ end=max_epochs,
123
+ by_epoch=True,
124
+ milestones=[22, 24],
125
+ gamma=0.1)
126
+ ]
127
+
128
+ # only keep latest 2 checkpoints
129
+ default_hooks.update(dict(checkpoint=dict(max_keep_ckpts=2)))
130
+
131
+ # NOTE: `auto_scale_lr` is for automatically scaling LR,
132
+ # USER SHOULD NOT CHANGE ITS VALUES.
133
+ # base_batch_size = (32 GPUs) x (2 samples per GPU)
134
+ auto_scale_lr = dict(base_batch_size=64)
mmdet/configs/common/lsj_200e_coco_detection.py ADDED
@@ -0,0 +1,25 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+
3
+ # Please refer to https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta for more details. # noqa
4
+ # mmcv >= 2.0.1
5
+ # mmengine >= 0.8.0
6
+
7
+ from mmengine.config import read_base
8
+
9
+ with read_base():
10
+ from .lsj_100e_coco_detection import *
11
+
12
+ # 8x25=200e
13
+ train_dataloader.update(dict(dataset=dict(times=8)))
14
+
15
+ # learning rate
16
+ param_scheduler = [
17
+ dict(type=LinearLR, start_factor=0.067, by_epoch=False, begin=0, end=1000),
18
+ dict(
19
+ type=MultiStepLR,
20
+ begin=0,
21
+ end=25,
22
+ by_epoch=True,
23
+ milestones=[22, 24],
24
+ gamma=0.1)
25
+ ]
mmdet/configs/common/lsj_200e_coco_instance.py ADDED
@@ -0,0 +1,25 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+
3
+ # Please refer to https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta for more details. # noqa
4
+ # mmcv >= 2.0.1
5
+ # mmengine >= 0.8.0
6
+
7
+ from mmengine.config import read_base
8
+
9
+ with read_base():
10
+ from .lsj_100e_coco_instance import *
11
+
12
+ # 8x25=200e
13
+ train_dataloader.update(dict(dataset=dict(times=8)))
14
+
15
+ # learning rate
16
+ param_scheduler = [
17
+ dict(type=LinearLR, start_factor=0.067, by_epoch=False, begin=0, end=1000),
18
+ dict(
19
+ type=MultiStepLR,
20
+ begin=0,
21
+ end=25,
22
+ by_epoch=True,
23
+ milestones=[22, 24],
24
+ gamma=0.1)
25
+ ]
mmdet/configs/common/ms_3x_coco.py ADDED
@@ -0,0 +1,130 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+
3
+ # Please refer to https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta for more details. # noqa
4
+ # mmcv >= 2.0.1
5
+ # mmengine >= 0.8.0
6
+
7
+ from mmengine.config import read_base
8
+
9
+ with read_base():
10
+ from .._base_.default_runtime import *
11
+
12
+ from mmcv.transforms import RandomResize
13
+ from mmengine.dataset import RepeatDataset
14
+ from mmengine.dataset.sampler import DefaultSampler
15
+ from mmengine.optim import OptimWrapper
16
+ from mmengine.optim.scheduler.lr_scheduler import LinearLR, MultiStepLR
17
+ from mmengine.runner.loops import EpochBasedTrainLoop, TestLoop, ValLoop
18
+ from torch.optim import SGD
19
+
20
+ from mmdet.datasets import AspectRatioBatchSampler, CocoDataset
21
+ from mmdet.datasets.transforms.formatting import PackDetInputs
22
+ from mmdet.datasets.transforms.loading import (LoadAnnotations,
23
+ LoadImageFromFile)
24
+ from mmdet.datasets.transforms.transforms import RandomFlip, Resize
25
+ from mmdet.evaluation import CocoMetric
26
+
27
+ # dataset settings
28
+ dataset_type = CocoDataset
29
+ data_root = 'data/coco/'
30
+
31
+ # Example to use different file client
32
+ # Method 1: simply set the data root and let the file I/O module
33
+ # automatically infer it from the prefix (LMDB and Memcached are not supported yet)
34
+
35
+ # data_root = 's3://openmmlab/datasets/detection/coco/'
36
+
37
+ # Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6
38
+ # backend_args = dict(
39
+ # backend='petrel',
40
+ # path_mapping=dict({
41
+ # './data/': 's3://openmmlab/datasets/detection/',
42
+ # 'data/': 's3://openmmlab/datasets/detection/'
43
+ # }))
44
+ backend_args = None
45
+
46
+ # In mstrain 3x config, img_scale=[(1333, 640), (1333, 800)],
47
+ # multiscale_mode='range'
48
+ train_pipeline = [
49
+ dict(type=LoadImageFromFile, backend_args=backend_args),
50
+ dict(type=LoadAnnotations, with_bbox=True),
51
+ dict(type=RandomResize, scale=[(1333, 640), (1333, 800)], keep_ratio=True),
52
+ dict(type=RandomFlip, prob=0.5),
53
+ dict(type=PackDetInputs)
54
+ ]
55
+ test_pipeline = [
56
+ dict(type=LoadImageFromFile, backend_args=backend_args),
57
+ dict(type=Resize, scale=(1333, 800), keep_ratio=True),
58
+ dict(type=LoadAnnotations, with_bbox=True),
59
+ dict(
60
+ type=PackDetInputs,
61
+ meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
62
+ 'scale_factor'))
63
+ ]
64
+ train_dataloader = dict(
65
+ batch_size=2,
66
+ num_workers=2,
67
+ persistent_workers=True,
68
+ pin_memory=True,
69
+ sampler=dict(type=DefaultSampler, shuffle=True),
70
+ batch_sampler=dict(type=AspectRatioBatchSampler),
71
+ dataset=dict(
72
+ type=RepeatDataset,
73
+ times=3,
74
+ dataset=dict(
75
+ type=dataset_type,
76
+ data_root=data_root,
77
+ ann_file='annotations/instances_train2017.json',
78
+ data_prefix=dict(img='train2017/'),
79
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
80
+ pipeline=train_pipeline,
81
+ backend_args=backend_args)))
82
+ val_dataloader = dict(
83
+ batch_size=1,
84
+ num_workers=2,
85
+ persistent_workers=True,
86
+ drop_last=False,
87
+ sampler=dict(type=DefaultSampler, shuffle=False),
88
+ dataset=dict(
89
+ type=dataset_type,
90
+ data_root=data_root,
91
+ ann_file='annotations/instances_val2017.json',
92
+ data_prefix=dict(img='val2017/'),
93
+ test_mode=True,
94
+ pipeline=test_pipeline,
95
+ backend_args=backend_args))
96
+ test_dataloader = val_dataloader
97
+
98
+ val_evaluator = dict(
99
+ type=CocoMetric,
100
+ ann_file=data_root + 'annotations/instances_val2017.json',
101
+ metric='bbox',
102
+ backend_args=backend_args)
103
+ test_evaluator = val_evaluator
104
+
105
+ # training schedule for 3x with `RepeatDataset`
106
+ train_cfg = dict(type=EpochBasedTrainLoop, max_epochs=12, val_interval=1)
107
+ val_cfg = dict(type=ValLoop)
108
+ test_cfg = dict(type=TestLoop)
109
+
110
+ # learning rate
111
+ param_scheduler = [
112
+ dict(type=LinearLR, start_factor=0.001, by_epoch=False, begin=0, end=500),
113
+ dict(
114
+ type=MultiStepLR,
115
+ begin=0,
116
+ end=12,
117
+ by_epoch=True,
118
+ milestones=[9, 11],
119
+ gamma=0.1)
120
+ ]
121
+
122
+ # optimizer
123
+ optim_wrapper = dict(
124
+ type=OptimWrapper,
125
+ optimizer=dict(type=SGD, lr=0.02, momentum=0.9, weight_decay=0.0001))
126
+ # Default setting for scaling LR automatically
127
+ # - `enable`: whether to enable automatic LR scaling by default.
129
+ # - `base_batch_size` = (8 GPUs) x (2 samples per GPU).
130
+ auto_scale_lr = dict(enable=False, base_batch_size=16)
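
For reference, the arithmetic behind the "3x with `RepeatDataset`" schedule above, as a minimal plain-Python sketch (illustrative only, not part of the config): `times=3` makes the 12 nominal epochs equal to 36 passes over COCO train2017.

    # RepeatDataset(times=3) makes one training epoch traverse COCO three times,
    # so 12 epochs over the repeated dataset equal 36 passes over train2017.
    times = 3
    max_epochs = 12          # epochs over the repeated dataset (see train_cfg above)
    milestones = [9, 11]     # LR-drop epochs over the repeated dataset

    print(times * max_epochs)               # 36 effective epochs -> the usual "3x" schedule
    print([times * m for m in milestones])  # LR drops after effective epochs 27 and 33
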
mmdet/configs/common/ms_3x_coco_instance.py ADDED
@@ -0,0 +1,136 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+
3
+ # Please refer to https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta for more details. # noqa
4
+ # mmcv >= 2.0.1
5
+ # mmengine >= 0.8.0
6
+
7
+ from mmengine.config import read_base
8
+
9
+ with read_base():
10
+ from .._base_.default_runtime import *
11
+
12
+ from mmcv.transforms import RandomChoiceResize
13
+ from mmengine.dataset import RepeatDataset
14
+ from mmengine.dataset.sampler import DefaultSampler, InfiniteSampler
15
+ from mmengine.optim import OptimWrapper
16
+ from mmengine.optim.scheduler.lr_scheduler import LinearLR, MultiStepLR
17
+ from mmengine.runner.loops import EpochBasedTrainLoop, TestLoop, ValLoop
18
+ from torch.optim import SGD
19
+
20
+ from mmdet.datasets import AspectRatioBatchSampler, CocoDataset
21
+ from mmdet.datasets.transforms.formatting import PackDetInputs
22
+ from mmdet.datasets.transforms.loading import (FilterAnnotations,
23
+ LoadAnnotations,
24
+ LoadImageFromFile)
25
+ from mmdet.datasets.transforms.transforms import (CachedMixUp, CachedMosaic,
26
+ Pad, RandomCrop, RandomFlip,
27
+ RandomResize, Resize)
28
+ from mmdet.evaluation import CocoMetric
29
+
30
+ # dataset settings
31
+ dataset_type = CocoDataset
32
+ data_root = 'data/coco/'
33
+
34
+ # Example to use different file client
35
+ # Method 1: simply set the data root and let the file I/O module
36
+ # automatically infer from prefix (LMDB and Memcache are not supported yet)
37
+
38
+ # data_root = 's3://openmmlab/datasets/detection/coco/'
39
+
40
+ # Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6
41
+ # backend_args = dict(
42
+ # backend='petrel',
43
+ # path_mapping=dict({
44
+ # './data/': 's3://openmmlab/datasets/detection/',
45
+ # 'data/': 's3://openmmlab/datasets/detection/'
46
+ # }))
47
+ backend_args = None
48
+
49
+ train_pipeline = [
50
+ dict(type=LoadImageFromFile, backend_args=backend_args),
51
+ dict(type=LoadAnnotations, with_bbox=True, with_mask=True),
52
+ dict(
53
+ type=RandomResize, scale=[(1333, 640), (1333, 800)],
54
+ keep_ratio=True),
55
+ dict(type=RandomFlip, prob=0.5),
56
+ dict(type=PackDetInputs)
57
+ ]
58
+ test_pipeline = [
59
+ dict(type=LoadImageFromFile, backend_args=backend_args),
60
+ dict(type=Resize, scale=(1333, 800), keep_ratio=True),
61
+ dict(type=LoadAnnotations, with_bbox=True, with_mask=True),
62
+ dict(
63
+ type=PackDetInputs,
64
+ meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
65
+ 'scale_factor'))
66
+ ]
67
+ train_dataloader.update(
68
+ dict(
69
+ batch_size=2,
70
+ num_workers=2,
71
+ persistent_workers=True,
72
+ sampler=dict(type=DefaultSampler, shuffle=True),
73
+ batch_sampler=dict(type=AspectRatioBatchSampler),
74
+ dataset=dict(
75
+ type=RepeatDataset,
76
+ times=3,
77
+ dataset=dict(
78
+ type=dataset_type,
79
+ data_root=data_root,
80
+ ann_file='annotations/instances_train2017.json',
81
+ data_prefix=dict(img='train2017/'),
82
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
83
+ pipeline=train_pipeline,
84
+ backend_args=backend_args))))
85
+ val_dataloader.update(
86
+ dict(
87
+ batch_size=1,
88
+ num_workers=2,
89
+ persistent_workers=True,
90
+ drop_last=False,
91
+ sampler=dict(type=DefaultSampler, shuffle=False),
92
+ dataset=dict(
93
+ type=dataset_type,
94
+ data_root=data_root,
95
+ ann_file='annotations/instances_val2017.json',
96
+ data_prefix=dict(img='val2017/'),
97
+ test_mode=True,
98
+ pipeline=test_pipeline,
99
+ backend_args=backend_args)))
100
+ test_dataloader = val_dataloader
101
+
102
+ val_evaluator.update(
103
+ dict(
104
+ type=CocoMetric,
105
+ ann_file=data_root + 'annotations/instances_val2017.json',
106
+ metric='bbox',
107
+ backend_args=backend_args))
108
+ test_evaluator = val_evaluator
109
+
110
+ # training schedule for 3x with `RepeatDataset`
111
+ train_cfg.update(dict(type=EpochBasedTrainLoop, max_epochs=12, val_interval=1))
112
+ val_cfg.update(dict(type=ValLoop))
113
+ test_cfg.update(dict(type=TestLoop))
114
+
115
+ # learning rate
116
+ param_scheduler = [
117
+ dict(type=LinearLR, start_factor=0.001, by_epoch=False, begin=0, end=500),
118
+ dict(
119
+ type=MultiStepLR,
120
+ begin=0,
121
+ end=12,
122
+ by_epoch=False,
123
+ milestones=[9, 11],
124
+ gamma=0.1)
125
+ ]
126
+
127
+ # optimizer
128
+ optim_wrapper.update(
129
+ dict(
130
+ type=OptimWrapper,
131
+ optimizer=dict(type=SGD, lr=0.02, momentum=0.9, weight_decay=0.0001)))
132
+ # Default setting for scaling LR automatically
133
+ # - `enable`: whether to enable automatic LR scaling by default.
135
+ # - `base_batch_size` = (8 GPUs) x (2 samples per GPU).
136
+ auto_scale_lr.update(dict(enable=False, base_batch_size=16))
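
This file defines only data, schedule and optimizer settings. A hypothetical downstream config (sketch only; the file name and its location under `mmdet/configs` are assumptions) would combine it with a model base from this upload via `read_base()`:

    # my_mask_rcnn_ms_3x_coco.py (hypothetical) -- combine a model base with the
    # multi-scale 3x instance-segmentation data/schedule base defined above.
    from mmengine.config import read_base

    with read_base():
        from .._base_.models.mask_rcnn_r50_fpn import *
        from ..common.ms_3x_coco_instance import *

    # Individual fields can then be overridden in place, e.g. a smaller batch size:
    train_dataloader.update(dict(batch_size=1, num_workers=1))
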
mmdet/configs/common/ms_90k_coco.py ADDED
@@ -0,0 +1,151 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+
3
+ # Please refer to https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta for more details. # noqa
4
+ # mmcv >= 2.0.1
5
+ # mmengine >= 0.8.0
6
+
7
+ from mmengine.config import read_base
8
+
9
+ with read_base():
10
+ from .._base_.default_runtime import *
11
+
12
+ from mmcv.transforms import RandomChoiceResize
13
+ from mmengine.dataset import RepeatDataset
14
+ from mmengine.dataset.sampler import DefaultSampler, InfiniteSampler
15
+ from mmengine.optim import OptimWrapper
16
+ from mmengine.optim.scheduler.lr_scheduler import LinearLR, MultiStepLR
17
+ from mmengine.runner.loops import IterBasedTrainLoop, TestLoop, ValLoop
18
+ from torch.optim import SGD
19
+
20
+ from mmdet.datasets import AspectRatioBatchSampler, CocoDataset
21
+ from mmdet.datasets.transforms.formatting import PackDetInputs
22
+ from mmdet.datasets.transforms.loading import (FilterAnnotations,
23
+ LoadAnnotations,
24
+ LoadImageFromFile)
25
+ from mmdet.datasets.transforms.transforms import (CachedMixUp, CachedMosaic,
26
+ Pad, RandomCrop, RandomFlip,
27
+ RandomResize, Resize)
28
+ from mmdet.evaluation import CocoMetric
29
+
30
+ # dataset settings
31
+ dataset_type = CocoDataset
32
+ data_root = 'data/coco/'
33
+ # Example to use different file client
34
+ # Method 1: simply set the data root and let the file I/O module
35
+ # automatically infer from prefix (LMDB and Memcache are not supported yet)
36
+
37
+ # data_root = 's3://openmmlab/datasets/detection/coco/'
38
+
39
+ # Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6
40
+ # backend_args = dict(
41
+ # backend='petrel',
42
+ # path_mapping=dict({
43
+ # './data/': 's3://openmmlab/datasets/detection/',
44
+ # 'data/': 's3://openmmlab/datasets/detection/'
45
+ # }))
46
+ backend_args = None
47
+
48
+ # Align with Detectron2
49
+ backend = 'pillow'
50
+ train_pipeline = [
51
+ dict(
52
+ type=LoadImageFromFile,
53
+ backend_args=backend_args,
54
+ imdecode_backend=backend),
55
+ dict(type=LoadAnnotations, with_bbox=True),
56
+ dict(
57
+ type=RandomChoiceResize,
58
+ scales=[(1333, 640), (1333, 672), (1333, 704), (1333, 736),
59
+ (1333, 768), (1333, 800)],
60
+ keep_ratio=True,
61
+ backend=backend),
62
+ dict(type=RandomFlip, prob=0.5),
63
+ dict(type=PackDetInputs)
64
+ ]
65
+ test_pipeline = [
66
+ dict(
67
+ type=LoadImageFromFile,
68
+ backend_args=backend_args,
69
+ imdecode_backend=backend),
70
+ dict(type=Resize, scale=(1333, 800), keep_ratio=True, backend=backend),
71
+ dict(type=LoadAnnotations, with_bbox=True),
72
+ dict(
73
+ type=PackDetInputs,
74
+ meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
75
+ 'scale_factor'))
76
+ ]
77
+ train_dataloader.update(
78
+ dict(
79
+ batch_size=2,
80
+ num_workers=2,
81
+ persistent_workers=True,
82
+ pin_memory=True,
83
+ sampler=dict(type=InfiniteSampler, shuffle=True),
84
+ batch_sampler=dict(type=AspectRatioBatchSampler),
85
+ dataset=dict(
86
+ type=dataset_type,
87
+ data_root=data_root,
88
+ ann_file='annotations/instances_train2017.json',
89
+ data_prefix=dict(img='train2017/'),
90
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
91
+ pipeline=train_pipeline,
92
+ backend_args=backend_args)))
93
+ val_dataloader.update(
94
+ dict(
95
+ batch_size=1,
96
+ num_workers=2,
97
+ persistent_workers=True,
98
+ drop_last=False,
99
+ pin_memory=True,
100
+ sampler=dict(type=DefaultSampler, shuffle=False),
101
+ dataset=dict(
102
+ type=dataset_type,
103
+ data_root=data_root,
104
+ ann_file='annotations/instances_val2017.json',
105
+ data_prefix=dict(img='val2017/'),
106
+ test_mode=True,
107
+ pipeline=test_pipeline,
108
+ backend_args=backend_args)))
109
+ test_dataloader = val_dataloader
110
+
111
+ val_evaluator.update(
112
+ dict(
113
+ type=CocoMetric,
114
+ ann_file=data_root + 'annotations/instances_val2017.json',
115
+ metric='bbox',
116
+ format_only=False,
117
+ backend_args=backend_args))
118
+ test_evaluator = val_evaluator
119
+
120
+ # training schedule for 90k
121
+ max_iter = 90000
122
+ train_cfg.update(
123
+ dict(type=IterBasedTrainLoop, max_iters=max_iter, val_interval=10000))
124
+ val_cfg.update(dict(type=ValLoop))
125
+ test_cfg.update(dict(type=TestLoop))
126
+
127
+ # learning rate
128
+ param_scheduler = [
129
+ dict(type=LinearLR, start_factor=0.001, by_epoch=False, begin=0, end=1000),
130
+ dict(
131
+ type=MultiStepLR,
132
+ begin=0,
133
+ end=max_iter,
134
+ by_epoch=False,
135
+ milestones=[60000, 80000],
136
+ gamma=0.1)
137
+ ]
138
+
139
+ # optimizer
140
+ optim_wrapper.update(
141
+ dict(
142
+ type=OptimWrapper,
143
+ optimizer=dict(type=SGD, lr=0.02, momentum=0.9, weight_decay=0.0001)))
144
+ # Default setting for scaling LR automatically
145
+ # - `enable`: whether to enable automatic LR scaling by default.
147
+ # - `base_batch_size` = (8 GPUs) x (2 samples per GPU).
148
+ auto_scale_lr.update(dict(enable=False, base_batch_size=16))
149
+
150
+ default_hooks.update(dict(checkpoint=dict(by_epoch=False, interval=10000)))
151
+ log_processor.update(dict(by_epoch=False))
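
For a rough sense of scale, the 90k-iteration schedule above can be converted to approximate epochs, assuming the usual ~118k images in COCO train2017 and the 16-image global batch implied by `auto_scale_lr` (a back-of-the-envelope sketch, not part of the config):

    num_train_images = 118_287            # approximate size of COCO train2017
    total_batch = 16                      # 8 GPUs x 2 samples per GPU
    iters_per_epoch = num_train_images / total_batch   # ~7393 iterations per epoch

    print(90_000 / iters_per_epoch)       # ~12.2 epochs in total
    print(60_000 / iters_per_epoch)       # first LR drop around epoch 8
    print(80_000 / iters_per_epoch)       # second LR drop around epoch 11
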
mmdet/configs/common/ms_poly_3x_coco_instance.py ADDED
@@ -0,0 +1,138 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+
3
+ # Please refer to https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta for more details. # noqa
4
+ # mmcv >= 2.0.1
5
+ # mmengine >= 0.8.0
6
+
7
+ from mmengine.config import read_base
8
+
9
+ with read_base():
10
+ from .._base_.default_runtime import *
11
+
12
+ from mmcv.transforms import RandomChoiceResize
13
+ from mmengine.dataset import RepeatDataset
14
+ from mmengine.dataset.sampler import DefaultSampler, InfiniteSampler
15
+ from mmengine.optim import OptimWrapper
16
+ from mmengine.optim.scheduler.lr_scheduler import LinearLR, MultiStepLR
17
+ from mmengine.runner.loops import EpochBasedTrainLoop, TestLoop, ValLoop
18
+ from torch.optim import SGD
19
+
20
+ from mmdet.datasets import AspectRatioBatchSampler, CocoDataset
21
+ from mmdet.datasets.transforms.formatting import PackDetInputs
22
+ from mmdet.datasets.transforms.loading import (FilterAnnotations,
23
+ LoadAnnotations,
24
+ LoadImageFromFile)
25
+ from mmdet.datasets.transforms.transforms import (CachedMixUp, CachedMosaic,
26
+ Pad, RandomCrop, RandomFlip,
27
+ RandomResize, Resize)
28
+ from mmdet.evaluation import CocoMetric
29
+
30
+ # dataset settings
31
+ dataset_type = CocoDataset
32
+ data_root = 'data/coco/'
33
+ # Example to use different file client
34
+ # Method 1: simply set the data root and let the file I/O module
35
+ # automatically infer from prefix (LMDB and Memcache are not supported yet)
36
+
37
+ # data_root = 's3://openmmlab/datasets/detection/coco/'
38
+
39
+ # Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6
40
+ # backend_args = dict(
41
+ # backend='petrel',
42
+ # path_mapping=dict({
43
+ # './data/': 's3://openmmlab/datasets/detection/',
44
+ # 'data/': 's3://openmmlab/datasets/detection/'
45
+ # }))
46
+ backend_args = None
47
+
48
+ # In mstrain 3x config, img_scale=[(1333, 640), (1333, 800)],
49
+ # multiscale_mode='range'
50
+ train_pipeline = [
51
+ dict(type=LoadImageFromFile, backend_args=backend_args),
52
+ dict(
53
+ type=LoadAnnotations, with_bbox=True, with_mask=True, poly2mask=False),
54
+ dict(
55
+ type=RandomResize, scale=[(1333, 640), (1333, 800)],
56
+ keep_ratio=True),
57
+ dict(type=RandomFlip, prob=0.5),
58
+ dict(type=PackDetInputs)
59
+ ]
60
+ test_pipeline = [
61
+ dict(type=LoadImageFromFile, backend_args=backend_args),
62
+ dict(type=Resize, scale=(1333, 800), keep_ratio=True),
63
+ dict(
64
+ type=LoadAnnotations, with_bbox=True, with_mask=True, poly2mask=False),
65
+ dict(
66
+ type=PackDetInputs,
67
+ meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
68
+ 'scale_factor'))
69
+ ]
70
+ train_dataloader.update(
71
+ dict(
72
+ batch_size=2,
73
+ num_workers=2,
74
+ persistent_workers=True,
75
+ pin_memory=True,
76
+ sampler=dict(type=DefaultSampler, shuffle=True),
77
+ batch_sampler=dict(type=AspectRatioBatchSampler),
78
+ dataset=dict(
+ type=RepeatDataset,
+ times=3,
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ ann_file='annotations/instances_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
+ pipeline=train_pipeline,
+ backend_args=backend_args))))
86
+ val_dataloader.update(
87
+ dict(
88
+ batch_size=2,
89
+ num_workers=2,
90
+ persistent_workers=True,
91
+ drop_last=False,
92
+ pin_memory=True,
93
+ sampler=dict(type=DefaultSampler, shuffle=False),
94
+ dataset=dict(
95
+ type=dataset_type,
96
+ data_root=data_root,
97
+ ann_file='annotations/instances_val2017.json',
98
+ data_prefix=dict(img='val2017/'),
99
+ test_mode=True,
100
+ pipeline=test_pipeline,
101
+ backend_args=backend_args)))
102
+ test_dataloader = val_dataloader
103
+
104
+ val_evaluator.update(
105
+ dict(
106
+ type=CocoMetric,
107
+ ann_file=data_root + 'annotations/instances_val2017.json',
108
+ metric=['bbox', 'segm'],
109
+ backend_args=backend_args))
110
+ test_evaluator = val_evaluator
111
+
112
+ # training schedule for 3x with `RepeatDataset`
113
+ train_cfg.update(dict(type=EpochBasedTrainLoop, max_epochs=12, val_interval=1))
114
+ val_cfg.update(dict(type=ValLoop))
115
+ test_cfg.update(dict(type=TestLoop))
116
+
117
+ # learning rate
118
+ param_scheduler = [
119
+ dict(type=LinearLR, start_factor=0.001, by_epoch=False, begin=0, end=500),
120
+ dict(
121
+ type=MultiStepLR,
122
+ begin=0,
123
+ end=12,
124
+ by_epoch=True,
125
+ milestones=[9, 11],
126
+ gamma=0.1)
127
+ ]
128
+
129
+ # optimizer
130
+ optim_wrapper.update(
131
+ dict(
132
+ type=OptimWrapper,
133
+ optimizer=dict(type=SGD, lr=0.02, momentum=0.9, weight_decay=0.0001)))
134
+ # Default setting for scaling LR automatically
135
+ # - `enable`: whether to enable automatic LR scaling by default.
137
+ # - `base_batch_size` = (8 GPUs) x (2 samples per GPU).
138
+ auto_scale_lr.update(dict(enable=False, base_batch_size=16))
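
The `auto_scale_lr` block above encodes the linear LR scaling rule. A small illustration of the value a runner would use when `enable=True` and the actual global batch differs from `base_batch_size` (a sketch of the rule, not mmengine's implementation):

    def scaled_lr(base_lr: float, num_gpus: int, samples_per_gpu: int,
                  base_batch_size: int = 16) -> float:
        # Linear scaling rule: the LR grows or shrinks with the ratio of the
        # actual total batch size to the reference one (8 GPUs x 2 samples = 16).
        return base_lr * (num_gpus * samples_per_gpu) / base_batch_size

    print(scaled_lr(0.02, 8, 2))   # 0.02 -> reference setting, unchanged
    print(scaled_lr(0.02, 4, 2))   # 0.01 -> half the batch, half the LR
    print(scaled_lr(0.02, 16, 2))  # 0.04 -> double the batch, double the LR
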
mmdet/configs/common/ms_poly_90k_coco_instance.py ADDED
@@ -0,0 +1,153 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+
3
+ # Please refer to https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta for more details. # noqa
4
+ # mmcv >= 2.0.1
5
+ # mmengine >= 0.8.0
6
+
7
+ from mmengine.config import read_base
8
+
9
+ with read_base():
10
+ from .._base_.default_runtime import *
11
+
12
+ from mmcv.transforms import RandomChoiceResize
13
+ from mmengine.dataset import RepeatDataset
14
+ from mmengine.dataset.sampler import DefaultSampler, InfiniteSampler
15
+ from mmengine.optim import OptimWrapper
16
+ from mmengine.optim.scheduler.lr_scheduler import LinearLR, MultiStepLR
17
+ from mmengine.runner.loops import IterBasedTrainLoop, TestLoop, ValLoop
18
+ from torch.optim import SGD
19
+
20
+ from mmdet.datasets import AspectRatioBatchSampler, CocoDataset
21
+ from mmdet.datasets.transforms.formatting import PackDetInputs
22
+ from mmdet.datasets.transforms.loading import (FilterAnnotations,
23
+ LoadAnnotations,
24
+ LoadImageFromFile)
25
+ from mmdet.datasets.transforms.transforms import (CachedMixUp, CachedMosaic,
26
+ Pad, RandomCrop, RandomFlip,
27
+ RandomResize, Resize)
28
+ from mmdet.evaluation import CocoMetric
29
+
30
+ # dataset settings
31
+ dataset_type = CocoDataset
32
+ data_root = 'data/coco/'
33
+ # Example to use different file client
34
+ # Method 1: simply set the data root and let the file I/O module
35
+ # automatically infer from prefix (LMDB and Memcache are not supported yet)
36
+
37
+ # data_root = 's3://openmmlab/datasets/detection/coco/'
38
+
39
+ # Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6
40
+ # backend_args = dict(
41
+ # backend='petrel',
42
+ # path_mapping=dict({
43
+ # './data/': 's3://openmmlab/datasets/detection/',
44
+ # 'data/': 's3://openmmlab/datasets/detection/'
45
+ # }))
46
+ backend_args = None
47
+
48
+ # Align with Detectron2
49
+ backend = 'pillow'
50
+ train_pipeline = [
51
+ dict(
52
+ type=LoadImageFromFile,
53
+ backend_args=backend_args,
54
+ imdecode_backend=backend),
55
+ dict(
56
+ type=LoadAnnotations, with_bbox=True, with_mask=True, poly2mask=False),
57
+ dict(
58
+ type=RandomChoiceResize,
59
+ scales=[(1333, 640), (1333, 672), (1333, 704), (1333, 736),
60
+ (1333, 768), (1333, 800)],
61
+ keep_ratio=True,
62
+ backend=backend),
63
+ dict(type=RandomFlip, prob=0.5),
64
+ dict(type=PackDetInputs)
65
+ ]
66
+ test_pipeline = [
67
+ dict(
68
+ type=LoadImageFromFile,
69
+ backend_args=backend_args,
70
+ imdecode_backend=backend),
71
+ dict(type=Resize, scale=(1333, 800), keep_ratio=True, backend=backend),
72
+ dict(
73
+ type=LoadAnnotations, with_bbox=True, with_mask=True, poly2mask=False),
74
+ dict(
75
+ type=PackDetInputs,
76
+ meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
77
+ 'scale_factor'))
78
+ ]
79
+ train_dataloader.update(
80
+ dict(
81
+ batch_size=2,
82
+ num_workers=2,
83
+ persistent_workers=True,
84
+ pin_memory=True,
85
+ sampler=dict(type=InfiniteSampler, shuffle=True),
86
+ batch_sampler=dict(type=AspectRatioBatchSampler),
87
+ dataset=dict(
88
+ type=dataset_type,
89
+ data_root=data_root,
90
+ ann_file='annotations/instances_train2017.json',
91
+ data_prefix=dict(img='train2017/'),
92
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
93
+ pipeline=train_pipeline,
94
+ backend_args=backend_args)))
95
+ val_dataloader.update(
96
+ dict(
97
+ batch_size=1,
98
+ num_workers=2,
99
+ persistent_workers=True,
100
+ drop_last=False,
101
+ pin_memory=True,
102
+ sampler=dict(type=DefaultSampler, shuffle=False),
103
+ dataset=dict(
104
+ type=dataset_type,
105
+ data_root=data_root,
106
+ ann_file='annotations/instances_val2017.json',
107
+ data_prefix=dict(img='val2017/'),
108
+ test_mode=True,
109
+ pipeline=test_pipeline,
110
+ backend_args=backend_args)))
111
+ test_dataloader = val_dataloader
112
+
113
+ val_evaluator.update(
114
+ dict(
115
+ type=CocoMetric,
116
+ ann_file=data_root + 'annotations/instances_val2017.json',
117
+ metric=['bbox', 'segm'],
118
+ format_only=False,
119
+ backend_args=backend_args))
120
+ test_evaluator = val_evaluator
121
+
122
+ # training schedule for 90k
123
+ max_iter = 90000
124
+ train_cfg.update(
125
+ dict(type=IterBasedTrainLoop, max_iters=max_iter, val_interval=10000))
126
+ val_cfg.update(dict(type=ValLoop))
127
+ test_cfg.update(dict(type=TestLoop))
128
+
129
+ # learning rate
130
+ param_scheduler = [
131
+ dict(type=LinearLR, start_factor=0.001, by_epoch=False, begin=0, end=1000),
132
+ dict(
133
+ type=MultiStepLR,
134
+ begin=0,
135
+ end=max_iter,
136
+ by_epoch=False,
137
+ milestones=[60000, 80000],
138
+ gamma=0.1)
139
+ ]
140
+
141
+ # optimizer
142
+ optim_wrapper.update(
143
+ dict(
144
+ type=OptimWrapper,
145
+ optimizer=dict(type=SGD, lr=0.02, momentum=0.9, weight_decay=0.0001)))
146
+ # Default setting for scaling LR automatically
147
+ # - `enable`: whether to enable automatic LR scaling by default.
149
+ # - `base_batch_size` = (8 GPUs) x (2 samples per GPU).
150
+ auto_scale_lr.update(dict(enable=False, base_batch_size=16))
151
+
152
+ default_hooks.update(dict(checkpoint=dict(by_epoch=False, interval=10000)))
153
+ log_processor.update(dict(by_epoch=False))
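
Taken together, the `LinearLR` warmup and `MultiStepLR` steps above yield the usual warmup-then-step-decay curve. A compact sketch that approximates the resulting learning rate at a given iteration of the 90k schedule (illustrative only, not mmengine's code):

    def lr_at(iteration: int, base_lr: float = 0.02) -> float:
        if iteration < 1000:                      # LinearLR: warm up from 0.1% of base_lr
            factor = 0.001 + (1.0 - 0.001) * iteration / 1000
            return base_lr * factor
        lr = base_lr
        for milestone in (60_000, 80_000):        # MultiStepLR: decay by gamma=0.1
            if iteration >= milestone:
                lr *= 0.1
        return lr

    print(lr_at(0), lr_at(1_000), lr_at(70_000), lr_at(85_000))
    # ~2e-05, 0.02, 0.002, 0.0002
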