KyanChen committed on
Commit 3b96cb1
1 Parent(s): bb15694

Upload 1861 files

This view is limited to 50 files because it contains too many changes; see the raw diff for the full change set.
Files changed (50)
  1. .gitattributes +10 -0
  2. .gitignore +128 -0
  3. app.py +42 -0
  4. ckpt/epoch_270.pth +3 -0
  5. configs/.DS_Store +0 -0
  6. configs/TTP/ttp_sam_large_levircd.py +202 -0
  7. configs/TTP/ttp_sam_large_levircd_fp16.py +201 -0
  8. configs/TTP/ttp_sam_large_levircd_infer.py +199 -0
  9. demo/MMSegmentation_Tutorial.ipynb +555 -0
  10. demo/classroom__rgb_00283.jpg +0 -0
  11. demo/demo.png +0 -0
  12. demo/image_demo.py +51 -0
  13. demo/image_demo_with_inferencer.py +54 -0
  14. demo/inference_demo.ipynb +120 -0
  15. demo/rs_image_inference.py +50 -0
  16. demo/video_demo.py +112 -0
  17. mmdet/.DS_Store +0 -0
  18. mmdet/__init__.py +27 -0
  19. mmdet/__pycache__/__init__.cpython-311.pyc +0 -0
  20. mmdet/__pycache__/registry.cpython-311.pyc +0 -0
  21. mmdet/__pycache__/version.cpython-311.pyc +0 -0
  22. mmdet/apis/__init__.py +9 -0
  23. mmdet/apis/det_inferencer.py +644 -0
  24. mmdet/apis/inference.py +372 -0
  25. mmdet/configs/.DS_Store +0 -0
  26. mmdet/configs/_base_/datasets/coco_detection.py +104 -0
  27. mmdet/configs/_base_/datasets/coco_instance.py +106 -0
  28. mmdet/configs/_base_/datasets/coco_instance_semantic.py +87 -0
  29. mmdet/configs/_base_/datasets/coco_panoptic.py +105 -0
  30. mmdet/configs/_base_/datasets/mot_challenge.py +101 -0
  31. mmdet/configs/_base_/default_runtime.py +33 -0
  32. mmdet/configs/_base_/models/cascade_mask_rcnn_r50_fpn.py +220 -0
  33. mmdet/configs/_base_/models/cascade_rcnn_r50_fpn.py +201 -0
  34. mmdet/configs/_base_/models/faster_rcnn_r50_fpn.py +138 -0
  35. mmdet/configs/_base_/models/mask_rcnn_r50_caffe_c4.py +158 -0
  36. mmdet/configs/_base_/models/mask_rcnn_r50_fpn.py +154 -0
  37. mmdet/configs/_base_/models/retinanet_r50_fpn.py +77 -0
  38. mmdet/configs/_base_/schedules/schedule_1x.py +33 -0
  39. mmdet/configs/_base_/schedules/schedule_2x.py +33 -0
  40. mmdet/configs/cascade_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco.py +13 -0
  41. mmdet/configs/cascade_rcnn/cascade_rcnn_r50_fpn_1x_coco.py +13 -0
  42. mmdet/configs/common/lsj_100e_coco_detection.py +134 -0
  43. mmdet/configs/common/lsj_100e_coco_instance.py +134 -0
  44. mmdet/configs/common/lsj_200e_coco_detection.py +25 -0
  45. mmdet/configs/common/lsj_200e_coco_instance.py +25 -0
  46. mmdet/configs/common/ms_3x_coco.py +130 -0
  47. mmdet/configs/common/ms_3x_coco_instance.py +136 -0
  48. mmdet/configs/common/ms_90k_coco.py +151 -0
  49. mmdet/configs/common/ms_poly_3x_coco_instance.py +138 -0
  50. mmdet/configs/common/ms_poly_90k_coco_instance.py +153 -0
.gitattributes CHANGED
@@ -33,3 +33,13 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ samples/A/test_1.png filter=lfs diff=lfs merge=lfs -text
+ samples/A/test_2.png filter=lfs diff=lfs merge=lfs -text
+ samples/A/test_3.png filter=lfs diff=lfs merge=lfs -text
+ samples/A/test_4.png filter=lfs diff=lfs merge=lfs -text
+ samples/A/test_5.png filter=lfs diff=lfs merge=lfs -text
+ samples/B/test_1.png filter=lfs diff=lfs merge=lfs -text
+ samples/B/test_2.png filter=lfs diff=lfs merge=lfs -text
+ samples/B/test_3.png filter=lfs diff=lfs merge=lfs -text
+ samples/B/test_4.png filter=lfs diff=lfs merge=lfs -text
+ samples/B/test_5.png filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,128 @@
+ *.pth
+ gradio_cached_examples/
+
+ .idea
+ .DS_Store
+ work_dirs/
+ pretrain_models/
+
+ # Byte-compiled / optimized / DLL files
+ __pycache__/
+ *.py[cod]
+ *$py.class
+
+ # C extensions
+ *.so
+
+ # Distribution / packaging
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+ MANIFEST
+
+ # PyInstaller
+ # Usually these files are written by a python script from a template
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
+ *.manifest
+ *.spec
+
+ # Installer logs
+ pip-log.txt
+ pip-delete-this-directory.txt
+
+ # Unit test / coverage reports
+ htmlcov/
+ .tox/
+ .coverage
+ .coverage.*
+ .cache
+ nosetests.xml
+ coverage.xml
+ *.cover
+ .hypothesis/
+ .pytest_cache/
+
+ # Translations
+ *.mo
+ *.pot
+
+ # Django stuff:
+ *.log
+ local_settings.py
+ db.sqlite3
+
+ # Flask stuff:
+ instance/
+ .webassets-cache
+
+ # Scrapy stuff:
+ .scrapy
+
+ # Sphinx documentation
+ docs/en/_build/
+ docs/zh_cn/_build/
+
+ # PyBuilder
+ target/
+
+ # Jupyter Notebook
+ .ipynb_checkpoints
+
+ # pyenv
+ .python-version
+
+ # celery beat schedule file
+ celerybeat-schedule
+
+ # SageMath parsed files
+ *.sage.py
+
+ # Environments
+ .env
+ .venv
+ env/
+ venv/
+ ENV/
+ env.bak/
+ venv.bak/
+ .DS_Store
+
+ # Spyder project settings
+ .spyderproject
+ .spyproject
+
+ # Rope project settings
+ .ropeproject
+
+ # mkdocs documentation
+ /site
+
+ # mypy
+ .mypy_cache/
+
+ data
+ .vscode
+ .idea
+
+ # custom
+ *.pkl
+ *.pkl.json
+ *.log.json
+ work_dirs/
+ mmseg/.mim
+
+ # Pytorch
+ *.pth
app.py ADDED
@@ -0,0 +1,42 @@
+ import gradio as gr
+ import glob
+ import torch
+ from opencd.apis import OpenCDInferencer
+
+ device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
+
+ config_file = 'configs/TTP/ttp_sam_large_levircd_infer.py'
+ checkpoint_file = 'ckpt/epoch_270.pth'
+
+ # build the model from a config file and a checkpoint file
+ mmcd_inferencer = OpenCDInferencer(
+     model=config_file,
+     weights=checkpoint_file,
+     classes=['unchanged', 'changed'],
+     palette=[[0, 0, 0], [255, 255, 255]],
+     device=device
+ )
+
+ def infer(img1, img2):
+     # test a single image
+     result = mmcd_inferencer([[img1, img2]], show=False, return_vis=True)
+     visualization = result['visualization']
+     return visualization
+
+
+ with gr.Blocks() as demo:
+     with gr.Row():
+         input_0 = gr.Image(label='Input Image1')
+         input_1 = gr.Image(label='Input Image2')
+     with gr.Row():
+         output_gt = gr.Image(label='Predicted Mask')
+     btn = gr.Button("Detect")
+     btn.click(infer, inputs=[input_0, input_1], outputs=[output_gt])
+
+     img1_files = glob.glob('samples/A/*.png')
+     img2_files = [f.replace('A', 'B') for f in img1_files]
+     input_files = [[x, y] for x, y in zip(img1_files, img2_files)]
+     gr.Examples(input_files, fn=infer, inputs=[input_0, input_1], outputs=[output_gt], cache_examples=True)
+
+ if __name__ == "__main__":
+     demo.launch()
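
The same inference path can also be exercised offline on the bundled samples/ pairs. The following is a minimal sketch, not part of the commit: it assumes the inferencer accepts file paths as well as arrays (as OpenMMLab inferencers generally do), that the result dict exposes the 'visualization' list the Gradio callback above consumes, and that each entry is an image array that mmcv.imwrite can save.

import glob
import mmcv
from opencd.apis import OpenCDInferencer

inferencer = OpenCDInferencer(
    model='configs/TTP/ttp_sam_large_levircd_infer.py',
    weights='ckpt/epoch_270.pth',
    classes=['unchanged', 'changed'],
    palette=[[0, 0, 0], [255, 255, 255]],
    device='cpu')  # or 'cuda:0' if a GPU is available

# pair each pre-change image in samples/A with its post-change counterpart in samples/B
pairs = [[a, a.replace('A', 'B')] for a in sorted(glob.glob('samples/A/*.png'))]
results = inferencer(pairs, show=False, return_vis=True)

# write one predicted-change visualization per input pair to the working directory
for (img_a, _), vis in zip(pairs, results['visualization']):
    mmcv.imwrite(vis, 'pred_' + img_a.split('/')[-1])
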
ckpt/epoch_270.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a37d3a79379f4bf3d7ecb85b71209f35cd8af7e61cae564038397e8b7fb3eaf2
+ size 1415063308
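
The checkpoint is committed as a Git LFS pointer, so the actual weights are only present locally after an LFS pull. A quick integrity check against the oid and size recorded above needs nothing beyond the standard library:

import hashlib
import os

ckpt_path = 'ckpt/epoch_270.pth'
expected_oid = 'a37d3a79379f4bf3d7ecb85b71209f35cd8af7e61cae564038397e8b7fb3eaf2'
expected_size = 1415063308  # bytes, from the pointer file

sha256 = hashlib.sha256()
with open(ckpt_path, 'rb') as f:
    for chunk in iter(lambda: f.read(1 << 20), b''):  # hash in 1 MiB chunks
        sha256.update(chunk)

assert os.path.getsize(ckpt_path) == expected_size, 'size mismatch: LFS object not pulled?'
assert sha256.hexdigest() == expected_oid, 'sha256 mismatch: corrupted download?'
print('ckpt/epoch_270.pth matches its LFS pointer')
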
configs/.DS_Store ADDED
Binary file (6.15 kB)
configs/TTP/ttp_sam_large_levircd.py ADDED
@@ -0,0 +1,202 @@
1
+ default_scope = 'opencd'
2
+
3
+ work_dir = 'work_dirs/lervicd/ttp_sam_large_levircd'
4
+
5
+ custom_imports = dict(imports=['mmseg.ttp'], allow_failed_imports=False)
6
+
7
+ env_cfg = dict(
8
+ cudnn_benchmark=True,
9
+ mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
10
+ dist_cfg=dict(backend='nccl'),
11
+ )
12
+ default_hooks = dict(
13
+ timer=dict(type='IterTimerHook'),
14
+ logger=dict(type='LoggerHook', interval=10, log_metric_by_epoch=True),
15
+ param_scheduler=dict(type='ParamSchedulerHook'),
16
+ checkpoint=dict(type='CheckpointHook', by_epoch=True, interval=10, save_best='cd/iou_changed', max_keep_ckpts=5, greater_keys=['cd/iou_changed'], save_last=True),
17
+ sampler_seed=dict(type='DistSamplerSeedHook'),
18
+ visualization=dict(type='CDVisualizationHook', interval=1,
19
+ img_shape=(1024, 1024, 3))
20
+ )
21
+ vis_backends = [dict(type='CDLocalVisBackend'),
22
+ dict(type='WandbVisBackend',
23
+ init_kwargs=dict(project='samcd', group='levircd', name='ttp_sam_large_levircd'))
24
+ ]
25
+
26
+ visualizer = dict(
27
+ type='CDLocalVisualizer',
28
+ vis_backends=vis_backends, name='visualizer', alpha=1.0)
29
+ log_processor = dict(by_epoch=True)
30
+
31
+ log_level = 'INFO'
32
+ load_from = None
33
+ resume = False
34
+
35
+ crop_size = (512, 512)
36
+
37
+ data_preprocessor = dict(
38
+ type='DualInputSegDataPreProcessor',
39
+ mean=[123.675, 116.28, 103.53] * 2,
40
+ std=[58.395, 57.12, 57.375] * 2,
41
+ bgr_to_rgb=True,
42
+ pad_val=0,
43
+ seg_pad_val=255,
44
+ size_divisor=32,
45
+ test_cfg=dict(size_divisor=32)
46
+ )
47
+
48
+ norm_cfg = dict(type='SyncBN', requires_grad=True)
49
+ fpn_norm_cfg = dict(type='LN2d', requires_grad=True)
50
+
51
+ sam_pretrain_ckpt_path = 'https://download.openmmlab.com/mmclassification/v1/vit_sam/vit-large-p16_sam-pre_3rdparty_sa1b-1024px_20230411-595feafd.pth'
52
+
53
+ model = dict(
54
+ type='SiamEncoderDecoder',
55
+ data_preprocessor=data_preprocessor,
56
+ backbone=dict(
57
+ type='MMPretrainSamVisionEncoder',
58
+ encoder_cfg=dict(
59
+ type='mmpretrain.ViTSAM',
60
+ arch='large',
61
+ img_size=crop_size[0],
62
+ patch_size=16,
63
+ out_channels=256,
64
+ use_abs_pos=True,
65
+ use_rel_pos=True,
66
+ window_size=14,
67
+ layer_cfgs=dict(type='TimeFusionTransformerEncoderLayer'),
68
+ init_cfg=dict(type='Pretrained', checkpoint=sam_pretrain_ckpt_path, prefix='backbone.'),
69
+ ),
70
+ peft_cfg=dict(
71
+ r=16,
72
+ target_modules=["qkv"],
73
+ lora_dropout=0.01,
74
+ bias='lora_only',
75
+ ),
76
+ ),
77
+ neck=dict(
78
+ type='SequentialNeck',
79
+ necks=[
80
+ dict(
81
+ type='FeatureFusionNeck',
82
+ policy='concat',
83
+ out_indices=(0,)),
84
+ dict(
85
+ type='SimpleFPN',
86
+ backbone_channel=512,
87
+ in_channels=[128, 256, 512, 512],
88
+ out_channels=256,
89
+ num_outs=5,
90
+ norm_cfg=fpn_norm_cfg),
91
+ ],
92
+ ),
93
+ decode_head=dict(
94
+ type='MLPSegHead',
95
+ out_size=(128, 128),
96
+ in_channels=[256]*5,
97
+ in_index=[0, 1, 2, 3, 4],
98
+ channels=256,
99
+ dropout_ratio=0,
100
+ num_classes=2,
101
+ norm_cfg=norm_cfg,
102
+ align_corners=False,
103
+ loss_decode=dict(
104
+ type='mmseg.CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
105
+ train_cfg=dict(),
106
+ test_cfg=dict(mode='slide', crop_size=crop_size, stride=(crop_size[0]//2, crop_size[1]//2))
107
+ ) # yapf: disable
108
+
109
+ dataset_type = 'LEVIR_CD_Dataset'
110
+ data_root = '/mnt/levir_datasets/levir-cd'
111
+
112
+
113
+ train_pipeline = [
114
+ dict(type='MultiImgLoadImageFromFile'),
115
+ dict(type='MultiImgLoadAnnotations'),
116
+ dict(type='MultiImgRandomRotate', prob=0.5, degree=180),
117
+ dict(type='MultiImgRandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
118
+ dict(type='MultiImgRandomFlip', prob=0.5, direction='horizontal'),
119
+ dict(type='MultiImgRandomFlip', prob=0.5, direction='vertical'),
120
+ # dict(type='MultiImgExchangeTime', prob=0.5),
121
+ dict(
122
+ type='MultiImgPhotoMetricDistortion',
123
+ brightness_delta=10,
124
+ contrast_range=(0.8, 1.2),
125
+ saturation_range=(0.8, 1.2),
126
+ hue_delta=10),
127
+ dict(type='MultiImgPackSegInputs')
128
+ ]
129
+ test_pipeline = [
130
+ dict(type='MultiImgLoadImageFromFile'),
131
+ dict(type='MultiImgResize', scale=(1024, 1024), keep_ratio=True),
132
+ # add loading annotation after ``Resize`` because ground truth
133
+ # does not need to do resize data transform
134
+ dict(type='MultiImgLoadAnnotations'),
135
+ dict(type='MultiImgPackSegInputs')
136
+ ]
137
+
138
+ batch_size_per_gpu = 2
139
+
140
+ train_dataloader = dict(
141
+ batch_size=batch_size_per_gpu,
142
+ num_workers=8,
143
+ persistent_workers=True,
144
+ sampler=dict(type='DefaultSampler', shuffle=True),
145
+ dataset=dict(
146
+ type=dataset_type,
147
+ data_root=data_root,
148
+ data_prefix=dict(
149
+ seg_map_path='train/label',
150
+ img_path_from='train/A',
151
+ img_path_to='train/B'),
152
+ pipeline=train_pipeline)
153
+ )
154
+
155
+ val_dataloader = dict(
156
+ batch_size=1,
157
+ num_workers=4,
158
+ persistent_workers=True,
159
+ sampler=dict(type='DefaultSampler', shuffle=False),
160
+ dataset=dict(
161
+ type=dataset_type,
162
+ data_root=data_root,
163
+ data_prefix=dict(
164
+ seg_map_path='test/label',
165
+ img_path_from='test/A',
166
+ img_path_to='test/B'),
167
+ pipeline=test_pipeline)
168
+ )
169
+
170
+ test_dataloader = val_dataloader
171
+
172
+ val_evaluator = dict(
173
+ type='CDMetric',
174
+ )
175
+ test_evaluator = val_evaluator
176
+
177
+ max_epochs = 300
178
+ base_lr = 0.0004
179
+ param_scheduler = [
180
+ dict(
181
+ type='LinearLR', start_factor=1e-4, by_epoch=True, begin=0, end=5, convert_to_iter_based=True),
182
+ dict(
183
+ type='CosineAnnealingLR',
184
+ T_max=max_epochs,
185
+ begin=5,
186
+ by_epoch=True,
187
+ end=max_epochs,
188
+ convert_to_iter_based=True
189
+ ),
190
+ ]
191
+
192
+ train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=max_epochs, val_interval=5)
193
+ val_cfg = dict(type='ValLoop')
194
+ test_cfg = dict(type='TestLoop')
195
+
196
+
197
+ optim_wrapper = dict(
198
+ type='OptimWrapper',
199
+ optimizer=dict(
200
+ type='AdamW', lr=base_lr, betas=(0.9, 0.999), weight_decay=0.05),
201
+ )
202
+
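
The config above is a self-contained MMEngine recipe (model, pipelines, dataloaders, schedule, optimizer), so it can be launched with the same Runner.from_cfg pattern used in the tutorial notebook included further down in this commit. A minimal single-process sketch, assuming opencd/mmseg and their dependencies are importable; the data_root and work_dir overrides are hypothetical local paths replacing the /mnt/levir_datasets path baked into the file, and multi-GPU jobs would normally go through a distributed launcher instead.

from mmengine.config import Config
from mmengine.runner import Runner

# Config.fromfile also processes custom_imports, which registers the mmseg.ttp modules.
cfg = Config.fromfile('configs/TTP/ttp_sam_large_levircd.py')

# override paths hard-coded in the file (hypothetical local setup)
cfg.work_dir = 'work_dirs/ttp_sam_large_levircd'
cfg.train_dataloader.dataset.data_root = 'data/levir-cd'
cfg.val_dataloader.dataset.data_root = 'data/levir-cd'
cfg.test_dataloader.dataset.data_root = 'data/levir-cd'

# drop the W&B backend if wandb is not configured locally
cfg.visualizer.vis_backends = [dict(type='CDLocalVisBackend')]

runner = Runner.from_cfg(cfg)
runner.train()  # 300 epochs with validation every 5, per train_cfg above
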
configs/TTP/ttp_sam_large_levircd_fp16.py ADDED
@@ -0,0 +1,201 @@
1
+ default_scope = 'opencd'
2
+
3
+ work_dir = 'work_dirs/lervicd/ttp_sam_large_levircd_fp16'
4
+
5
+ custom_imports = dict(imports=['mmseg.ttp'], allow_failed_imports=False)
6
+
7
+ env_cfg = dict(
8
+ cudnn_benchmark=True,
9
+ mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
10
+ dist_cfg=dict(backend='nccl'),
11
+ )
12
+ default_hooks = dict(
13
+ timer=dict(type='IterTimerHook'),
14
+ logger=dict(type='LoggerHook', interval=10, log_metric_by_epoch=True),
15
+ param_scheduler=dict(type='ParamSchedulerHook'),
16
+ checkpoint=dict(type='CheckpointHook', by_epoch=True, interval=10, save_best='cd/iou_changed', max_keep_ckpts=5, greater_keys=['cd/iou_changed'], save_last=True),
17
+ sampler_seed=dict(type='DistSamplerSeedHook'),
18
+ visualization=dict(type='CDVisualizationHook', interval=1, img_shape=(1024, 1024, 3))
19
+ )
20
+ vis_backends = [dict(type='CDLocalVisBackend'),
21
+ dict(type='WandbVisBackend', init_kwargs=dict(project='samcd', group='levircd', name='ttp_sam_large_levircd_fp16'))
22
+ ]
23
+
24
+ visualizer = dict(
25
+ type='CDLocalVisualizer',
26
+ vis_backends=vis_backends, name='visualizer', alpha=1.0)
27
+ log_processor = dict(by_epoch=True)
28
+
29
+ log_level = 'INFO'
30
+ load_from = None
31
+ resume = False
32
+
33
+ crop_size = (512, 512)
34
+
35
+ data_preprocessor = dict(
36
+ type='DualInputSegDataPreProcessor',
37
+ mean=[123.675, 116.28, 103.53] * 2,
38
+ std=[58.395, 57.12, 57.375] * 2,
39
+ bgr_to_rgb=True,
40
+ pad_val=0,
41
+ seg_pad_val=255,
42
+ size_divisor=32,
43
+ test_cfg=dict(size_divisor=32)
44
+ )
45
+
46
+ norm_cfg = dict(type='SyncBN', requires_grad=True)
47
+ fpn_norm_cfg = dict(type='LN2d', requires_grad=True)
48
+
49
+ sam_pretrain_ckpt_path = 'https://download.openmmlab.com/mmclassification/v1/vit_sam/vit-large-p16_sam-pre_3rdparty_sa1b-1024px_20230411-595feafd.pth'
50
+
51
+ model = dict(
52
+ type='SiamEncoderDecoder',
53
+ data_preprocessor=data_preprocessor,
54
+ backbone=dict(
55
+ type='MMPretrainSamVisionEncoder',
56
+ encoder_cfg=dict(
57
+ type='mmpretrain.ViTSAM',
58
+ arch='large',
59
+ img_size=crop_size[0],
60
+ patch_size=16,
61
+ out_channels=256,
62
+ use_abs_pos=True,
63
+ use_rel_pos=True,
64
+ window_size=14,
65
+ layer_cfgs=dict(type='TimeFusionTransformerEncoderLayer'),
66
+ init_cfg=dict(type='Pretrained', checkpoint=sam_pretrain_ckpt_path, prefix='backbone.'),
67
+ ),
68
+ peft_cfg=dict(
69
+ r=16,
70
+ target_modules=["qkv"],
71
+ lora_dropout=0.01,
72
+ bias='lora_only',
73
+ ),
74
+ ),
75
+ neck=dict(
76
+ type='SequentialNeck',
77
+ necks=[
78
+ dict(
79
+ type='FeatureFusionNeck',
80
+ policy='concat',
81
+ out_indices=(0,)),
82
+ dict(
83
+ type='SimpleFPN',
84
+ backbone_channel=512,
85
+ in_channels=[128, 256, 512, 512],
86
+ out_channels=256,
87
+ num_outs=5,
88
+ norm_cfg=fpn_norm_cfg),
89
+ ],
90
+ ),
91
+ decode_head=dict(
92
+ type='MLPSegHead',
93
+ out_size=(128, 128),
94
+ in_channels=[256]*5,
95
+ in_index=[0, 1, 2, 3, 4],
96
+ channels=256,
97
+ dropout_ratio=0,
98
+ num_classes=2,
99
+ norm_cfg=norm_cfg,
100
+ align_corners=False,
101
+ loss_decode=dict(
102
+ type='mmseg.CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
103
+ train_cfg=dict(),
104
+ test_cfg=dict(mode='slide', crop_size=crop_size, stride=(crop_size[0]//2, crop_size[1]//2))
105
+ ) # yapf: disable
106
+
107
+ dataset_type = 'LEVIR_CD_Dataset'
108
+ data_root = '/mnt/levir_datasets/levir-cd'
109
+
110
+
111
+ train_pipeline = [
112
+ dict(type='MultiImgLoadImageFromFile'),
113
+ dict(type='MultiImgLoadAnnotations'),
114
+ dict(type='MultiImgRandomRotate', prob=0.5, degree=180),
115
+ dict(type='MultiImgRandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
116
+ dict(type='MultiImgRandomFlip', prob=0.5, direction='horizontal'),
117
+ dict(type='MultiImgRandomFlip', prob=0.5, direction='vertical'),
118
+ # dict(type='MultiImgExchangeTime', prob=0.5),
119
+ dict(
120
+ type='MultiImgPhotoMetricDistortion',
121
+ brightness_delta=10,
122
+ contrast_range=(0.8, 1.2),
123
+ saturation_range=(0.8, 1.2),
124
+ hue_delta=10),
125
+ dict(type='MultiImgPackSegInputs')
126
+ ]
127
+ test_pipeline = [
128
+ dict(type='MultiImgLoadImageFromFile'),
129
+ dict(type='MultiImgResize', scale=(1024, 1024), keep_ratio=True),
130
+ # add loading annotation after ``Resize`` because ground truth
131
+ # does not need to do resize data transform
132
+ dict(type='MultiImgLoadAnnotations'),
133
+ dict(type='MultiImgPackSegInputs')
134
+ ]
135
+
136
+ batch_size_per_gpu = 2
137
+
138
+ train_dataloader = dict(
139
+ batch_size=batch_size_per_gpu,
140
+ num_workers=8,
141
+ persistent_workers=True,
142
+ sampler=dict(type='DefaultSampler', shuffle=True),
143
+ dataset=dict(
144
+ type=dataset_type,
145
+ data_root=data_root,
146
+ data_prefix=dict(
147
+ seg_map_path='train/label',
148
+ img_path_from='train/A',
149
+ img_path_to='train/B'),
150
+ pipeline=train_pipeline)
151
+ )
152
+
153
+ val_dataloader = dict(
154
+ batch_size=1,
155
+ num_workers=4,
156
+ persistent_workers=True,
157
+ sampler=dict(type='DefaultSampler', shuffle=False),
158
+ dataset=dict(
159
+ type=dataset_type,
160
+ data_root=data_root,
161
+ data_prefix=dict(
162
+ seg_map_path='test/label',
163
+ img_path_from='test/A',
164
+ img_path_to='test/B'),
165
+ pipeline=test_pipeline)
166
+ )
167
+
168
+ test_dataloader = val_dataloader
169
+
170
+ val_evaluator = dict(
171
+ type='CDMetric',
172
+ )
173
+ test_evaluator = val_evaluator
174
+
175
+ max_epochs = 300
176
+ base_lr = 0.0004
177
+ param_scheduler = [
178
+ dict(
179
+ type='LinearLR', start_factor=1e-4, by_epoch=True, begin=0, end=5, convert_to_iter_based=True),
180
+ dict(
181
+ type='CosineAnnealingLR',
182
+ T_max=max_epochs,
183
+ begin=5,
184
+ by_epoch=True,
185
+ end=max_epochs,
186
+ convert_to_iter_based=True
187
+ ),
188
+ ]
189
+
190
+ train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=max_epochs, val_interval=5)
191
+ val_cfg = dict(type='ValLoop')
192
+ test_cfg = dict(type='TestLoop')
193
+
194
+
195
+ optim_wrapper = dict(
196
+ type='AmpOptimWrapper',
197
+ optimizer=dict(
198
+ type='AdamW', lr=base_lr, betas=(0.9, 0.999), weight_decay=0.05),
199
+ dtype='float16',
200
+ )
201
+
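
As far as this diff shows, the fp16 variant differs from the base ttp_sam_large_levircd.py only in its work_dir, the W&B run name, and the optimizer wrapper, which switches to MMEngine's mixed-precision AmpOptimWrapper. The single functional change, shown side by side with the values from the two configs (base_lr = 0.0004):

# base config: full-precision training
optim_wrapper = dict(
    type='OptimWrapper',
    optimizer=dict(type='AdamW', lr=0.0004, betas=(0.9, 0.999), weight_decay=0.05),
)

# fp16 config: automatic mixed precision
optim_wrapper = dict(
    type='AmpOptimWrapper',
    optimizer=dict(type='AdamW', lr=0.0004, betas=(0.9, 0.999), weight_decay=0.05),
    dtype='float16',
)
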
configs/TTP/ttp_sam_large_levircd_infer.py ADDED
@@ -0,0 +1,199 @@
1
+ default_scope = 'opencd'
2
+
3
+ work_dir = 'work_dirs/lervicd/ttp_sam_large_levircd'
4
+
5
+ custom_imports = dict(imports=['mmseg.ttp'], allow_failed_imports=False)
6
+
7
+ env_cfg = dict(
8
+ cudnn_benchmark=True,
9
+ mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
10
+ dist_cfg=dict(backend='nccl'),
11
+ )
12
+ default_hooks = dict(
13
+ timer=dict(type='IterTimerHook'),
14
+ logger=dict(type='LoggerHook', interval=10, log_metric_by_epoch=True),
15
+ param_scheduler=dict(type='ParamSchedulerHook'),
16
+ checkpoint=dict(type='CheckpointHook', by_epoch=True, interval=10, save_best='cd/iou_changed', max_keep_ckpts=5, greater_keys=['cd/iou_changed'], save_last=True),
17
+ sampler_seed=dict(type='DistSamplerSeedHook'),
18
+ visualization=dict(type='CDVisualizationHook', interval=1,
19
+ img_shape=(1024, 1024, 3))
20
+ )
21
+ vis_backends = [dict(type='CDLocalVisBackend')]
22
+
23
+ visualizer = dict(
24
+ type='CDLocalVisualizer',
25
+ vis_backends=vis_backends, name='visualizer', alpha=1.0)
26
+ log_processor = dict(by_epoch=True)
27
+
28
+ log_level = 'INFO'
29
+ load_from = None
30
+ resume = False
31
+
32
+ crop_size = (512, 512)
33
+
34
+ data_preprocessor = dict(
35
+ type='DualInputSegDataPreProcessor',
36
+ mean=[123.675, 116.28, 103.53] * 2,
37
+ std=[58.395, 57.12, 57.375] * 2,
38
+ bgr_to_rgb=True,
39
+ pad_val=0,
40
+ seg_pad_val=255,
41
+ size_divisor=32,
42
+ test_cfg=dict(size_divisor=32)
43
+ )
44
+
45
+ norm_cfg = dict(type='SyncBN', requires_grad=True)
46
+ fpn_norm_cfg = dict(type='LN2d', requires_grad=True)
47
+
48
+ # sam_pretrain_ckpt_path = 'https://download.openmmlab.com/mmclassification/v1/vit_sam/vit-large-p16_sam-pre_3rdparty_sa1b-1024px_20230411-595feafd.pth'
49
+
50
+ model = dict(
51
+ type='SiamEncoderDecoder',
52
+ data_preprocessor=data_preprocessor,
53
+ backbone=dict(
54
+ type='MMPretrainSamVisionEncoder',
55
+ encoder_cfg=dict(
56
+ type='mmpretrain.ViTSAM',
57
+ arch='large',
58
+ img_size=crop_size[0],
59
+ patch_size=16,
60
+ out_channels=256,
61
+ use_abs_pos=True,
62
+ use_rel_pos=True,
63
+ window_size=14,
64
+ layer_cfgs=dict(type='TimeFusionTransformerEncoderLayer'),
65
+ # init_cfg=dict(type='Pretrained', checkpoint=sam_pretrain_ckpt_path, prefix='backbone.'),
66
+ ),
67
+ peft_cfg=dict(
68
+ r=16,
69
+ target_modules=["qkv"],
70
+ lora_dropout=0.01,
71
+ bias='lora_only',
72
+ ),
73
+ ),
74
+ neck=dict(
75
+ type='SequentialNeck',
76
+ necks=[
77
+ dict(
78
+ type='FeatureFusionNeck',
79
+ policy='concat',
80
+ out_indices=(0,)),
81
+ dict(
82
+ type='SimpleFPN',
83
+ backbone_channel=512,
84
+ in_channels=[128, 256, 512, 512],
85
+ out_channels=256,
86
+ num_outs=5,
87
+ norm_cfg=fpn_norm_cfg),
88
+ ],
89
+ ),
90
+ decode_head=dict(
91
+ type='MLPSegHead',
92
+ out_size=(128, 128),
93
+ in_channels=[256]*5,
94
+ in_index=[0, 1, 2, 3, 4],
95
+ channels=256,
96
+ dropout_ratio=0,
97
+ num_classes=2,
98
+ norm_cfg=norm_cfg,
99
+ align_corners=False,
100
+ loss_decode=dict(
101
+ type='mmseg.CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
102
+ train_cfg=dict(),
103
+ test_cfg=dict(mode='slide', crop_size=crop_size, stride=(crop_size[0]//2, crop_size[1]//2))
104
+ ) # yapf: disable
105
+
106
+ dataset_type = 'LEVIR_CD_Dataset'
107
+ data_root = '/mnt/levir_datasets/levir-cd'
108
+
109
+
110
+ train_pipeline = [
111
+ dict(type='MultiImgLoadImageFromFile'),
112
+ dict(type='MultiImgLoadAnnotations'),
113
+ dict(type='MultiImgRandomRotate', prob=0.5, degree=180),
114
+ dict(type='MultiImgRandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
115
+ dict(type='MultiImgRandomFlip', prob=0.5, direction='horizontal'),
116
+ dict(type='MultiImgRandomFlip', prob=0.5, direction='vertical'),
117
+ # dict(type='MultiImgExchangeTime', prob=0.5),
118
+ dict(
119
+ type='MultiImgPhotoMetricDistortion',
120
+ brightness_delta=10,
121
+ contrast_range=(0.8, 1.2),
122
+ saturation_range=(0.8, 1.2),
123
+ hue_delta=10),
124
+ dict(type='MultiImgPackSegInputs')
125
+ ]
126
+ test_pipeline = [
127
+ dict(type='MultiImgLoadImageFromFile', to_float32=True),
128
+ dict(type='MultiImgResize', scale=(1024, 1024), keep_ratio=True),
129
+ # add loading annotation after ``Resize`` because ground truth
130
+ # does not need to do resize data transform
131
+ dict(type='MultiImgLoadAnnotations'),
132
+ dict(type='MultiImgPackSegInputs')
133
+ ]
134
+
135
+ batch_size_per_gpu = 2
136
+
137
+ train_dataloader = dict(
138
+ batch_size=batch_size_per_gpu,
139
+ num_workers=8,
140
+ persistent_workers=True,
141
+ sampler=dict(type='DefaultSampler', shuffle=True),
142
+ dataset=dict(
143
+ type=dataset_type,
144
+ data_root=data_root,
145
+ data_prefix=dict(
146
+ seg_map_path='train/label',
147
+ img_path_from='train/A',
148
+ img_path_to='train/B'),
149
+ pipeline=train_pipeline)
150
+ )
151
+
152
+ val_dataloader = dict(
153
+ batch_size=1,
154
+ num_workers=4,
155
+ persistent_workers=True,
156
+ sampler=dict(type='DefaultSampler', shuffle=False),
157
+ dataset=dict(
158
+ type=dataset_type,
159
+ data_root=data_root,
160
+ data_prefix=dict(
161
+ seg_map_path='test/label',
162
+ img_path_from='test/A',
163
+ img_path_to='test/B'),
164
+ pipeline=test_pipeline)
165
+ )
166
+
167
+ test_dataloader = val_dataloader
168
+
169
+ val_evaluator = dict(
170
+ type='CDMetric',
171
+ )
172
+ test_evaluator = val_evaluator
173
+
174
+ max_epochs = 300
175
+ base_lr = 0.0004
176
+ param_scheduler = [
177
+ dict(
178
+ type='LinearLR', start_factor=1e-4, by_epoch=True, begin=0, end=5, convert_to_iter_based=True),
179
+ dict(
180
+ type='CosineAnnealingLR',
181
+ T_max=max_epochs,
182
+ begin=5,
183
+ by_epoch=True,
184
+ end=max_epochs,
185
+ convert_to_iter_based=True
186
+ ),
187
+ ]
188
+
189
+ train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=max_epochs, val_interval=5)
190
+ val_cfg = dict(type='ValLoop')
191
+ test_cfg = dict(type='TestLoop')
192
+
193
+
194
+ optim_wrapper = dict(
195
+ type='OptimWrapper',
196
+ optimizer=dict(
197
+ type='AdamW', lr=base_lr, betas=(0.9, 0.999), weight_decay=0.05),
198
+ )
199
+
demo/MMSegmentation_Tutorial.ipynb ADDED
@@ -0,0 +1,555 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "metadata": {
6
+ "colab_type": "text",
7
+ "id": "view-in-github"
8
+ },
9
+ "source": [
10
+ "<a href=\"https://colab.research.google.com/github/open-mmlab/mmsegmentation/blob/main/demo/MMSegmentation_Tutorial.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
11
+ ]
12
+ },
13
+ {
14
+ "cell_type": "markdown",
15
+ "metadata": {
16
+ "id": "FVmnaxFJvsb8"
17
+ },
18
+ "source": [
19
+ "# MMSegmentation Tutorial\n",
20
+ "Welcome to MMSegmentation! \n",
21
+ "\n",
22
+ "In this tutorial, we demo\n",
23
+ "* How to do inference with MMSeg trained weight\n",
24
+ "* How to train on your own dataset and visualize the results. "
25
+ ]
26
+ },
27
+ {
28
+ "cell_type": "markdown",
29
+ "metadata": {
30
+ "id": "QS8YHrEhbpas"
31
+ },
32
+ "source": [
33
+ "## Install MMSegmentation\n",
34
+ "This step may take several minutes. \n",
35
+ "\n",
36
+ "We use PyTorch 1.12 and CUDA 11.3 for this tutorial. You may install other versions by change the version number in pip install command. "
37
+ ]
38
+ },
39
+ {
40
+ "cell_type": "code",
41
+ "execution_count": null,
42
+ "metadata": {
43
+ "colab": {
44
+ "base_uri": "https://localhost:8080/"
45
+ },
46
+ "id": "UWyLrLYaNEaL",
47
+ "outputId": "32a47fe3-f10d-47a1-f6b9-b7c235abdab1"
48
+ },
49
+ "outputs": [],
50
+ "source": [
51
+ "# Check nvcc version\n",
52
+ "!nvcc -V\n",
53
+ "# Check GCC version\n",
54
+ "!gcc --version"
55
+ ]
56
+ },
57
+ {
58
+ "cell_type": "code",
59
+ "execution_count": null,
60
+ "metadata": {
61
+ "colab": {
62
+ "base_uri": "https://localhost:8080/"
63
+ },
64
+ "id": "Ki3WUBjKbutg",
65
+ "outputId": "14bd14b0-4d8c-4fa9-e3f9-da35c0efc0d5"
66
+ },
67
+ "outputs": [],
68
+ "source": [
69
+ "# Install PyTorch\n",
70
+ "!conda install pytorch==1.12.0 torchvision==0.13.0 torchaudio==0.12.0 cudatoolkit=11.3 -c pytorch\n",
71
+ "# Install mim\n",
72
+ "!pip install -U openmim\n",
73
+ "# Install mmengine\n",
74
+ "!mim install mmengine\n",
75
+ "# Install MMCV\n",
76
+ "!mim install 'mmcv >= 2.0.0rc1'\n"
77
+ ]
78
+ },
79
+ {
80
+ "cell_type": "code",
81
+ "execution_count": null,
82
+ "metadata": {
83
+ "colab": {
84
+ "base_uri": "https://localhost:8080/"
85
+ },
86
+ "id": "nR-hHRvbNJJZ",
87
+ "outputId": "10c3b131-d4db-458c-fc10-b94b1c6ed546"
88
+ },
89
+ "outputs": [],
90
+ "source": [
91
+ "!rm -rf mmsegmentation\n",
92
+ "!git clone -b main https://github.com/open-mmlab/mmsegmentation.git \n",
93
+ "%cd mmsegmentation\n",
94
+ "!pip install -e ."
95
+ ]
96
+ },
97
+ {
98
+ "cell_type": "code",
99
+ "execution_count": null,
100
+ "metadata": {
101
+ "colab": {
102
+ "base_uri": "https://localhost:8080/"
103
+ },
104
+ "id": "mAE_h7XhPT7d",
105
+ "outputId": "83bf0f8e-fc69-40b1-f9fe-0025724a217c"
106
+ },
107
+ "outputs": [],
108
+ "source": [
109
+ "# Check Pytorch installation\n",
110
+ "import torch, torchvision\n",
111
+ "print(torch.__version__, torch.cuda.is_available())\n",
112
+ "\n",
113
+ "# Check MMSegmentation installation\n",
114
+ "import mmseg\n",
115
+ "print(mmseg.__version__)"
116
+ ]
117
+ },
118
+ {
119
+ "cell_type": "markdown",
120
+ "metadata": {
121
+ "id": "Ta51clKX4cwM"
122
+ },
123
+ "source": [
124
+ "## Finetune a semantic segmentation model on a new dataset\n",
125
+ "\n",
126
+ "To finetune on a customized dataset, the following steps are necessary. \n",
127
+ "1. Add a new dataset class. \n",
128
+ "2. Create a config file accordingly. \n",
129
+ "3. Perform training and evaluation. "
130
+ ]
131
+ },
132
+ {
133
+ "cell_type": "markdown",
134
+ "metadata": {
135
+ "id": "AcZg6x_K5Zs3"
136
+ },
137
+ "source": [
138
+ "### Add a new dataset\n",
139
+ "\n",
140
+ "Datasets in MMSegmentation require image and semantic segmentation maps to be placed in folders with the same prefix. To support a new dataset, we may need to modify the original file structure. \n",
141
+ "\n",
142
+ "In this tutorial, we give an example of converting the dataset. You may refer to [docs](https://github.com/open-mmlab/mmsegmentation/blob/master/docs/en/tutorials/customize_datasets.md#customize-datasets-by-reorganizing-data) for details about dataset reorganization. \n",
143
+ "\n",
144
+ "We use [Stanford Background Dataset](http://dags.stanford.edu/projects/scenedataset.html) as an example. The dataset contains 715 images chosen from existing public datasets [LabelMe](http://labelme.csail.mit.edu), [MSRC](http://research.microsoft.com/en-us/projects/objectclassrecognition), [PASCAL VOC](http://pascallin.ecs.soton.ac.uk/challenges/VOC) and [Geometric Context](http://www.cs.illinois.edu/homes/dhoiem/). Images from these datasets are mainly outdoor scenes, each containing approximately 320-by-240 pixels. \n",
145
+ "In this tutorial, we use the region annotations as labels. There are 8 classes in total, i.e. sky, tree, road, grass, water, building, mountain, and foreground object. "
146
+ ]
147
+ },
148
+ {
149
+ "cell_type": "code",
150
+ "execution_count": null,
151
+ "metadata": {
152
+ "colab": {
153
+ "base_uri": "https://localhost:8080/"
154
+ },
155
+ "id": "TFIt7MHq5Wls",
156
+ "outputId": "74a126e4-c8a4-4d2f-a910-b58b71843a23"
157
+ },
158
+ "outputs": [],
159
+ "source": [
160
+ "# download and unzip\n",
161
+ "!wget http://dags.stanford.edu/data/iccv09Data.tar.gz -O stanford_background.tar.gz\n",
162
+ "!tar xf stanford_background.tar.gz"
163
+ ]
164
+ },
165
+ {
166
+ "cell_type": "code",
167
+ "execution_count": null,
168
+ "metadata": {
169
+ "colab": {
170
+ "base_uri": "https://localhost:8080/",
171
+ "height": 377
172
+ },
173
+ "id": "78LIci7F9WWI",
174
+ "outputId": "c432ddac-5a50-47b1-daac-5a26b07afea2"
175
+ },
176
+ "outputs": [],
177
+ "source": [
178
+ "# Let's take a look at the dataset\n",
179
+ "import mmcv\n",
180
+ "import mmengine\n",
181
+ "import matplotlib.pyplot as plt\n",
182
+ "\n",
183
+ "\n",
184
+ "img = mmcv.imread('iccv09Data/images/6000124.jpg')\n",
185
+ "plt.figure(figsize=(8, 6))\n",
186
+ "plt.imshow(mmcv.bgr2rgb(img))\n",
187
+ "plt.show()"
188
+ ]
189
+ },
190
+ {
191
+ "cell_type": "markdown",
192
+ "metadata": {
193
+ "id": "L5mNQuc2GsVE"
194
+ },
195
+ "source": [
196
+ "We need to convert the annotation into semantic map format as an image."
197
+ ]
198
+ },
199
+ {
200
+ "cell_type": "code",
201
+ "execution_count": null,
202
+ "metadata": {
203
+ "id": "WnGZfribFHCx"
204
+ },
205
+ "outputs": [],
206
+ "source": [
207
+ "# define dataset root and directory for images and annotations\n",
208
+ "data_root = 'iccv09Data'\n",
209
+ "img_dir = 'images'\n",
210
+ "ann_dir = 'labels'\n",
211
+ "# define class and palette for better visualization\n",
212
+ "classes = ('sky', 'tree', 'road', 'grass', 'water', 'bldg', 'mntn', 'fg obj')\n",
213
+ "palette = [[128, 128, 128], [129, 127, 38], [120, 69, 125], [53, 125, 34], \n",
214
+ " [0, 11, 123], [118, 20, 12], [122, 81, 25], [241, 134, 51]]"
215
+ ]
216
+ },
217
+ {
218
+ "cell_type": "code",
219
+ "execution_count": null,
220
+ "metadata": {
221
+ "id": "WnGZfribFHCx"
222
+ },
223
+ "outputs": [],
224
+ "source": [
225
+ "import os.path as osp\n",
226
+ "import numpy as np\n",
227
+ "from PIL import Image\n",
228
+ "\n",
229
+ "# convert dataset annotation to semantic segmentation map\n",
230
+ "for file in mmengine.scandir(osp.join(data_root, ann_dir), suffix='.regions.txt'):\n",
231
+ " seg_map = np.loadtxt(osp.join(data_root, ann_dir, file)).astype(np.uint8)\n",
232
+ " seg_img = Image.fromarray(seg_map).convert('P')\n",
233
+ " seg_img.putpalette(np.array(palette, dtype=np.uint8))\n",
234
+ " seg_img.save(osp.join(data_root, ann_dir, file.replace('.regions.txt', \n",
235
+ " '.png')))"
236
+ ]
237
+ },
238
+ {
239
+ "cell_type": "code",
240
+ "execution_count": null,
241
+ "metadata": {
242
+ "colab": {
243
+ "base_uri": "https://localhost:8080/",
244
+ "height": 377
245
+ },
246
+ "id": "5MCSS9ABfSks",
247
+ "outputId": "92b9bafc-589e-48fc-c9e9-476f125d6522"
248
+ },
249
+ "outputs": [],
250
+ "source": [
251
+ "# Let's take a look at the segmentation map we got\n",
252
+ "import matplotlib.patches as mpatches\n",
253
+ "img = Image.open('iccv09Data/labels/6000124.png')\n",
254
+ "plt.figure(figsize=(8, 6))\n",
255
+ "im = plt.imshow(np.array(img.convert('RGB')))\n",
256
+ "\n",
257
+ "# create a patch (proxy artist) for every color \n",
258
+ "patches = [mpatches.Patch(color=np.array(palette[i])/255., \n",
259
+ " label=classes[i]) for i in range(8)]\n",
260
+ "# put those patched as legend-handles into the legend\n",
261
+ "plt.legend(handles=patches, bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0., \n",
262
+ " fontsize='large')\n",
263
+ "\n",
264
+ "plt.show()"
265
+ ]
266
+ },
267
+ {
268
+ "cell_type": "code",
269
+ "execution_count": null,
270
+ "metadata": {
271
+ "id": "WbeLYCp2k5hl"
272
+ },
273
+ "outputs": [],
274
+ "source": [
275
+ "# split train/val set randomly\n",
276
+ "split_dir = 'splits'\n",
277
+ "mmengine.mkdir_or_exist(osp.join(data_root, split_dir))\n",
278
+ "filename_list = [osp.splitext(filename)[0] for filename in mmengine.scandir(\n",
279
+ " osp.join(data_root, ann_dir), suffix='.png')]\n",
280
+ "with open(osp.join(data_root, split_dir, 'train.txt'), 'w') as f:\n",
281
+ " # select first 4/5 as train set\n",
282
+ " train_length = int(len(filename_list)*4/5)\n",
283
+ " f.writelines(line + '\\n' for line in filename_list[:train_length])\n",
284
+ "with open(osp.join(data_root, split_dir, 'val.txt'), 'w') as f:\n",
285
+ " # select last 1/5 as train set\n",
286
+ " f.writelines(line + '\\n' for line in filename_list[train_length:])"
287
+ ]
288
+ },
289
+ {
290
+ "cell_type": "markdown",
291
+ "metadata": {
292
+ "id": "HchvmGYB_rrO"
293
+ },
294
+ "source": [
295
+ "After downloading the data, we need to implement `load_annotations` function in the new dataset class `StanfordBackgroundDataset`."
296
+ ]
297
+ },
298
+ {
299
+ "cell_type": "code",
300
+ "execution_count": null,
301
+ "metadata": {
302
+ "id": "LbsWOw62_o-X"
303
+ },
304
+ "outputs": [],
305
+ "source": [
306
+ "from mmseg.registry import DATASETS\n",
307
+ "from mmseg.datasets import BaseSegDataset\n",
308
+ "\n",
309
+ "\n",
310
+ "@DATASETS.register_module()\n",
311
+ "class StanfordBackgroundDataset(BaseSegDataset):\n",
312
+ " METAINFO = dict(classes = classes, palette = palette)\n",
313
+ " def __init__(self, **kwargs):\n",
314
+ " super().__init__(img_suffix='.jpg', seg_map_suffix='.png', **kwargs)\n",
315
+ " "
316
+ ]
317
+ },
318
+ {
319
+ "cell_type": "markdown",
320
+ "metadata": {
321
+ "id": "yUVtmn3Iq3WA"
322
+ },
323
+ "source": [
324
+ "### Create a config file\n",
325
+ "In the next step, we need to modify the config for the training. To accelerate the process, we finetune the model from trained weights."
326
+ ]
327
+ },
328
+ {
329
+ "cell_type": "code",
330
+ "execution_count": null,
331
+ "metadata": {},
332
+ "outputs": [],
333
+ "source": [
334
+ "# Download config and checkpoint files\n",
335
+ "!mim download mmsegmentation --config pspnet_r50-d8_4xb2-40k_cityscapes-512x1024 --dest ."
336
+ ]
337
+ },
338
+ {
339
+ "cell_type": "code",
340
+ "execution_count": null,
341
+ "metadata": {
342
+ "id": "Wwnj9tRzqX_A"
343
+ },
344
+ "outputs": [],
345
+ "source": [
346
+ "from mmengine import Config\n",
347
+ "cfg = Config.fromfile('configs/pspnet/pspnet_r50-d8_4xb2-40k_cityscapes-512x1024.py')\n",
348
+ "print(f'Config:\\n{cfg.pretty_text}')"
349
+ ]
350
+ },
351
+ {
352
+ "cell_type": "markdown",
353
+ "metadata": {
354
+ "id": "1y2oV5w97jQo"
355
+ },
356
+ "source": [
357
+ "Since the given config is used to train PSPNet on the cityscapes dataset, we need to modify it accordingly for our new dataset. "
358
+ ]
359
+ },
360
+ {
361
+ "cell_type": "code",
362
+ "execution_count": null,
363
+ "metadata": {
364
+ "colab": {
365
+ "base_uri": "https://localhost:8080/"
366
+ },
367
+ "id": "eyKnYC1Z7iCV",
368
+ "outputId": "6195217b-187f-4675-994b-ba90d8bb3078"
369
+ },
370
+ "outputs": [],
371
+ "source": [
372
+ "# Since we use only one GPU, BN is used instead of SyncBN\n",
373
+ "cfg.norm_cfg = dict(type='BN', requires_grad=True)\n",
374
+ "cfg.crop_size = (256, 256)\n",
375
+ "cfg.model.data_preprocessor.size = cfg.crop_size\n",
376
+ "cfg.model.backbone.norm_cfg = cfg.norm_cfg\n",
377
+ "cfg.model.decode_head.norm_cfg = cfg.norm_cfg\n",
378
+ "cfg.model.auxiliary_head.norm_cfg = cfg.norm_cfg\n",
379
+ "# modify num classes of the model in decode/auxiliary head\n",
380
+ "cfg.model.decode_head.num_classes = 8\n",
381
+ "cfg.model.auxiliary_head.num_classes = 8\n",
382
+ "\n",
383
+ "# Modify dataset type and path\n",
384
+ "cfg.dataset_type = 'StanfordBackgroundDataset'\n",
385
+ "cfg.data_root = data_root\n",
386
+ "\n",
387
+ "cfg.train_dataloader.batch_size = 8\n",
388
+ "\n",
389
+ "cfg.train_pipeline = [\n",
390
+ " dict(type='LoadImageFromFile'),\n",
391
+ " dict(type='LoadAnnotations'),\n",
392
+ " dict(type='RandomResize', scale=(320, 240), ratio_range=(0.5, 2.0), keep_ratio=True),\n",
393
+ " dict(type='RandomCrop', crop_size=cfg.crop_size, cat_max_ratio=0.75),\n",
394
+ " dict(type='RandomFlip', prob=0.5),\n",
395
+ " dict(type='PackSegInputs')\n",
396
+ "]\n",
397
+ "\n",
398
+ "cfg.test_pipeline = [\n",
399
+ " dict(type='LoadImageFromFile'),\n",
400
+ " dict(type='Resize', scale=(320, 240), keep_ratio=True),\n",
401
+ " # add loading annotation after ``Resize`` because ground truth\n",
402
+ " # does not need to do resize data transform\n",
403
+ " dict(type='LoadAnnotations'),\n",
404
+ " dict(type='PackSegInputs')\n",
405
+ "]\n",
406
+ "\n",
407
+ "\n",
408
+ "cfg.train_dataloader.dataset.type = cfg.dataset_type\n",
409
+ "cfg.train_dataloader.dataset.data_root = cfg.data_root\n",
410
+ "cfg.train_dataloader.dataset.data_prefix = dict(img_path=img_dir, seg_map_path=ann_dir)\n",
411
+ "cfg.train_dataloader.dataset.pipeline = cfg.train_pipeline\n",
412
+ "cfg.train_dataloader.dataset.ann_file = 'splits/train.txt'\n",
413
+ "\n",
414
+ "cfg.val_dataloader.dataset.type = cfg.dataset_type\n",
415
+ "cfg.val_dataloader.dataset.data_root = cfg.data_root\n",
416
+ "cfg.val_dataloader.dataset.data_prefix = dict(img_path=img_dir, seg_map_path=ann_dir)\n",
417
+ "cfg.val_dataloader.dataset.pipeline = cfg.test_pipeline\n",
418
+ "cfg.val_dataloader.dataset.ann_file = 'splits/val.txt'\n",
419
+ "\n",
420
+ "cfg.test_dataloader = cfg.val_dataloader\n",
421
+ "\n",
422
+ "\n",
423
+ "# Load the pretrained weights\n",
424
+ "cfg.load_from = 'pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth'\n",
425
+ "\n",
426
+ "# Set up working dir to save files and logs.\n",
427
+ "cfg.work_dir = './work_dirs/tutorial'\n",
428
+ "\n",
429
+ "cfg.train_cfg.max_iters = 200\n",
430
+ "cfg.train_cfg.val_interval = 200\n",
431
+ "cfg.default_hooks.logger.interval = 10\n",
432
+ "cfg.default_hooks.checkpoint.interval = 200\n",
433
+ "\n",
434
+ "# Set seed to facilitate reproducing the result\n",
435
+ "cfg['randomness'] = dict(seed=0)\n",
436
+ "\n",
437
+ "# Let's have a look at the final config used for training\n",
438
+ "print(f'Config:\\n{cfg.pretty_text}')"
439
+ ]
440
+ },
441
+ {
442
+ "cell_type": "markdown",
443
+ "metadata": {
444
+ "id": "QWuH14LYF2gQ"
445
+ },
446
+ "source": [
447
+ "### Train and Evaluation"
448
+ ]
449
+ },
450
+ {
451
+ "cell_type": "code",
452
+ "execution_count": null,
453
+ "metadata": {
454
+ "colab": {
455
+ "base_uri": "https://localhost:8080/"
456
+ },
457
+ "id": "jYKoSfdMF12B",
458
+ "outputId": "422219ca-d7a5-4890-f09f-88c959942e64"
459
+ },
460
+ "outputs": [],
461
+ "source": [
462
+ "from mmengine.runner import Runner\n",
463
+ "\n",
464
+ "runner = Runner.from_cfg(cfg)"
465
+ ]
466
+ },
467
+ {
468
+ "cell_type": "code",
469
+ "execution_count": null,
470
+ "metadata": {},
471
+ "outputs": [],
472
+ "source": [
473
+ "# start training\n",
474
+ "runner.train()"
475
+ ]
476
+ },
477
+ {
478
+ "cell_type": "markdown",
479
+ "metadata": {
480
+ "id": "DEkWOP-NMbc_"
481
+ },
482
+ "source": [
483
+ "Inference with trained model"
484
+ ]
485
+ },
486
+ {
487
+ "cell_type": "code",
488
+ "execution_count": null,
489
+ "metadata": {
490
+ "colab": {
491
+ "base_uri": "https://localhost:8080/",
492
+ "height": 645
493
+ },
494
+ "id": "ekG__UfaH_OU",
495
+ "outputId": "1437419c-869a-4902-df86-d4f6f8b2597a"
496
+ },
497
+ "outputs": [],
498
+ "source": [
499
+ "from mmseg.apis import init_model, inference_model, show_result_pyplot\n",
500
+ "\n",
501
+ "# Init the model from the config and the checkpoint\n",
502
+ "checkpoint_path = './work_dirs/tutorial/iter_200.pth'\n",
503
+ "model = init_model(cfg, checkpoint_path, 'cuda:0')\n",
504
+ "\n",
505
+ "img = mmcv.imread('iccv09Data/images/6000124.jpg')\n",
506
+ "result = inference_model(model, img)\n",
507
+ "plt.figure(figsize=(8, 6))\n",
508
+ "vis_result = show_result_pyplot(model, img, result)\n",
509
+ "plt.imshow(mmcv.bgr2rgb(vis_result))\n"
510
+ ]
511
+ }
512
+ ],
513
+ "metadata": {
514
+ "accelerator": "GPU",
515
+ "colab": {
516
+ "collapsed_sections": [],
517
+ "include_colab_link": true,
518
+ "name": "MMSegmentation Tutorial.ipynb",
519
+ "provenance": []
520
+ },
521
+ "kernelspec": {
522
+ "display_name": "Python 3.10.6 ('pt1.12')",
523
+ "language": "python",
524
+ "name": "python3"
525
+ },
526
+ "language_info": {
527
+ "codemirror_mode": {
528
+ "name": "ipython",
529
+ "version": 3
530
+ },
531
+ "file_extension": ".py",
532
+ "mimetype": "text/x-python",
533
+ "name": "python",
534
+ "nbconvert_exporter": "python",
535
+ "pygments_lexer": "ipython3",
536
+ "version": "3.10.6"
537
+ },
538
+ "pycharm": {
539
+ "stem_cell": {
540
+ "cell_type": "raw",
541
+ "metadata": {
542
+ "collapsed": false
543
+ },
544
+ "source": []
545
+ }
546
+ },
547
+ "vscode": {
548
+ "interpreter": {
549
+ "hash": "0442e67aee3d9cbb788fa6e86d60c4ffa94ad7f1943c65abfecb99a6f4696c58"
550
+ }
551
+ }
552
+ },
553
+ "nbformat": 4,
554
+ "nbformat_minor": 2
555
+ }
demo/classroom__rgb_00283.jpg ADDED
demo/demo.png ADDED
demo/image_demo.py ADDED
@@ -0,0 +1,51 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from argparse import ArgumentParser
3
+
4
+ from mmengine.model import revert_sync_batchnorm
5
+
6
+ from mmseg.apis import inference_model, init_model, show_result_pyplot
7
+
8
+
9
+ def main():
10
+ parser = ArgumentParser()
11
+ parser.add_argument('img', help='Image file')
12
+ parser.add_argument('config', help='Config file')
13
+ parser.add_argument('checkpoint', help='Checkpoint file')
14
+ parser.add_argument('--out-file', default=None, help='Path to output file')
15
+ parser.add_argument(
16
+ '--device', default='cuda:0', help='Device used for inference')
17
+ parser.add_argument(
18
+ '--opacity',
19
+ type=float,
20
+ default=0.5,
21
+ help='Opacity of painted segmentation map. In (0, 1] range.')
22
+ parser.add_argument(
23
+ '--with-labels',
24
+ action='store_true',
25
+ default=False,
26
+ help='Whether to display the class labels.')
27
+ parser.add_argument(
28
+ '--title', default='result', help='The image identifier.')
29
+ args = parser.parse_args()
30
+
31
+ # build the model from a config file and a checkpoint file
32
+ model = init_model(args.config, args.checkpoint, device=args.device)
33
+ if args.device == 'cpu':
34
+ model = revert_sync_batchnorm(model)
35
+ # test a single image
36
+ result = inference_model(model, args.img)
37
+ # show the results
38
+ show_result_pyplot(
39
+ model,
40
+ args.img,
41
+ result,
42
+ title=args.title,
43
+ opacity=args.opacity,
44
+ with_labels=args.with_labels,
45
+ draw_gt=False,
46
+ show=False if args.out_file is not None else True,
47
+ out_file=args.out_file)
48
+
49
+
50
+ if __name__ == '__main__':
51
+ main()
demo/image_demo_with_inferencer.py ADDED
@@ -0,0 +1,54 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from argparse import ArgumentParser
3
+
4
+ from mmseg.apis import MMSegInferencer
5
+
6
+
7
+ def main():
8
+ parser = ArgumentParser()
9
+ parser.add_argument('img', help='Image file')
10
+ parser.add_argument('model', help='Config file')
11
+ parser.add_argument('--checkpoint', default=None, help='Checkpoint file')
12
+ parser.add_argument(
13
+ '--out-dir', default='', help='Path to save result file')
14
+ parser.add_argument(
15
+ '--show',
16
+ action='store_true',
17
+ default=False,
18
+ help='Whether to display the drawn image.')
19
+ parser.add_argument(
20
+ '--dataset-name',
21
+ default='cityscapes',
22
+ help='Color palette used for segmentation map')
23
+ parser.add_argument(
24
+ '--device', default='cuda:0', help='Device used for inference')
25
+ parser.add_argument(
26
+ '--opacity',
27
+ type=float,
28
+ default=0.5,
29
+ help='Opacity of painted segmentation map. In (0, 1] range.')
30
+ parser.add_argument(
31
+ '--with-labels',
32
+ action='store_true',
33
+ default=False,
34
+ help='Whether to display the class labels.')
35
+ args = parser.parse_args()
36
+
37
+ # build the model from a config file and a checkpoint file
38
+ mmseg_inferencer = MMSegInferencer(
39
+ args.model,
40
+ args.checkpoint,
41
+ dataset_name=args.dataset_name,
42
+ device=args.device)
43
+
44
+ # test a single image
45
+ mmseg_inferencer(
46
+ args.img,
47
+ show=args.show,
48
+ out_dir=args.out_dir,
49
+ opacity=args.opacity,
50
+ with_labels=args.with_labels)
51
+
52
+
53
+ if __name__ == '__main__':
54
+ main()
demo/inference_demo.ipynb ADDED
@@ -0,0 +1,120 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": null,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "!mkdir ../checkpoints\n",
10
+ "!wget https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth -P ../checkpoints"
11
+ ]
12
+ },
13
+ {
14
+ "cell_type": "code",
15
+ "execution_count": null,
16
+ "metadata": {
17
+ "pycharm": {
18
+ "is_executing": true
19
+ }
20
+ },
21
+ "outputs": [],
22
+ "source": [
23
+ "import torch\n",
24
+ "import matplotlib.pyplot as plt\n",
25
+ "from mmengine.model.utils import revert_sync_batchnorm\n",
26
+ "from mmseg.apis import init_model, inference_model, show_result_pyplot"
27
+ ]
28
+ },
29
+ {
30
+ "cell_type": "code",
31
+ "execution_count": null,
32
+ "metadata": {
33
+ "pycharm": {
34
+ "is_executing": true
35
+ }
36
+ },
37
+ "outputs": [],
38
+ "source": [
39
+ "config_file = '../configs/pspnet/pspnet_r50-d8_4xb2-40k_cityscapes-512x1024.py'\n",
40
+ "checkpoint_file = '../checkpoints/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth'"
41
+ ]
42
+ },
43
+ {
44
+ "cell_type": "code",
45
+ "execution_count": null,
46
+ "metadata": {},
47
+ "outputs": [],
48
+ "source": [
49
+ "# build the model from a config file and a checkpoint file\n",
50
+ "model = init_model(config_file, checkpoint_file, device='cpu')"
51
+ ]
52
+ },
53
+ {
54
+ "cell_type": "code",
55
+ "execution_count": null,
56
+ "metadata": {},
57
+ "outputs": [],
58
+ "source": [
59
+ "# test a single image\n",
60
+ "img = 'demo.png'\n",
61
+ "if not torch.cuda.is_available():\n",
62
+ " model = revert_sync_batchnorm(model)\n",
63
+ "result = inference_model(model, img)"
64
+ ]
65
+ },
66
+ {
67
+ "cell_type": "code",
68
+ "execution_count": null,
69
+ "metadata": {},
70
+ "outputs": [],
71
+ "source": [
72
+ "# show the results\n",
73
+ "vis_result = show_result_pyplot(model, img, result, show=False)\n",
74
+ "plt.imshow(vis_result)"
75
+ ]
76
+ },
77
+ {
78
+ "cell_type": "code",
79
+ "execution_count": null,
80
+ "metadata": {},
81
+ "outputs": [],
82
+ "source": []
83
+ }
84
+ ],
85
+ "metadata": {
86
+ "kernelspec": {
87
+ "display_name": "pt1.13",
88
+ "language": "python",
89
+ "name": "python3"
90
+ },
91
+ "language_info": {
92
+ "codemirror_mode": {
93
+ "name": "ipython",
94
+ "version": 3
95
+ },
96
+ "file_extension": ".py",
97
+ "mimetype": "text/x-python",
98
+ "name": "python",
99
+ "nbconvert_exporter": "python",
100
+ "pygments_lexer": "ipython3",
101
+ "version": "3.10.11"
102
+ },
103
+ "pycharm": {
104
+ "stem_cell": {
105
+ "cell_type": "raw",
106
+ "metadata": {
107
+ "collapsed": false
108
+ },
109
+ "source": []
110
+ }
111
+ },
112
+ "vscode": {
113
+ "interpreter": {
114
+ "hash": "f61d5b8fecdd960739697f6c2860080d7b76a5be5d896cb034bdb275ab3ddda0"
115
+ }
116
+ }
117
+ },
118
+ "nbformat": 4,
119
+ "nbformat_minor": 4
120
+ }
demo/rs_image_inference.py ADDED
@@ -0,0 +1,50 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from argparse import ArgumentParser
3
+
4
+ from mmseg.apis import RSImage, RSInferencer
5
+
6
+
7
+ def main():
8
+ parser = ArgumentParser()
9
+ parser.add_argument('image', help='Image file path')
10
+ parser.add_argument('config', help='Config file')
11
+ parser.add_argument('checkpoint', help='Checkpoint file')
12
+ parser.add_argument(
13
+ '--output-path',
14
+ help='Path to save result image',
15
+ default='result.png')
16
+ parser.add_argument(
17
+ '--batch-size',
18
+ type=int,
19
+ default=1,
20
+ help='maximum number of windows inferred simultaneously')
21
+ parser.add_argument(
22
+ '--window-size',
23
+ help='window xsize,ysize',
24
+ default=(224, 224),
25
+ type=int,
26
+ nargs=2)
27
+ parser.add_argument(
28
+ '--stride',
29
+ help='window xstride,ystride',
30
+ default=(224, 224),
31
+ type=int,
32
+ nargs=2)
33
+ parser.add_argument(
34
+ '--thread', default=1, type=int, help='number of inference threads')
35
+ parser.add_argument(
36
+ '--device', default='cuda:0', help='Device used for inference')
37
+ args = parser.parse_args()
38
+ inferencer = RSInferencer.from_config_path(
39
+ args.config,
40
+ args.checkpoint,
41
+ batch_size=args.batch_size,
42
+ thread=args.thread,
43
+ device=args.device)
44
+ image = RSImage(args.image)
45
+
46
+ inferencer.run(image, args.window_size, args.stride, args.output_path)
47
+
48
+
49
+ if __name__ == '__main__':
50
+ main()
demo/video_demo.py ADDED
@@ -0,0 +1,112 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from argparse import ArgumentParser
3
+
4
+ import cv2
5
+ from mmengine.model.utils import revert_sync_batchnorm
6
+
7
+ from mmseg.apis import inference_model, init_model
8
+ from mmseg.apis.inference import show_result_pyplot
9
+
10
+
11
+ def main():
12
+ parser = ArgumentParser()
13
+ parser.add_argument('video', help='Video file or webcam id')
14
+ parser.add_argument('config', help='Config file')
15
+ parser.add_argument('checkpoint', help='Checkpoint file')
16
+ parser.add_argument(
17
+ '--device', default='cuda:0', help='Device used for inference')
18
+ parser.add_argument(
19
+ '--palette',
20
+ default='cityscapes',
21
+ help='Color palette used for segmentation map')
22
+ parser.add_argument(
23
+ '--show', action='store_true', help='Whether to show draw result')
24
+ parser.add_argument(
25
+ '--show-wait-time', default=1, type=int, help='Wait time after imshow')
26
+ parser.add_argument(
27
+ '--output-file', default=None, type=str, help='Output video file path')
28
+ parser.add_argument(
29
+ '--output-fourcc',
30
+ default='MJPG',
31
+ type=str,
32
+ help='Fourcc of the output video')
33
+ parser.add_argument(
34
+ '--output-fps', default=-1, type=int, help='FPS of the output video')
35
+ parser.add_argument(
36
+ '--output-height',
37
+ default=-1,
38
+ type=int,
39
+ help='Frame height of the output video')
40
+ parser.add_argument(
41
+ '--output-width',
42
+ default=-1,
43
+ type=int,
44
+ help='Frame width of the output video')
45
+ parser.add_argument(
46
+ '--opacity',
47
+ type=float,
48
+ default=0.5,
49
+ help='Opacity of painted segmentation map. In (0, 1] range.')
50
+ args = parser.parse_args()
51
+
52
+ assert args.show or args.output_file, \
53
+ 'At least one output should be enabled.'
54
+
55
+ # build the model from a config file and a checkpoint file
56
+ model = init_model(args.config, args.checkpoint, device=args.device)
57
+ if args.device == 'cpu':
58
+ model = revert_sync_batchnorm(model)
59
+
60
+ # build input video
61
+ if args.video.isdigit():
62
+ args.video = int(args.video)
63
+ cap = cv2.VideoCapture(args.video)
64
+ assert (cap.isOpened())
65
+ input_height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
66
+ input_width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
67
+ input_fps = cap.get(cv2.CAP_PROP_FPS)
68
+
69
+ # init output video
70
+ writer = None
71
+ output_height = None
72
+ output_width = None
73
+ if args.output_file is not None:
74
+ fourcc = cv2.VideoWriter_fourcc(*args.output_fourcc)
75
+ output_fps = args.output_fps if args.output_fps > 0 else input_fps
76
+ output_height = args.output_height if args.output_height > 0 else int(
77
+ input_height)
78
+ output_width = args.output_width if args.output_width > 0 else int(
79
+ input_width)
80
+ writer = cv2.VideoWriter(args.output_file, fourcc, output_fps,
81
+ (output_width, output_height), True)
82
+
83
+ # start looping
84
+ try:
85
+ while True:
86
+ flag, frame = cap.read()
87
+ if not flag:
88
+ break
89
+
90
+ # test a single image
91
+ result = inference_model(model, frame)
92
+
93
+ # blend raw image and prediction
94
+ draw_img = show_result_pyplot(model, frame, result)
95
+
96
+ if args.show:
97
+ cv2.imshow('video_demo', draw_img)
98
+ cv2.waitKey(args.show_wait_time)
99
+ if writer:
100
+ if draw_img.shape[0] != output_height or draw_img.shape[
101
+ 1] != output_width:
102
+ draw_img = cv2.resize(draw_img,
103
+ (output_width, output_height))
104
+ writer.write(draw_img)
105
+ finally:
106
+ if writer:
107
+ writer.release()
108
+ cap.release()
109
+
110
+
111
+ if __name__ == '__main__':
112
+ main()
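The per-frame calls used above (init_model, inference_model, show_result_pyplot) behave the same on a single image; a minimal sketch with placeholder config/checkpoint paths:

from mmseg.apis import inference_model, init_model
from mmseg.apis.inference import show_result_pyplot

# Placeholder paths: any trained mmseg config/checkpoint pair works here.
model = init_model('configs/my_seg_config.py', 'work_dirs/my_seg_ckpt.pth',
                   device='cuda:0')
result = inference_model(model, 'demo/demo.png')  # image path or loaded ndarray
# Returns the blended image, exactly as done per frame in the loop above.
drawn = show_result_pyplot(model, 'demo/demo.png', result)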
mmdet/.DS_Store ADDED
Binary file (8.2 kB). View file
 
mmdet/__init__.py ADDED
@@ -0,0 +1,27 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ import mmcv
3
+ import mmengine
4
+ from mmengine.utils import digit_version
5
+
6
+ from .version import __version__, version_info
7
+
8
+ mmcv_minimum_version = '2.0.0rc4'
9
+ mmcv_maximum_version = '2.2.0'
10
+ mmcv_version = digit_version(mmcv.__version__)
11
+
12
+ mmengine_minimum_version = '0.7.1'
13
+ mmengine_maximum_version = '1.0.0'
14
+ mmengine_version = digit_version(mmengine.__version__)
15
+
16
+ assert (mmcv_version >= digit_version(mmcv_minimum_version)
17
+ and mmcv_version < digit_version(mmcv_maximum_version)), \
18
+ f'MMCV=={mmcv.__version__} is used but incompatible. ' \
19
+ f'Please install mmcv>={mmcv_minimum_version}, <{mmcv_maximum_version}.'
20
+
21
+ assert (mmengine_version >= digit_version(mmengine_minimum_version)
22
+ and mmengine_version < digit_version(mmengine_maximum_version)), \
23
+ f'MMEngine=={mmengine.__version__} is used but incompatible. ' \
24
+ f'Please install mmengine>={mmengine_minimum_version}, ' \
25
+ f'<{mmengine_maximum_version}.'
26
+
27
+ __all__ = ['__version__', 'version_info', 'digit_version']
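The version gate above works because digit_version turns a version string into a comparable tuple in which pre-releases sort below the corresponding final release; a small illustrative sketch:

from mmengine.utils import digit_version

# rc releases compare lower than the final release, so '2.0.0rc4' satisfies
# the mmcv_minimum_version bound while still being below '2.2.0'.
assert digit_version('2.0.0rc4') < digit_version('2.0.0') < digit_version('2.2.0')
assert digit_version('0.7.1') < digit_version('1.0.0')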
mmdet/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (1.29 kB). View file
 
mmdet/__pycache__/registry.cpython-311.pyc ADDED
Binary file (3.82 kB). View file
 
mmdet/__pycache__/version.cpython-311.pyc ADDED
Binary file (1.35 kB). View file
 
mmdet/apis/__init__.py ADDED
@@ -0,0 +1,9 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from .det_inferencer import DetInferencer
3
+ from .inference import (async_inference_detector, inference_detector,
4
+ inference_mot, init_detector, init_track_model)
5
+
6
+ __all__ = [
7
+ 'init_detector', 'async_inference_detector', 'inference_detector',
8
+ 'DetInferencer', 'inference_mot', 'init_track_model'
9
+ ]
mmdet/apis/det_inferencer.py ADDED
@@ -0,0 +1,644 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ import copy
3
+ import os.path as osp
4
+ import warnings
5
+ from typing import Dict, Iterable, List, Optional, Sequence, Tuple, Union
6
+
7
+ import mmcv
8
+ import mmengine
9
+ import numpy as np
10
+ import torch.nn as nn
11
+ from mmcv.transforms import LoadImageFromFile
12
+ from mmengine.dataset import Compose
13
+ from mmengine.fileio import (get_file_backend, isdir, join_path,
14
+ list_dir_or_file)
15
+ from mmengine.infer.infer import BaseInferencer, ModelType
16
+ from mmengine.model.utils import revert_sync_batchnorm
17
+ from mmengine.registry import init_default_scope
18
+ from mmengine.runner.checkpoint import _load_checkpoint_to_model
19
+ from mmengine.visualization import Visualizer
20
+ from rich.progress import track
21
+
22
+ from mmdet.evaluation import INSTANCE_OFFSET
23
+ from mmdet.registry import DATASETS
24
+ from mmdet.structures import DetDataSample
25
+ from mmdet.structures.mask import encode_mask_results, mask2bbox
26
+ from mmdet.utils import ConfigType
27
+ from ..evaluation import get_classes
28
+
29
+ try:
30
+ from panopticapi.evaluation import VOID
31
+ from panopticapi.utils import id2rgb
32
+ except ImportError:
33
+ id2rgb = None
34
+ VOID = None
35
+
36
+ InputType = Union[str, np.ndarray]
37
+ InputsType = Union[InputType, Sequence[InputType]]
38
+ PredType = List[DetDataSample]
39
+ ImgType = Union[np.ndarray, Sequence[np.ndarray]]
40
+
41
+ IMG_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm', '.tif',
42
+ '.tiff', '.webp')
43
+
44
+
45
+ class DetInferencer(BaseInferencer):
46
+ """Object Detection Inferencer.
47
+
48
+ Args:
49
+ model (str, optional): Path to the config file or the model name
50
+ defined in metafile. For example, it could be
51
+ "rtmdet-s" or 'rtmdet_s_8xb32-300e_coco' or
52
+ "configs/rtmdet/rtmdet_s_8xb32-300e_coco.py".
53
+ If model is not specified, user must provide the
54
+ `weights` saved by MMEngine which contains the config string.
55
+ Defaults to None.
56
+ weights (str, optional): Path to the checkpoint. If it is not specified
57
+ and model is a model name of metafile, the weights will be loaded
58
+ from metafile. Defaults to None.
59
+ device (str, optional): Device to run inference. If None, the available
60
+ device will be automatically used. Defaults to None.
61
+ scope (str, optional): The scope of the model. Defaults to mmdet.
62
+ palette (str): Color palette used for visualization. The order of
63
+ priority is palette -> config -> checkpoint. Defaults to 'none'.
64
+ show_progress (bool): Control whether to display the progress
65
+ bar during the inference process. Defaults to True.
66
+ """
67
+
68
+ preprocess_kwargs: set = set()
69
+ forward_kwargs: set = set()
70
+ visualize_kwargs: set = {
71
+ 'return_vis',
72
+ 'show',
73
+ 'wait_time',
74
+ 'draw_pred',
75
+ 'pred_score_thr',
76
+ 'img_out_dir',
77
+ 'no_save_vis',
78
+ }
79
+ postprocess_kwargs: set = {
80
+ 'print_result',
81
+ 'pred_out_dir',
82
+ 'return_datasamples',
83
+ 'no_save_pred',
84
+ }
85
+
86
+ def __init__(self,
87
+ model: Optional[Union[ModelType, str]] = None,
88
+ weights: Optional[str] = None,
89
+ device: Optional[str] = None,
90
+ scope: Optional[str] = 'mmdet',
91
+ palette: str = 'none',
92
+ show_progress: bool = True) -> None:
93
+ # A global counter tracking the number of images processed, for
94
+ # naming of the output images
95
+ self.num_visualized_imgs = 0
96
+ self.num_predicted_imgs = 0
97
+ self.palette = palette
98
+ init_default_scope(scope)
99
+ super().__init__(
100
+ model=model, weights=weights, device=device, scope=scope)
101
+ self.model = revert_sync_batchnorm(self.model)
102
+ self.show_progress = show_progress
103
+
104
+ def _load_weights_to_model(self, model: nn.Module,
105
+ checkpoint: Optional[dict],
106
+ cfg: Optional[ConfigType]) -> None:
107
+ """Loading model weights and meta information from cfg and checkpoint.
108
+
109
+ Args:
110
+ model (nn.Module): Model to load weights and meta information.
111
+ checkpoint (dict, optional): The loaded checkpoint.
112
+ cfg (Config or ConfigDict, optional): The loaded config.
113
+ """
114
+
115
+ if checkpoint is not None:
116
+ _load_checkpoint_to_model(model, checkpoint)
117
+ checkpoint_meta = checkpoint.get('meta', {})
118
+ # save the dataset_meta in the model for convenience
119
+ if 'dataset_meta' in checkpoint_meta:
120
+ # mmdet 3.x, all keys should be lowercase
121
+ model.dataset_meta = {
122
+ k.lower(): v
123
+ for k, v in checkpoint_meta['dataset_meta'].items()
124
+ }
125
+ elif 'CLASSES' in checkpoint_meta:
126
+ # < mmdet 3.x
127
+ classes = checkpoint_meta['CLASSES']
128
+ model.dataset_meta = {'classes': classes}
129
+ else:
130
+ warnings.warn(
131
+ 'dataset_meta or class names are not saved in the '
132
+ 'checkpoint\'s meta data, use COCO classes by default.')
133
+ model.dataset_meta = {'classes': get_classes('coco')}
134
+ else:
135
+ warnings.warn('Checkpoint is not loaded, and the inference '
136
+ 'result is calculated by the randomly initialized '
137
+ 'model!')
138
+ warnings.warn('weights is None, use COCO classes by default.')
139
+ model.dataset_meta = {'classes': get_classes('coco')}
140
+
141
+ # Priority: args.palette -> config -> checkpoint
142
+ if self.palette != 'none':
143
+ model.dataset_meta['palette'] = self.palette
144
+ else:
145
+ test_dataset_cfg = copy.deepcopy(cfg.test_dataloader.dataset)
146
+ # lazy init. We only need the metainfo.
147
+ test_dataset_cfg['lazy_init'] = True
148
+ metainfo = DATASETS.build(test_dataset_cfg).metainfo
149
+ cfg_palette = metainfo.get('palette', None)
150
+ if cfg_palette is not None:
151
+ model.dataset_meta['palette'] = cfg_palette
152
+ else:
153
+ if 'palette' not in model.dataset_meta:
154
+ warnings.warn(
155
+ 'palette does not exist, random is used by default. '
156
+ 'You can also set the palette to customize.')
157
+ model.dataset_meta['palette'] = 'random'
158
+
159
+ def _init_pipeline(self, cfg: ConfigType) -> Compose:
160
+ """Initialize the test pipeline."""
161
+ pipeline_cfg = cfg.test_dataloader.dataset.pipeline
162
+
163
+ # For inference, the key of ``img_id`` is not used.
164
+ if 'meta_keys' in pipeline_cfg[-1]:
165
+ pipeline_cfg[-1]['meta_keys'] = tuple(
166
+ meta_key for meta_key in pipeline_cfg[-1]['meta_keys']
167
+ if meta_key != 'img_id')
168
+
169
+ load_img_idx = self._get_transform_idx(
170
+ pipeline_cfg, ('LoadImageFromFile', LoadImageFromFile))
171
+ if load_img_idx == -1:
172
+ raise ValueError(
173
+ 'LoadImageFromFile is not found in the test pipeline')
174
+ pipeline_cfg[load_img_idx]['type'] = 'mmdet.InferencerLoader'
175
+ return Compose(pipeline_cfg)
176
+
177
+ def _get_transform_idx(self, pipeline_cfg: ConfigType,
178
+ name: Union[str, Tuple[str, type]]) -> int:
179
+ """Returns the index of the transform in a pipeline.
180
+
181
+ If the transform is not found, returns -1.
182
+ """
183
+ for i, transform in enumerate(pipeline_cfg):
184
+ if transform['type'] in name:
185
+ return i
186
+ return -1
187
+
188
+ def _init_visualizer(self, cfg: ConfigType) -> Optional[Visualizer]:
189
+ """Initialize visualizers.
190
+
191
+ Args:
192
+ cfg (ConfigType): Config containing the visualizer information.
193
+
194
+ Returns:
195
+ Visualizer or None: Visualizer initialized with config.
196
+ """
197
+ visualizer = super()._init_visualizer(cfg)
198
+ visualizer.dataset_meta = self.model.dataset_meta
199
+ return visualizer
200
+
201
+ def _inputs_to_list(self, inputs: InputsType) -> list:
202
+ """Preprocess the inputs to a list.
203
+
204
+ Preprocess inputs to a list according to its type:
205
+
206
+ - list or tuple: return inputs
207
+ - str:
208
+ - Directory path: return all files in the directory
209
+ - other cases: return a list containing the string. The string
210
+ could be a path to file, a url or other types of string according
211
+ to the task.
212
+
213
+ Args:
214
+ inputs (InputsType): Inputs for the inferencer.
215
+
216
+ Returns:
217
+ list: List of input for the :meth:`preprocess`.
218
+ """
219
+ if isinstance(inputs, str):
220
+ backend = get_file_backend(inputs)
221
+ if hasattr(backend, 'isdir') and isdir(inputs):
222
+ # Backends like HttpsBackend do not implement `isdir`, so only
223
+ # those backends that implement `isdir` could accept the inputs
224
+ # as a directory
225
+ filename_list = list_dir_or_file(
226
+ inputs, list_dir=False, suffix=IMG_EXTENSIONS)
227
+ inputs = [
228
+ join_path(inputs, filename) for filename in filename_list
229
+ ]
230
+
231
+ if not isinstance(inputs, (list, tuple)):
232
+ inputs = [inputs]
233
+
234
+ return list(inputs)
235
+
236
+ def preprocess(self, inputs: InputsType, batch_size: int = 1, **kwargs):
237
+ """Process the inputs into a model-feedable format.
238
+
239
+ Customize your preprocess by overriding this method. Preprocess should
240
+ return an iterable object, of which each item will be used as the
241
+ input of ``model.test_step``.
242
+
243
+ ``BaseInferencer.preprocess`` will return an iterable chunked data,
244
+ which will be used in __call__ like this:
245
+
246
+ .. code-block:: python
247
+
248
+ def __call__(self, inputs, batch_size=1, **kwargs):
249
+ chunked_data = self.preprocess(inputs, batch_size, **kwargs)
250
+ for batch in chunked_data:
251
+ preds = self.forward(batch, **kwargs)
252
+
253
+ Args:
254
+ inputs (InputsType): Inputs given by user.
255
+ batch_size (int): batch size. Defaults to 1.
256
+
257
+ Yields:
258
+ Any: Data processed by the ``pipeline`` and ``collate_fn``.
259
+ """
260
+ chunked_data = self._get_chunk_data(inputs, batch_size)
261
+ yield from map(self.collate_fn, chunked_data)
262
+
263
+ def _get_chunk_data(self, inputs: Iterable, chunk_size: int):
264
+ """Get batch data from inputs.
265
+
266
+ Args:
267
+ inputs (Iterable): An iterable dataset.
268
+ chunk_size (int): Equivalent to batch size.
269
+
270
+ Yields:
271
+ list: batch data.
272
+ """
273
+ inputs_iter = iter(inputs)
274
+ while True:
275
+ try:
276
+ chunk_data = []
277
+ for _ in range(chunk_size):
278
+ inputs_ = next(inputs_iter)
279
+ if isinstance(inputs_, dict):
280
+ if 'img' in inputs_:
281
+ ori_inputs_ = inputs_['img']
282
+ else:
283
+ ori_inputs_ = inputs_['img_path']
284
+ chunk_data.append(
285
+ (ori_inputs_,
286
+ self.pipeline(copy.deepcopy(inputs_))))
287
+ else:
288
+ chunk_data.append((inputs_, self.pipeline(inputs_)))
289
+ yield chunk_data
290
+ except StopIteration:
291
+ if chunk_data:
292
+ yield chunk_data
293
+ break
294
+
295
+ # TODO: Video and Webcam are currently not supported and
296
+ # may consume too much memory if your input folder has a lot of images.
297
+ # This will be optimized later.
298
+ def __call__(
299
+ self,
300
+ inputs: InputsType,
301
+ batch_size: int = 1,
302
+ return_vis: bool = False,
303
+ show: bool = False,
304
+ wait_time: int = 0,
305
+ no_save_vis: bool = False,
306
+ draw_pred: bool = True,
307
+ pred_score_thr: float = 0.3,
308
+ return_datasamples: bool = False,
309
+ print_result: bool = False,
310
+ no_save_pred: bool = True,
311
+ out_dir: str = '',
312
+ # by open image task
313
+ texts: Optional[Union[str, list]] = None,
314
+ # by open panoptic task
315
+ stuff_texts: Optional[Union[str, list]] = None,
316
+ # by GLIP
317
+ custom_entities: bool = False,
318
+ **kwargs) -> dict:
319
+ """Call the inferencer.
320
+
321
+ Args:
322
+ inputs (InputsType): Inputs for the inferencer.
323
+ batch_size (int): Inference batch size. Defaults to 1.
324
+ show (bool): Whether to display the visualization results in a
325
+ popup window. Defaults to False.
326
+ wait_time (float): The interval of show (s). Defaults to 0.
327
+ no_save_vis (bool): Whether to force not to save prediction
328
+ vis results. Defaults to False.
329
+ draw_pred (bool): Whether to draw predicted bounding boxes.
330
+ Defaults to True.
331
+ pred_score_thr (float): Minimum score of bboxes to draw.
332
+ Defaults to 0.3.
333
+ return_datasamples (bool): Whether to return results as
334
+ :obj:`DetDataSample`. Defaults to False.
335
+ print_result (bool): Whether to print the inference result w/o
336
+ visualization to the console. Defaults to False.
337
+ no_save_pred (bool): Whether to force not to save prediction
338
+ results. Defaults to True.
339
+ out_dir: Dir to save the inference results or
340
+ visualization. If left as empty, no file will be saved.
341
+ Defaults to ''.
342
+ texts (str | list[str]): Text prompts. Defaults to None.
343
+ stuff_texts (str | list[str]): Stuff text prompts of open
344
+ panoptic task. Defaults to None.
345
+ custom_entities (bool): Whether to use custom entities.
346
+ Defaults to False. Only used in GLIP.
347
+ **kwargs: Other keyword arguments passed to :meth:`preprocess`,
348
+ :meth:`forward`, :meth:`visualize` and :meth:`postprocess`.
349
+ Each key in kwargs should be in the corresponding set of
350
+ ``preprocess_kwargs``, ``forward_kwargs``, ``visualize_kwargs``
351
+ and ``postprocess_kwargs``.
352
+
353
+ Returns:
354
+ dict: Inference and visualization results.
355
+ """
356
+ (
357
+ preprocess_kwargs,
358
+ forward_kwargs,
359
+ visualize_kwargs,
360
+ postprocess_kwargs,
361
+ ) = self._dispatch_kwargs(**kwargs)
362
+
363
+ ori_inputs = self._inputs_to_list(inputs)
364
+
365
+ if texts is not None and isinstance(texts, str):
366
+ texts = [texts] * len(ori_inputs)
367
+ if stuff_texts is not None and isinstance(stuff_texts, str):
368
+ stuff_texts = [stuff_texts] * len(ori_inputs)
369
+ if texts is not None:
370
+ assert len(texts) == len(ori_inputs)
371
+ for i in range(len(texts)):
372
+ if isinstance(ori_inputs[i], str):
373
+ ori_inputs[i] = {
374
+ 'text': texts[i],
375
+ 'img_path': ori_inputs[i],
376
+ 'custom_entities': custom_entities
377
+ }
378
+ else:
379
+ ori_inputs[i] = {
380
+ 'text': texts[i],
381
+ 'img': ori_inputs[i],
382
+ 'custom_entities': custom_entities
383
+ }
384
+ if stuff_texts is not None:
385
+ assert len(stuff_texts) == len(ori_inputs)
386
+ for i in range(len(stuff_texts)):
387
+ ori_inputs[i]['stuff_text'] = stuff_texts[i]
388
+
389
+ inputs = self.preprocess(
390
+ ori_inputs, batch_size=batch_size, **preprocess_kwargs)
391
+
392
+ results_dict = {'predictions': [], 'visualization': []}
393
+ for ori_imgs, data in (track(inputs, description='Inference')
394
+ if self.show_progress else inputs):
395
+ preds = self.forward(data, **forward_kwargs)
396
+ visualization = self.visualize(
397
+ ori_imgs,
398
+ preds,
399
+ return_vis=return_vis,
400
+ show=show,
401
+ wait_time=wait_time,
402
+ draw_pred=draw_pred,
403
+ pred_score_thr=pred_score_thr,
404
+ no_save_vis=no_save_vis,
405
+ img_out_dir=out_dir,
406
+ **visualize_kwargs)
407
+ results = self.postprocess(
408
+ preds,
409
+ visualization,
410
+ return_datasamples=return_datasamples,
411
+ print_result=print_result,
412
+ no_save_pred=no_save_pred,
413
+ pred_out_dir=out_dir,
414
+ **postprocess_kwargs)
415
+ results_dict['predictions'].extend(results['predictions'])
416
+ if results['visualization'] is not None:
417
+ results_dict['visualization'].extend(results['visualization'])
418
+ return results_dict
419
+
420
+ def visualize(self,
421
+ inputs: InputsType,
422
+ preds: PredType,
423
+ return_vis: bool = False,
424
+ show: bool = False,
425
+ wait_time: int = 0,
426
+ draw_pred: bool = True,
427
+ pred_score_thr: float = 0.3,
428
+ no_save_vis: bool = False,
429
+ img_out_dir: str = '',
430
+ **kwargs) -> Union[List[np.ndarray], None]:
431
+ """Visualize predictions.
432
+
433
+ Args:
434
+ inputs (List[Union[str, np.ndarray]]): Inputs for the inferencer.
435
+ preds (List[:obj:`DetDataSample`]): Predictions of the model.
436
+ return_vis (bool): Whether to return the visualization result.
437
+ Defaults to False.
438
+ show (bool): Whether to display the image in a popup window.
439
+ Defaults to False.
440
+ wait_time (float): The interval of show (s). Defaults to 0.
441
+ draw_pred (bool): Whether to draw predicted bounding boxes.
442
+ Defaults to True.
443
+ pred_score_thr (float): Minimum score of bboxes to draw.
444
+ Defaults to 0.3.
445
+ no_save_vis (bool): Whether to force not to save prediction
446
+ vis results. Defaults to False.
447
+ img_out_dir (str): Output directory of visualization results.
448
+ If left as empty, no file will be saved. Defaults to ''.
449
+
450
+ Returns:
451
+ List[np.ndarray] or None: Returns visualization results only if
452
+ applicable.
453
+ """
454
+ if no_save_vis is True:
455
+ img_out_dir = ''
456
+
457
+ if not show and img_out_dir == '' and not return_vis:
458
+ return None
459
+
460
+ if self.visualizer is None:
461
+ raise ValueError('Visualization needs the "visualizer" term '
462
+ 'defined in the config, but got None.')
463
+
464
+ results = []
465
+
466
+ for single_input, pred in zip(inputs, preds):
467
+ if isinstance(single_input, str):
468
+ img_bytes = mmengine.fileio.get(single_input)
469
+ img = mmcv.imfrombytes(img_bytes)
470
+ img = img[:, :, ::-1]
471
+ img_name = osp.basename(single_input)
472
+ elif isinstance(single_input, np.ndarray):
473
+ img = single_input.copy()
474
+ img_num = str(self.num_visualized_imgs).zfill(8)
475
+ img_name = f'{img_num}.jpg'
476
+ else:
477
+ raise ValueError('Unsupported input type: '
478
+ f'{type(single_input)}')
479
+
480
+ out_file = osp.join(img_out_dir, 'vis',
481
+ img_name) if img_out_dir != '' else None
482
+
483
+ self.visualizer.add_datasample(
484
+ img_name,
485
+ img,
486
+ pred,
487
+ show=show,
488
+ wait_time=wait_time,
489
+ draw_gt=False,
490
+ draw_pred=draw_pred,
491
+ pred_score_thr=pred_score_thr,
492
+ out_file=out_file,
493
+ )
494
+ results.append(self.visualizer.get_image())
495
+ self.num_visualized_imgs += 1
496
+
497
+ return results
498
+
499
+ def postprocess(
500
+ self,
501
+ preds: PredType,
502
+ visualization: Optional[List[np.ndarray]] = None,
503
+ return_datasamples: bool = False,
504
+ print_result: bool = False,
505
+ no_save_pred: bool = False,
506
+ pred_out_dir: str = '',
507
+ **kwargs,
508
+ ) -> Dict:
509
+ """Process the predictions and visualization results from ``forward``
510
+ and ``visualize``.
511
+
512
+ This method should be responsible for the following tasks:
513
+
514
+ 1. Convert datasamples into a json-serializable dict if needed.
515
+ 2. Pack the predictions and visualization results and return them.
516
+ 3. Dump or log the predictions.
517
+
518
+ Args:
519
+ preds (List[:obj:`DetDataSample`]): Predictions of the model.
520
+ visualization (Optional[np.ndarray]): Visualized predictions.
521
+ return_datasamples (bool): Whether to use Datasample to store
522
+ inference results. If False, dict will be used.
523
+ print_result (bool): Whether to print the inference result w/o
524
+ visualization to the console. Defaults to False.
525
+ no_save_pred (bool): Whether to force not to save prediction
526
+ results. Defaults to False.
527
+ pred_out_dir: Dir to save the inference results w/o
528
+ visualization. If left as empty, no file will be saved.
529
+ Defaults to ''.
530
+
531
+ Returns:
532
+ dict: Inference and visualization results with key ``predictions``
533
+ and ``visualization``.
534
+
535
+ - ``visualization`` (Any): Returned by :meth:`visualize`.
536
+ - ``predictions`` (dict or DataSample): Returned by
537
+ :meth:`forward` and processed in :meth:`postprocess`.
538
+ If ``return_datasamples=False``, it usually should be a
539
+ json-serializable dict containing only basic data elements such
540
+ as strings and numbers.
541
+ """
542
+ if no_save_pred is True:
543
+ pred_out_dir = ''
544
+
545
+ result_dict = {}
546
+ results = preds
547
+ if not return_datasamples:
548
+ results = []
549
+ for pred in preds:
550
+ result = self.pred2dict(pred, pred_out_dir)
551
+ results.append(result)
552
+ elif pred_out_dir != '':
553
+ warnings.warn('Currently does not support saving datasample '
554
+ 'when return_datasamples is set to True. '
555
+ 'Prediction results are not saved!')
556
+ # Add img to the results after printing and dumping
557
+ result_dict['predictions'] = results
558
+ if print_result:
559
+ print(result_dict)
560
+ result_dict['visualization'] = visualization
561
+ return result_dict
562
+
563
+ # TODO: The data format and fields saved in json need further discussion.
564
+ # Maybe should include model name, timestamp, filename, image info etc.
565
+ def pred2dict(self,
566
+ data_sample: DetDataSample,
567
+ pred_out_dir: str = '') -> Dict:
568
+ """Extract elements necessary to represent a prediction into a
569
+ dictionary.
570
+
571
+ It's better to contain only basic data elements such as strings and
572
+ numbers in order to guarantee it's json-serializable.
573
+
574
+ Args:
575
+ data_sample (:obj:`DetDataSample`): Predictions of the model.
576
+ pred_out_dir: Dir to save the inference results w/o
577
+ visualization. If left as empty, no file will be saved.
578
+ Defaults to ''.
579
+
580
+ Returns:
581
+ dict: Prediction results.
582
+ """
583
+ is_save_pred = True
584
+ if pred_out_dir == '':
585
+ is_save_pred = False
586
+
587
+ if is_save_pred and 'img_path' in data_sample:
588
+ img_path = osp.basename(data_sample.img_path)
589
+ img_path = osp.splitext(img_path)[0]
590
+ out_img_path = osp.join(pred_out_dir, 'preds',
591
+ img_path + '_panoptic_seg.png')
592
+ out_json_path = osp.join(pred_out_dir, 'preds', img_path + '.json')
593
+ elif is_save_pred:
594
+ out_img_path = osp.join(
595
+ pred_out_dir, 'preds',
596
+ f'{self.num_predicted_imgs}_panoptic_seg.png')
597
+ out_json_path = osp.join(pred_out_dir, 'preds',
598
+ f'{self.num_predicted_imgs}.json')
599
+ self.num_predicted_imgs += 1
600
+
601
+ result = {}
602
+ if 'pred_instances' in data_sample:
603
+ masks = data_sample.pred_instances.get('masks')
604
+ pred_instances = data_sample.pred_instances.numpy()
605
+ result = {
606
+ 'labels': pred_instances.labels.tolist(),
607
+ 'scores': pred_instances.scores.tolist()
608
+ }
609
+ if 'bboxes' in pred_instances:
610
+ result['bboxes'] = pred_instances.bboxes.tolist()
611
+ if masks is not None:
612
+ if 'bboxes' not in pred_instances or pred_instances.bboxes.sum(
613
+ ) == 0:
614
+ # Fake bbox, such as the SOLO.
615
+ bboxes = mask2bbox(masks.cpu()).numpy().tolist()
616
+ result['bboxes'] = bboxes
617
+ encode_masks = encode_mask_results(pred_instances.masks)
618
+ for encode_mask in encode_masks:
619
+ if isinstance(encode_mask['counts'], bytes):
620
+ encode_mask['counts'] = encode_mask['counts'].decode()
621
+ result['masks'] = encode_masks
622
+
623
+ if 'pred_panoptic_seg' in data_sample:
624
+ if VOID is None:
625
+ raise RuntimeError(
626
+ 'panopticapi is not installed, please install it by: '
627
+ 'pip install git+https://github.com/cocodataset/'
628
+ 'panopticapi.git.')
629
+
630
+ pan = data_sample.pred_panoptic_seg.sem_seg.cpu().numpy()[0]
631
+ pan[pan % INSTANCE_OFFSET == len(
632
+ self.model.dataset_meta['classes'])] = VOID
633
+ pan = id2rgb(pan).astype(np.uint8)
634
+
635
+ if is_save_pred:
636
+ mmcv.imwrite(pan[:, :, ::-1], out_img_path)
637
+ result['panoptic_seg_path'] = out_img_path
638
+ else:
639
+ result['panoptic_seg'] = pan
640
+
641
+ if is_save_pred:
642
+ mmengine.dump(result, out_json_path)
643
+
644
+ return result
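For reference, a minimal sketch of driving this inferencer end to end. The model name follows the metafile convention mentioned in the class docstring and the image path is a placeholder:

from mmdet.apis import DetInferencer

# 'rtmdet-s' is a metafile model name; a local config path plus weights=... works too.
inferencer = DetInferencer(model='rtmdet-s', device='cuda:0')
results = inferencer(
    'demo/demo.jpg',      # placeholder: an image path, a directory or an ndarray
    pred_score_thr=0.3,
    out_dir='outputs/',   # visualizations go to outputs/vis, dumped results to outputs/preds
    no_save_pred=False)
# With return_datasamples=False (the default) each prediction is a plain dict;
# see pred2dict above for the available keys.
print(results['predictions'][0]['labels'][:5])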
mmdet/apis/inference.py ADDED
@@ -0,0 +1,372 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ import copy
3
+ import warnings
4
+ from pathlib import Path
5
+ from typing import Optional, Sequence, Union
6
+
7
+ import numpy as np
8
+ import torch
9
+ import torch.nn as nn
10
+ from mmcv.ops import RoIPool
11
+ from mmcv.transforms import Compose
12
+ from mmengine.config import Config
13
+ from mmengine.dataset import default_collate
14
+ from mmengine.model.utils import revert_sync_batchnorm
15
+ from mmengine.registry import init_default_scope
16
+ from mmengine.runner import load_checkpoint
17
+
18
+ from mmdet.registry import DATASETS
19
+ from mmdet.utils import ConfigType
20
+ from ..evaluation import get_classes
21
+ from ..registry import MODELS
22
+ from ..structures import DetDataSample, SampleList
23
+ from ..utils import get_test_pipeline_cfg
24
+
25
+
26
+ def init_detector(
27
+ config: Union[str, Path, Config],
28
+ checkpoint: Optional[str] = None,
29
+ palette: str = 'none',
30
+ device: str = 'cuda:0',
31
+ cfg_options: Optional[dict] = None,
32
+ ) -> nn.Module:
33
+ """Initialize a detector from config file.
34
+
35
+ Args:
36
+ config (str, :obj:`Path`, or :obj:`mmengine.Config`): Config file path,
37
+ :obj:`Path`, or the config object.
38
+ checkpoint (str, optional): Checkpoint path. If left as None, the model
39
+ will not load any weights.
40
+ palette (str): Color palette used for visualization. If palette
41
+ is stored in checkpoint, use checkpoint's palette first, otherwise
42
+ use externally passed palette. Currently, supports 'coco', 'voc',
43
+ 'citys' and 'random'. Defaults to none.
44
+ device (str): The device where the anchors will be put on.
45
+ Defaults to cuda:0.
46
+ cfg_options (dict, optional): Options to override some settings in
47
+ the used config.
48
+
49
+ Returns:
50
+ nn.Module: The constructed detector.
51
+ """
52
+ if isinstance(config, (str, Path)):
53
+ config = Config.fromfile(config)
54
+ elif not isinstance(config, Config):
55
+ raise TypeError('config must be a filename or Config object, '
56
+ f'but got {type(config)}')
57
+ if cfg_options is not None:
58
+ config.merge_from_dict(cfg_options)
59
+ elif 'init_cfg' in config.model.backbone:
60
+ config.model.backbone.init_cfg = None
61
+
62
+ scope = config.get('default_scope', 'mmdet')
63
+ if scope is not None:
64
+ init_default_scope(config.get('default_scope', 'mmdet'))
65
+
66
+ model = MODELS.build(config.model)
67
+ model = revert_sync_batchnorm(model)
68
+ if checkpoint is None:
69
+ warnings.simplefilter('once')
70
+ warnings.warn('checkpoint is None, use COCO classes by default.')
71
+ model.dataset_meta = {'classes': get_classes('coco')}
72
+ else:
73
+ checkpoint = load_checkpoint(model, checkpoint, map_location='cpu')
74
+ # Weights converted from elsewhere may not have meta fields.
75
+ checkpoint_meta = checkpoint.get('meta', {})
76
+
77
+ # save the dataset_meta in the model for convenience
78
+ if 'dataset_meta' in checkpoint_meta:
79
+ # mmdet 3.x, all keys should be lowercase
80
+ model.dataset_meta = {
81
+ k.lower(): v
82
+ for k, v in checkpoint_meta['dataset_meta'].items()
83
+ }
84
+ elif 'CLASSES' in checkpoint_meta:
85
+ # < mmdet 3.x
86
+ classes = checkpoint_meta['CLASSES']
87
+ model.dataset_meta = {'classes': classes}
88
+ else:
89
+ warnings.simplefilter('once')
90
+ warnings.warn(
91
+ 'dataset_meta or class names are not saved in the '
92
+ 'checkpoint\'s meta data, use COCO classes by default.')
93
+ model.dataset_meta = {'classes': get_classes('coco')}
94
+
95
+ # Priority: args.palette -> config -> checkpoint
96
+ if palette != 'none':
97
+ model.dataset_meta['palette'] = palette
98
+ else:
99
+ test_dataset_cfg = copy.deepcopy(config.test_dataloader.dataset)
100
+ # lazy init. We only need the metainfo.
101
+ test_dataset_cfg['lazy_init'] = True
102
+ metainfo = DATASETS.build(test_dataset_cfg).metainfo
103
+ cfg_palette = metainfo.get('palette', None)
104
+ if cfg_palette is not None:
105
+ model.dataset_meta['palette'] = cfg_palette
106
+ else:
107
+ if 'palette' not in model.dataset_meta:
108
+ warnings.warn(
109
+ 'palette does not exist, random is used by default. '
110
+ 'You can also set the palette to customize.')
111
+ model.dataset_meta['palette'] = 'random'
112
+
113
+ model.cfg = config # save the config in the model for convenience
114
+ model.to(device)
115
+ model.eval()
116
+ return model
117
+
118
+
119
+ ImagesType = Union[str, np.ndarray, Sequence[str], Sequence[np.ndarray]]
120
+
121
+
122
+ def inference_detector(
123
+ model: nn.Module,
124
+ imgs: ImagesType,
125
+ test_pipeline: Optional[Compose] = None,
126
+ text_prompt: Optional[str] = None,
127
+ custom_entities: bool = False,
128
+ ) -> Union[DetDataSample, SampleList]:
129
+ """Inference image(s) with the detector.
130
+
131
+ Args:
132
+ model (nn.Module): The loaded detector.
133
+ imgs (str, ndarray, Sequence[str/ndarray]):
134
+ Either image files or loaded images.
135
+ test_pipeline (:obj:`Compose`): Test pipeline.
136
+
137
+ Returns:
138
+ :obj:`DetDataSample` or list[:obj:`DetDataSample`]:
139
+ If imgs is a list or tuple, the same length list type results
140
+ will be returned, otherwise return the detection results directly.
141
+ """
142
+
143
+ if isinstance(imgs, (list, tuple)):
144
+ is_batch = True
145
+ else:
146
+ imgs = [imgs]
147
+ is_batch = False
148
+
149
+ cfg = model.cfg
150
+
151
+ if test_pipeline is None:
152
+ cfg = cfg.copy()
153
+ test_pipeline = get_test_pipeline_cfg(cfg)
154
+ if isinstance(imgs[0], np.ndarray):
155
+ # Calling this method across libraries will result
156
+ # in module unregistered error if not prefixed with mmdet.
157
+ test_pipeline[0].type = 'mmdet.LoadImageFromNDArray'
158
+
159
+ test_pipeline = Compose(test_pipeline)
160
+
161
+ if model.data_preprocessor.device.type == 'cpu':
162
+ for m in model.modules():
163
+ assert not isinstance(
164
+ m, RoIPool
165
+ ), 'CPU inference with RoIPool is not supported currently.'
166
+
167
+ result_list = []
168
+ for i, img in enumerate(imgs):
169
+ # prepare data
170
+ if isinstance(img, np.ndarray):
171
+ # TODO: remove img_id.
172
+ data_ = dict(img=img, img_id=0)
173
+ else:
174
+ # TODO: remove img_id.
175
+ data_ = dict(img_path=img, img_id=0)
176
+
177
+ if text_prompt:
178
+ data_['text'] = text_prompt
179
+ data_['custom_entities'] = custom_entities
180
+
181
+ # build the data pipeline
182
+ data_ = test_pipeline(data_)
183
+
184
+ data_['inputs'] = [data_['inputs']]
185
+ data_['data_samples'] = [data_['data_samples']]
186
+
187
+ # forward the model
188
+ with torch.no_grad():
189
+ results = model.test_step(data_)[0]
190
+
191
+ result_list.append(results)
192
+
193
+ if not is_batch:
194
+ return result_list[0]
195
+ else:
196
+ return result_list
197
+
198
+
199
+ # TODO: Awaiting refactoring
200
+ async def async_inference_detector(model, imgs):
201
+ """Async inference image(s) with the detector.
202
+
203
+ Args:
204
+ model (nn.Module): The loaded detector.
205
+ img (str | ndarray): Either image files or loaded images.
206
+
207
+ Returns:
208
+ Awaitable detection results.
209
+ """
210
+ if not isinstance(imgs, (list, tuple)):
211
+ imgs = [imgs]
212
+
213
+ cfg = model.cfg
214
+
215
+ if isinstance(imgs[0], np.ndarray):
216
+ cfg = cfg.copy()
217
+ # set loading pipeline type
218
+ cfg.data.test.pipeline[0].type = 'LoadImageFromNDArray'
219
+
220
+ # cfg.data.test.pipeline = replace_ImageToTensor(cfg.data.test.pipeline)
221
+ test_pipeline = Compose(cfg.data.test.pipeline)
222
+
223
+ datas = []
224
+ for img in imgs:
225
+ # prepare data
226
+ if isinstance(img, np.ndarray):
227
+ # directly add img
228
+ data = dict(img=img)
229
+ else:
230
+ # add information into dict
231
+ data = dict(img_info=dict(filename=img), img_prefix=None)
232
+ # build the data pipeline
233
+ data = test_pipeline(data)
234
+ datas.append(data)
235
+
236
+ for m in model.modules():
237
+ assert not isinstance(
238
+ m,
239
+ RoIPool), 'CPU inference with RoIPool is not supported currently.'
240
+
241
+ # We don't restore `torch.is_grad_enabled()` value during concurrent
242
+ # inference since execution can overlap
243
+ torch.set_grad_enabled(False)
244
+ results = await model.aforward_test(datas, rescale=True)
245
+ return results
246
+
247
+
248
+ def build_test_pipeline(cfg: ConfigType) -> ConfigType:
249
+ """Build test_pipeline for mot/vis demo. In mot/vis infer, original
250
+ test_pipeline should remove the "LoadImageFromFile" and
251
+ "LoadTrackAnnotations".
252
+
253
+ Args:
254
+ cfg (ConfigDict): The loaded config.
255
+ Returns:
256
+ ConfigType: new test_pipeline
257
+ """
258
+ # remove the "LoadImageFromFile" and "LoadTrackAnnotations" in pipeline
259
+ transform_broadcaster = cfg.test_dataloader.dataset.pipeline[0].copy()
260
+ for transform in transform_broadcaster['transforms']:
261
+ if transform['type'] == 'Resize':
262
+ transform_broadcaster['transforms'] = transform
263
+ pack_track_inputs = cfg.test_dataloader.dataset.pipeline[-1].copy()
264
+ test_pipeline = Compose([transform_broadcaster, pack_track_inputs])
265
+
266
+ return test_pipeline
267
+
268
+
269
+ def inference_mot(model: nn.Module, img: np.ndarray, frame_id: int,
270
+ video_len: int) -> SampleList:
271
+ """Inference image(s) with the mot model.
272
+
273
+ Args:
274
+ model (nn.Module): The loaded mot model.
275
+ img (np.ndarray): Loaded image.
276
+ frame_id (int): frame id.
277
+ video_len (int): demo video length
278
+ Returns:
279
+ SampleList: The tracking data samples.
280
+ """
281
+ cfg = model.cfg
282
+ data = dict(
283
+ img=[img.astype(np.float32)],
284
+ frame_id=[frame_id],
285
+ ori_shape=[img.shape[:2]],
286
+ img_id=[frame_id + 1],
287
+ ori_video_length=[video_len])
288
+
289
+ test_pipeline = build_test_pipeline(cfg)
290
+ data = test_pipeline(data)
291
+
292
+ if not next(model.parameters()).is_cuda:
293
+ for m in model.modules():
294
+ assert not isinstance(
295
+ m, RoIPool
296
+ ), 'CPU inference with RoIPool is not supported currently.'
297
+
298
+ # forward the model
299
+ with torch.no_grad():
300
+ data = default_collate([data])
301
+ result = model.test_step(data)[0]
302
+ return result
303
+
304
+
305
+ def init_track_model(config: Union[str, Config],
306
+ checkpoint: Optional[str] = None,
307
+ detector: Optional[str] = None,
308
+ reid: Optional[str] = None,
309
+ device: str = 'cuda:0',
310
+ cfg_options: Optional[dict] = None) -> nn.Module:
311
+ """Initialize a model from config file.
312
+
313
+ Args:
314
+ config (str or :obj:`mmengine.Config`): Config file path or the config
315
+ object.
316
+ checkpoint (Optional[str], optional): Checkpoint path. Defaults to
317
+ None.
318
+ detector (Optional[str], optional): Detector checkpoint path, used in
319
+ some tracking algorithms like sort. Defaults to None.
320
+ reid (Optional[str], optional): Reid checkpoint path, used in
321
+ some tracking algorithms like sort. Defaults to None.
322
+ device (str, optional): The device that the model inferences on.
323
+ Defaults to `cuda:0`.
324
+ cfg_options (Optional[dict], optional): Options to override some
325
+ settings in the used config. Defaults to None.
326
+
327
+ Returns:
328
+ nn.Module: The constructed model.
329
+ """
330
+ if isinstance(config, str):
331
+ config = Config.fromfile(config)
332
+ elif not isinstance(config, Config):
333
+ raise TypeError('config must be a filename or Config object, '
334
+ f'but got {type(config)}')
335
+ if cfg_options is not None:
336
+ config.merge_from_dict(cfg_options)
337
+
338
+ model = MODELS.build(config.model)
339
+
340
+ if checkpoint is not None:
341
+ checkpoint = load_checkpoint(model, checkpoint, map_location='cpu')
342
+ # Weights converted from elsewhere may not have meta fields.
343
+ checkpoint_meta = checkpoint.get('meta', {})
344
+ # save the dataset_meta in the model for convenience
345
+ if 'dataset_meta' in checkpoint_meta:
346
+ if 'CLASSES' in checkpoint_meta['dataset_meta']:
347
+ value = checkpoint_meta['dataset_meta'].pop('CLASSES')
348
+ checkpoint_meta['dataset_meta']['classes'] = value
349
+ model.dataset_meta = checkpoint_meta['dataset_meta']
350
+
351
+ if detector is not None:
352
+ assert not (checkpoint and detector), \
353
+ 'Error: checkpoint and detector checkpoint cannot both exist'
354
+ load_checkpoint(model.detector, detector, map_location='cpu')
355
+
356
+ if reid is not None:
357
+ assert not (checkpoint and reid), \
358
+ 'Error: checkpoint and reid checkpoint cannot both exist'
359
+ load_checkpoint(model.reid, reid, map_location='cpu')
360
+
361
+ # Some methods don't load checkpoints or checkpoints don't contain
362
+ # 'dataset_meta'
363
+ # VIS need dataset_meta, MOT don't need dataset_meta
364
+ if not hasattr(model, 'dataset_meta'):
365
+ warnings.warn('dataset_meta or class names are missing, '
366
+ 'use None by default.')
367
+ model.dataset_meta = {'classes': None}
368
+
369
+ model.cfg = config # save the config in the model for convenience
370
+ model.to(device)
371
+ model.eval()
372
+ return model
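A minimal sketch of the functional API defined in this file; the config and checkpoint paths are placeholders:

from mmdet.apis import inference_detector, init_detector

# Placeholder paths: any mmdet 3.x config/checkpoint pair works.
model = init_detector('configs/my_det_config.py', 'work_dirs/my_det_ckpt.pth',
                      palette='coco', device='cuda:0')
result = inference_detector(model, 'demo/demo.jpg')  # single input -> DetDataSample
bboxes = result.pred_instances.bboxes  # (N, 4) boxes in (x1, y1, x2, y2) order
scores = result.pred_instances.scores
labels = result.pred_instances.labels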
mmdet/configs/.DS_Store ADDED
Binary file (8.2 kB). View file
 
mmdet/configs/_base_/datasets/coco_detection.py ADDED
@@ -0,0 +1,104 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from mmcv.transforms import LoadImageFromFile
3
+ from mmengine.dataset.sampler import DefaultSampler
4
+
5
+ from mmdet.datasets import AspectRatioBatchSampler, CocoDataset
6
+ from mmdet.datasets.transforms import (LoadAnnotations, PackDetInputs,
7
+ RandomFlip, Resize)
8
+ from mmdet.evaluation import CocoMetric
9
+
10
+ # dataset settings
11
+ dataset_type = CocoDataset
12
+ data_root = 'data/coco/'
13
+
14
+ # Example to use different file client
15
+ # Method 1: simply set the data root and let the file I/O module
16
+ # automatically infer from prefix (not support LMDB and Memcache yet)
17
+
18
+ # data_root = 's3://openmmlab/datasets/detection/coco/'
19
+
20
+ # Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6
21
+ # backend_args = dict(
22
+ # backend='petrel',
23
+ # path_mapping=dict({
24
+ # './data/': 's3://openmmlab/datasets/detection/',
25
+ # 'data/': 's3://openmmlab/datasets/detection/'
26
+ # }))
27
+ backend_args = None
28
+
29
+ train_pipeline = [
30
+ dict(type=LoadImageFromFile, backend_args=backend_args),
31
+ dict(type=LoadAnnotations, with_bbox=True),
32
+ dict(type=Resize, scale=(1333, 800), keep_ratio=True),
33
+ dict(type=RandomFlip, prob=0.5),
34
+ dict(type=PackDetInputs)
35
+ ]
36
+ test_pipeline = [
37
+ dict(type=LoadImageFromFile, backend_args=backend_args),
38
+ dict(type=Resize, scale=(1333, 800), keep_ratio=True),
39
+ # If you don't have a gt annotation, delete the pipeline
40
+ dict(type=LoadAnnotations, with_bbox=True),
41
+ dict(
42
+ type=PackDetInputs,
43
+ meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
44
+ 'scale_factor'))
45
+ ]
46
+ train_dataloader = dict(
47
+ batch_size=2,
48
+ num_workers=2,
49
+ persistent_workers=True,
50
+ sampler=dict(type=DefaultSampler, shuffle=True),
51
+ batch_sampler=dict(type=AspectRatioBatchSampler),
52
+ dataset=dict(
53
+ type=dataset_type,
54
+ data_root=data_root,
55
+ ann_file='annotations/instances_train2017.json',
56
+ data_prefix=dict(img='train2017/'),
57
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
58
+ pipeline=train_pipeline,
59
+ backend_args=backend_args))
60
+ val_dataloader = dict(
61
+ batch_size=1,
62
+ num_workers=2,
63
+ persistent_workers=True,
64
+ drop_last=False,
65
+ sampler=dict(type=DefaultSampler, shuffle=False),
66
+ dataset=dict(
67
+ type=dataset_type,
68
+ data_root=data_root,
69
+ ann_file='annotations/instances_val2017.json',
70
+ data_prefix=dict(img='val2017/'),
71
+ test_mode=True,
72
+ pipeline=test_pipeline,
73
+ backend_args=backend_args))
74
+ test_dataloader = val_dataloader
75
+
76
+ val_evaluator = dict(
77
+ type=CocoMetric,
78
+ ann_file=data_root + 'annotations/instances_val2017.json',
79
+ metric='bbox',
80
+ format_only=False,
81
+ backend_args=backend_args)
82
+ test_evaluator = val_evaluator
83
+
84
+ # inference on test dataset and
85
+ # format the output results for submission.
86
+ # test_dataloader = dict(
87
+ # batch_size=1,
88
+ # num_workers=2,
89
+ # persistent_workers=True,
90
+ # drop_last=False,
91
+ # sampler=dict(type=DefaultSampler, shuffle=False),
92
+ # dataset=dict(
93
+ # type=dataset_type,
94
+ # data_root=data_root,
95
+ # ann_file=data_root + 'annotations/image_info_test-dev2017.json',
96
+ # data_prefix=dict(img='test2017/'),
97
+ # test_mode=True,
98
+ # pipeline=test_pipeline))
99
+ # test_evaluator = dict(
100
+ # type=CocoMetric,
101
+ # metric='bbox',
102
+ # format_only=True,
103
+ # ann_file=data_root + 'annotations/image_info_test-dev2017.json',
104
+ # outfile_prefix='./work_dirs/coco_detection/test')
mmdet/configs/_base_/datasets/coco_instance.py ADDED
@@ -0,0 +1,106 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from mmcv.transforms.loading import LoadImageFromFile
3
+ from mmengine.dataset.sampler import DefaultSampler
4
+
5
+ from mmdet.datasets.coco import CocoDataset
6
+ from mmdet.datasets.samplers.batch_sampler import AspectRatioBatchSampler
7
+ from mmdet.datasets.transforms.formatting import PackDetInputs
8
+ from mmdet.datasets.transforms.loading import LoadAnnotations
9
+ from mmdet.datasets.transforms.transforms import RandomFlip, Resize
10
+ from mmdet.evaluation.metrics.coco_metric import CocoMetric
11
+
12
+ # dataset settings
13
+ dataset_type = 'CocoDataset'
14
+ data_root = 'data/coco/'
15
+
16
+ # Example to use different file client
17
+ # Method 1: simply set the data root and let the file I/O module
18
+ # automatically infer from prefix (not support LMDB and Memcache yet)
19
+
20
+ # data_root = 's3://openmmlab/datasets/detection/coco/'
21
+
22
+ # Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6
23
+ # backend_args = dict(
24
+ # backend='petrel',
25
+ # path_mapping=dict({
26
+ # './data/': 's3://openmmlab/datasets/detection/',
27
+ # 'data/': 's3://openmmlab/datasets/detection/'
28
+ # }))
29
+ backend_args = None
30
+
31
+ train_pipeline = [
32
+ dict(type=LoadImageFromFile, backend_args=backend_args),
33
+ dict(type=LoadAnnotations, with_bbox=True, with_mask=True),
34
+ dict(type=Resize, scale=(1333, 800), keep_ratio=True),
35
+ dict(type=RandomFlip, prob=0.5),
36
+ dict(type=PackDetInputs)
37
+ ]
38
+ test_pipeline = [
39
+ dict(type=LoadImageFromFile, backend_args=backend_args),
40
+ dict(type=Resize, scale=(1333, 800), keep_ratio=True),
41
+ # If you don't have a gt annotation, delete the pipeline
42
+ dict(type=LoadAnnotations, with_bbox=True, with_mask=True),
43
+ dict(
44
+ type=PackDetInputs,
45
+ meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
46
+ 'scale_factor'))
47
+ ]
48
+ train_dataloader = dict(
49
+ batch_size=2,
50
+ num_workers=2,
51
+ persistent_workers=True,
52
+ sampler=dict(type=DefaultSampler, shuffle=True),
53
+ batch_sampler=dict(type=AspectRatioBatchSampler),
54
+ dataset=dict(
55
+ type=CocoDataset,
56
+ data_root=data_root,
57
+ ann_file='annotations/instances_train2017.json',
58
+ data_prefix=dict(img='train2017/'),
59
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
60
+ pipeline=train_pipeline,
61
+ backend_args=backend_args))
62
+ val_dataloader = dict(
63
+ batch_size=1,
64
+ num_workers=2,
65
+ persistent_workers=True,
66
+ drop_last=False,
67
+ sampler=dict(type=DefaultSampler, shuffle=False),
68
+ dataset=dict(
69
+ type=CocoDataset,
70
+ data_root=data_root,
71
+ ann_file='annotations/instances_val2017.json',
72
+ data_prefix=dict(img='val2017/'),
73
+ test_mode=True,
74
+ pipeline=test_pipeline,
75
+ backend_args=backend_args))
76
+ test_dataloader = val_dataloader
77
+
78
+ val_evaluator = dict(
79
+ type=CocoMetric,
80
+ ann_file=data_root + 'annotations/instances_val2017.json',
81
+ metric=['bbox', 'segm'],
82
+ format_only=False,
83
+ backend_args=backend_args)
84
+ test_evaluator = val_evaluator
85
+
86
+ # inference on test dataset and
87
+ # format the output results for submission.
88
+ # test_dataloader = dict(
89
+ # batch_size=1,
90
+ # num_workers=2,
91
+ # persistent_workers=True,
92
+ # drop_last=False,
93
+ # sampler=dict(type=DefaultSampler, shuffle=False),
94
+ # dataset=dict(
95
+ # type=CocoDataset,
96
+ # data_root=data_root,
97
+ # ann_file=data_root + 'annotations/image_info_test-dev2017.json',
98
+ # data_prefix=dict(img='test2017/'),
99
+ # test_mode=True,
100
+ # pipeline=test_pipeline))
101
+ # test_evaluator = dict(
102
+ # type=CocoMetric,
103
+ # metric=['bbox', 'segm'],
104
+ # format_only=True,
105
+ # ann_file=data_root + 'annotations/image_info_test-dev2017.json',
106
+ # outfile_prefix='./work_dirs/coco_instance/test')
mmdet/configs/_base_/datasets/coco_instance_semantic.py ADDED
@@ -0,0 +1,87 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from mmcv.transforms.loading import LoadImageFromFile
3
+ from mmengine.dataset.sampler import DefaultSampler
4
+
5
+ from mmdet.datasets.coco import CocoDataset
6
+ from mmdet.datasets.samplers.batch_sampler import AspectRatioBatchSampler
7
+ from mmdet.datasets.transforms.formatting import PackDetInputs
8
+ from mmdet.datasets.transforms.loading import LoadAnnotations
9
+ from mmdet.datasets.transforms.transforms import RandomFlip, Resize
10
+ from mmdet.evaluation.metrics.coco_metric import CocoMetric
11
+
12
+ # dataset settings
13
+ dataset_type = 'CocoDataset'
14
+ data_root = 'data/coco/'
15
+
16
+ # Example to use different file client
17
+ # Method 1: simply set the data root and let the file I/O module
18
+ # automatically infer from prefix (not support LMDB and Memcache yet)
19
+
20
+ # data_root = 's3://openmmlab/datasets/detection/coco/'
21
+
22
+ # Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6
23
+ # backend_args = dict(
24
+ # backend='petrel',
25
+ # path_mapping=dict({
26
+ # './data/': 's3://openmmlab/datasets/detection/',
27
+ # 'data/': 's3://openmmlab/datasets/detection/'
28
+ # }))
29
+ backend_args = None
30
+
31
+ train_pipeline = [
32
+ dict(type=LoadImageFromFile, backend_args=backend_args),
33
+ dict(type=LoadAnnotations, with_bbox=True, with_mask=True, with_seg=True),
34
+ dict(type=Resize, scale=(1333, 800), keep_ratio=True),
35
+ dict(type=RandomFlip, prob=0.5),
36
+ dict(type=PackDetInputs)
37
+ ]
38
+ test_pipeline = [
39
+ dict(type=LoadImageFromFile, backend_args=backend_args),
40
+ dict(type=Resize, scale=(1333, 800), keep_ratio=True),
41
+ # If you don't have a gt annotation, delete the pipeline
42
+ dict(type=LoadAnnotations, with_bbox=True, with_mask=True, with_seg=True),
43
+ dict(
44
+ type=PackDetInputs,
45
+ meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
46
+ 'scale_factor'))
47
+ ]
48
+
49
+ train_dataloader = dict(
50
+ batch_size=2,
51
+ num_workers=2,
52
+ persistent_workers=True,
53
+ sampler=dict(type=DefaultSampler, shuffle=True),
54
+ batch_sampler=dict(type=AspectRatioBatchSampler),
55
+ dataset=dict(
56
+ type=CocoDataset,
57
+ data_root=data_root,
58
+ ann_file='annotations/instances_train2017.json',
59
+ data_prefix=dict(img='train2017/', seg='stuffthingmaps/train2017/'),
60
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
61
+ pipeline=train_pipeline,
62
+ backend_args=backend_args))
63
+
64
+ val_dataloader = dict(
65
+ batch_size=1,
66
+ num_workers=2,
67
+ persistent_workers=True,
68
+ drop_last=False,
69
+ sampler=dict(type=DefaultSampler, shuffle=False),
70
+ dataset=dict(
71
+ type=CocoDataset,
72
+ data_root=data_root,
73
+ ann_file='annotations/instances_val2017.json',
74
+ data_prefix=dict(img='val2017/'),
75
+ test_mode=True,
76
+ pipeline=test_pipeline,
77
+ backend_args=backend_args))
78
+
79
+ test_dataloader = val_dataloader
80
+
81
+ val_evaluator = dict(
82
+ type=CocoMetric,
83
+ ann_file=data_root + 'annotations/instances_val2017.json',
84
+ metric=['bbox', 'segm'],
85
+ format_only=False,
86
+ backend_args=backend_args)
87
+ test_evaluator = val_evaluator
mmdet/configs/_base_/datasets/coco_panoptic.py ADDED
@@ -0,0 +1,105 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from mmcv.transforms.loading import LoadImageFromFile
3
+ from mmengine.dataset.sampler import DefaultSampler
4
+
5
+ from mmdet.datasets.coco_panoptic import CocoPanopticDataset
6
+ from mmdet.datasets.samplers.batch_sampler import AspectRatioBatchSampler
7
+ from mmdet.datasets.transforms.formatting import PackDetInputs
8
+ from mmdet.datasets.transforms.loading import LoadPanopticAnnotations
9
+ from mmdet.datasets.transforms.transforms import RandomFlip, Resize
10
+ from mmdet.evaluation.metrics.coco_panoptic_metric import CocoPanopticMetric
11
+
12
+ # dataset settings
13
+ dataset_type = 'CocoPanopticDataset'
14
+ data_root = 'data/coco/'
15
+
16
+ # Example to use different file client
17
+ # Method 1: simply set the data root and let the file I/O module
18
+ # automatically infer from prefix (not support LMDB and Memcache yet)
19
+
20
+ # data_root = 's3://openmmlab/datasets/detection/coco/'
21
+
22
+ # Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6
23
+ # backend_args = dict(
24
+ # backend='petrel',
25
+ # path_mapping=dict({
26
+ # './data/': 's3://openmmlab/datasets/detection/',
27
+ # 'data/': 's3://openmmlab/datasets/detection/'
28
+ # }))
29
+ backend_args = None
30
+
31
+ train_pipeline = [
32
+ dict(type=LoadImageFromFile, backend_args=backend_args),
33
+ dict(type=LoadPanopticAnnotations, backend_args=backend_args),
34
+ dict(type=Resize, scale=(1333, 800), keep_ratio=True),
35
+ dict(type=RandomFlip, prob=0.5),
36
+ dict(type=PackDetInputs)
37
+ ]
38
+ test_pipeline = [
39
+ dict(type=LoadImageFromFile, backend_args=backend_args),
40
+ dict(type=Resize, scale=(1333, 800), keep_ratio=True),
41
+ dict(type=LoadPanopticAnnotations, backend_args=backend_args),
42
+ dict(
43
+ type=PackDetInputs,
44
+ meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
45
+ 'scale_factor'))
46
+ ]
47
+
48
+ train_dataloader = dict(
49
+ batch_size=2,
50
+ num_workers=2,
51
+ persistent_workers=True,
52
+ sampler=dict(type=DefaultSampler, shuffle=True),
53
+ batch_sampler=dict(type=AspectRatioBatchSampler),
54
+ dataset=dict(
55
+ type=CocoPanopticDataset,
56
+ data_root=data_root,
57
+ ann_file='annotations/panoptic_train2017.json',
58
+ data_prefix=dict(
59
+ img='train2017/', seg='annotations/panoptic_train2017/'),
60
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
61
+ pipeline=train_pipeline,
62
+ backend_args=backend_args))
63
+ val_dataloader = dict(
64
+ batch_size=1,
65
+ num_workers=2,
66
+ persistent_workers=True,
67
+ drop_last=False,
68
+ sampler=dict(type=DefaultSampler, shuffle=False),
69
+ dataset=dict(
70
+ type=CocoPanopticDataset,
71
+ data_root=data_root,
72
+ ann_file='annotations/panoptic_val2017.json',
73
+ data_prefix=dict(img='val2017/', seg='annotations/panoptic_val2017/'),
74
+ test_mode=True,
75
+ pipeline=test_pipeline,
76
+ backend_args=backend_args))
77
+ test_dataloader = val_dataloader
78
+
79
+ val_evaluator = dict(
80
+ type=CocoPanopticMetric,
81
+ ann_file=data_root + 'annotations/panoptic_val2017.json',
82
+ seg_prefix=data_root + 'annotations/panoptic_val2017/',
83
+ backend_args=backend_args)
84
+ test_evaluator = val_evaluator
85
+
86
+ # inference on test dataset and
87
+ # format the output results for submission.
88
+ # test_dataloader = dict(
89
+ # batch_size=1,
90
+ # num_workers=1,
91
+ # persistent_workers=True,
92
+ # drop_last=False,
93
+ # sampler=dict(type=DefaultSampler, shuffle=False),
94
+ # dataset=dict(
95
+ # type=CocoPanopticDataset,
96
+ # data_root=data_root,
97
+ # ann_file='annotations/panoptic_image_info_test-dev2017.json',
98
+ # data_prefix=dict(img='test2017/'),
99
+ # test_mode=True,
100
+ # pipeline=test_pipeline))
101
+ # test_evaluator = dict(
102
+ # type=CocoPanopticMetric,
103
+ # format_only=True,
104
+ # ann_file=data_root + 'annotations/panoptic_image_info_test-dev2017.json',
105
+ # outfile_prefix='./work_dirs/coco_panoptic/test')
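The commented-out block above shows how to format panoptic predictions for a COCO test-dev submission. In the pure-Python config style, a derived config could apply the same switch roughly as follows (a sketch only, not part of this commit):

```python
# Hypothetical derived config that reuses the panoptic base above and turns on
# format-only evaluation for a test-dev submission.
from mmengine.config import read_base

with read_base():
    from .._base_.datasets.coco_panoptic import *  # noqa: F401,F403

test_dataloader.update(
    dict(dataset=dict(
        ann_file='annotations/panoptic_image_info_test-dev2017.json',
        data_prefix=dict(img='test2017/'),
        test_mode=True)))
test_evaluator = dict(
    type=CocoPanopticMetric,
    format_only=True,
    ann_file=data_root + 'annotations/panoptic_image_info_test-dev2017.json',
    outfile_prefix='./work_dirs/coco_panoptic/test')
```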
mmdet/configs/_base_/datasets/mot_challenge.py ADDED
@@ -0,0 +1,101 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from mmcv.transforms import (LoadImageFromFile, RandomResize,
3
+ TransformBroadcaster)
4
+
5
+ from mmdet.datasets import MOTChallengeDataset
6
+ from mmdet.datasets.samplers import TrackImgSampler
7
+ from mmdet.datasets.transforms import (LoadTrackAnnotations, PackTrackInputs,
8
+ PhotoMetricDistortion, RandomCrop,
9
+ RandomFlip, Resize,
10
+ UniformRefFrameSample)
11
+ from mmdet.evaluation import MOTChallengeMetric
12
+
13
+ # dataset settings
14
+ dataset_type = MOTChallengeDataset
15
+ data_root = 'data/MOT17/'
16
+ img_scale = (1088, 1088)
17
+
18
+ backend_args = None
19
+ # data pipeline
20
+ train_pipeline = [
21
+ dict(
22
+ type=UniformRefFrameSample,
23
+ num_ref_imgs=1,
24
+ frame_range=10,
25
+ filter_key_img=True),
26
+ dict(
27
+ type=TransformBroadcaster,
28
+ share_random_params=True,
29
+ transforms=[
30
+ dict(type=LoadImageFromFile, backend_args=backend_args),
31
+ dict(type=LoadTrackAnnotations),
32
+ dict(
33
+ type=RandomResize,
34
+ scale=img_scale,
35
+ ratio_range=(0.8, 1.2),
36
+ keep_ratio=True,
37
+ clip_object_border=False),
38
+ dict(type=PhotoMetricDistortion)
39
+ ]),
40
+ dict(
41
+ type=TransformBroadcaster,
42
+ # different cropped positions for different frames
43
+ share_random_params=False,
44
+ transforms=[
45
+ dict(type=RandomCrop, crop_size=img_scale, bbox_clip_border=False)
46
+ ]),
47
+ dict(
48
+ type=TransformBroadcaster,
49
+ share_random_params=True,
50
+ transforms=[
51
+ dict(type=RandomFlip, prob=0.5),
52
+ ]),
53
+ dict(type=PackTrackInputs)
54
+ ]
55
+
56
+ test_pipeline = [
57
+ dict(
58
+ type=TransformBroadcaster,
59
+ transforms=[
60
+ dict(type=LoadImageFromFile, backend_args=backend_args),
61
+ dict(type=Resize, scale=img_scale, keep_ratio=True),
62
+ dict(type=LoadTrackAnnotations)
63
+ ]),
64
+ dict(type=PackTrackInputs)
65
+ ]
66
+
67
+ # dataloader
68
+ train_dataloader = dict(
69
+ batch_size=2,
70
+ num_workers=2,
71
+ persistent_workers=True,
72
+ sampler=dict(type=TrackImgSampler), # image-based sampling
73
+ dataset=dict(
74
+ type=dataset_type,
75
+ data_root=data_root,
76
+ visibility_thr=-1,
77
+ ann_file='annotations/half-train_cocoformat.json',
78
+ data_prefix=dict(img_path='train'),
79
+ metainfo=dict(classes=('pedestrian', )),
80
+ pipeline=train_pipeline))
81
+ val_dataloader = dict(
82
+ batch_size=1,
83
+ num_workers=2,
84
+ persistent_workers=True,
85
+ # Now we support two ways to test, image_based and video_based
86
+ # if you want to use video_based sampling, you can use as follows
87
+ # sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
88
+ sampler=dict(type=TrackImgSampler), # image-based sampling
89
+ dataset=dict(
90
+ type=dataset_type,
91
+ data_root=data_root,
92
+ ann_file='annotations/half-val_cocoformat.json',
93
+ data_prefix=dict(img_path='train'),
94
+ test_mode=True,
95
+ pipeline=test_pipeline))
96
+ test_dataloader = val_dataloader
97
+
98
+ # evaluator
99
+ val_evaluator = dict(
100
+ type=MOTChallengeMetric, metric=['HOTA', 'CLEAR', 'Identity'])
101
+ test_evaluator = val_evaluator
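The inline comment in `val_dataloader` above notes that evaluation can also run with video-based sampling. A minimal sketch of that override, assuming the standard `DefaultSampler` from mmengine:

```python
# Hypothetical override switching MOT evaluation from image-based sampling
# (TrackImgSampler) to video-based sampling, as the inline comment suggests.
from mmengine.config import read_base
from mmengine.dataset.sampler import DefaultSampler

with read_base():
    from .._base_.datasets.mot_challenge import *  # noqa: F401,F403

val_dataloader.update(
    dict(sampler=dict(type=DefaultSampler, shuffle=False, round_up=False)))
test_dataloader = val_dataloader
```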
mmdet/configs/_base_/default_runtime.py ADDED
@@ -0,0 +1,33 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from mmengine.hooks import (CheckpointHook, DistSamplerSeedHook, IterTimerHook,
3
+ LoggerHook, ParamSchedulerHook)
4
+ from mmengine.runner import LogProcessor
5
+ from mmengine.visualization import LocalVisBackend
6
+
7
+ from mmdet.engine.hooks import DetVisualizationHook
8
+ from mmdet.visualization import DetLocalVisualizer
9
+
10
+ default_scope = None
11
+
12
+ default_hooks = dict(
13
+ timer=dict(type=IterTimerHook),
14
+ logger=dict(type=LoggerHook, interval=50),
15
+ param_scheduler=dict(type=ParamSchedulerHook),
16
+ checkpoint=dict(type=CheckpointHook, interval=1),
17
+ sampler_seed=dict(type=DistSamplerSeedHook),
18
+ visualization=dict(type=DetVisualizationHook))
19
+
20
+ env_cfg = dict(
21
+ cudnn_benchmark=False,
22
+ mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
23
+ dist_cfg=dict(backend='nccl'),
24
+ )
25
+
26
+ vis_backends = [dict(type=LocalVisBackend)]
27
+ visualizer = dict(
28
+ type=DetLocalVisualizer, vis_backends=vis_backends, name='visualizer')
29
+ log_processor = dict(type=LogProcessor, window_size=50, by_epoch=True)
30
+
31
+ log_level = 'INFO'
32
+ load_from = None
33
+ resume = False
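These runtime defaults are shared by every config in the tree. A small sketch of a derived config that also logs to TensorBoard (using mmengine's `TensorboardVisBackend`, which is not configured in this commit) and keeps only the two most recent checkpoints, following the `default_hooks.update(...)` pattern used elsewhere in these files:

```python
# Sketch of a derived config extending the runtime defaults above.
from mmengine.config import read_base
from mmengine.visualization import TensorboardVisBackend

with read_base():
    from .._base_.default_runtime import *  # noqa: F401,F403

vis_backends.append(dict(type=TensorboardVisBackend))
visualizer.update(dict(vis_backends=vis_backends))
default_hooks.update(dict(checkpoint=dict(interval=1, max_keep_ckpts=2)))
```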
mmdet/configs/_base_/models/cascade_mask_rcnn_r50_fpn.py ADDED
@@ -0,0 +1,220 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from mmcv.ops import RoIAlign, nms
3
+ from torch.nn import BatchNorm2d
4
+
5
+ from mmdet.models.backbones.resnet import ResNet
6
+ from mmdet.models.data_preprocessors.data_preprocessor import \
7
+ DetDataPreprocessor
8
+ from mmdet.models.dense_heads.rpn_head import RPNHead
9
+ from mmdet.models.detectors.cascade_rcnn import CascadeRCNN
10
+ from mmdet.models.losses.cross_entropy_loss import CrossEntropyLoss
11
+ from mmdet.models.losses.smooth_l1_loss import SmoothL1Loss
12
+ from mmdet.models.necks.fpn import FPN
13
+ from mmdet.models.roi_heads.bbox_heads.convfc_bbox_head import \
14
+ Shared2FCBBoxHead
15
+ from mmdet.models.roi_heads.cascade_roi_head import CascadeRoIHead
16
+ from mmdet.models.roi_heads.mask_heads.fcn_mask_head import FCNMaskHead
17
+ from mmdet.models.roi_heads.roi_extractors.single_level_roi_extractor import \
18
+ SingleRoIExtractor
19
+ from mmdet.models.task_modules.assigners.max_iou_assigner import MaxIoUAssigner
20
+ from mmdet.models.task_modules.coders.delta_xywh_bbox_coder import \
21
+ DeltaXYWHBBoxCoder
22
+ from mmdet.models.task_modules.prior_generators.anchor_generator import \
23
+ AnchorGenerator
24
+ from mmdet.models.task_modules.samplers.random_sampler import RandomSampler
25
+
26
+ # model settings
27
+ model = dict(
28
+ type=CascadeRCNN,
29
+ data_preprocessor=dict(
30
+ type=DetDataPreprocessor,
31
+ mean=[123.675, 116.28, 103.53],
32
+ std=[58.395, 57.12, 57.375],
33
+ bgr_to_rgb=True,
34
+ pad_mask=True,
35
+ pad_size_divisor=32),
36
+ backbone=dict(
37
+ type=ResNet,
38
+ depth=50,
39
+ num_stages=4,
40
+ out_indices=(0, 1, 2, 3),
41
+ frozen_stages=1,
42
+ norm_cfg=dict(type=BatchNorm2d, requires_grad=True),
43
+ norm_eval=True,
44
+ style='pytorch',
45
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
46
+ neck=dict(
47
+ type=FPN,
48
+ in_channels=[256, 512, 1024, 2048],
49
+ out_channels=256,
50
+ num_outs=5),
51
+ rpn_head=dict(
52
+ type=RPNHead,
53
+ in_channels=256,
54
+ feat_channels=256,
55
+ anchor_generator=dict(
56
+ type=AnchorGenerator,
57
+ scales=[8],
58
+ ratios=[0.5, 1.0, 2.0],
59
+ strides=[4, 8, 16, 32, 64]),
60
+ bbox_coder=dict(
61
+ type=DeltaXYWHBBoxCoder,
62
+ target_means=[.0, .0, .0, .0],
63
+ target_stds=[1.0, 1.0, 1.0, 1.0]),
64
+ loss_cls=dict(
65
+ type=CrossEntropyLoss, use_sigmoid=True, loss_weight=1.0),
66
+ loss_bbox=dict(type=SmoothL1Loss, beta=1.0 / 9.0, loss_weight=1.0)),
67
+ roi_head=dict(
68
+ type=CascadeRoIHead,
69
+ num_stages=3,
70
+ stage_loss_weights=[1, 0.5, 0.25],
71
+ bbox_roi_extractor=dict(
72
+ type=SingleRoIExtractor,
73
+ roi_layer=dict(type=RoIAlign, output_size=7, sampling_ratio=0),
74
+ out_channels=256,
75
+ featmap_strides=[4, 8, 16, 32]),
76
+ bbox_head=[
77
+ dict(
78
+ type=Shared2FCBBoxHead,
79
+ in_channels=256,
80
+ fc_out_channels=1024,
81
+ roi_feat_size=7,
82
+ num_classes=80,
83
+ bbox_coder=dict(
84
+ type=DeltaXYWHBBoxCoder,
85
+ target_means=[0., 0., 0., 0.],
86
+ target_stds=[0.1, 0.1, 0.2, 0.2]),
87
+ reg_class_agnostic=True,
88
+ loss_cls=dict(
89
+ type=CrossEntropyLoss, use_sigmoid=False, loss_weight=1.0),
90
+ loss_bbox=dict(type=SmoothL1Loss, beta=1.0, loss_weight=1.0)),
91
+ dict(
92
+ type=Shared2FCBBoxHead,
93
+ in_channels=256,
94
+ fc_out_channels=1024,
95
+ roi_feat_size=7,
96
+ num_classes=80,
97
+ bbox_coder=dict(
98
+ type=DeltaXYWHBBoxCoder,
99
+ target_means=[0., 0., 0., 0.],
100
+ target_stds=[0.05, 0.05, 0.1, 0.1]),
101
+ reg_class_agnostic=True,
102
+ loss_cls=dict(
103
+ type=CrossEntropyLoss, use_sigmoid=False, loss_weight=1.0),
104
+ loss_bbox=dict(type=SmoothL1Loss, beta=1.0, loss_weight=1.0)),
105
+ dict(
106
+ type=Shared2FCBBoxHead,
107
+ in_channels=256,
108
+ fc_out_channels=1024,
109
+ roi_feat_size=7,
110
+ num_classes=80,
111
+ bbox_coder=dict(
112
+ type=DeltaXYWHBBoxCoder,
113
+ target_means=[0., 0., 0., 0.],
114
+ target_stds=[0.033, 0.033, 0.067, 0.067]),
115
+ reg_class_agnostic=True,
116
+ loss_cls=dict(
117
+ type=CrossEntropyLoss, use_sigmoid=False, loss_weight=1.0),
118
+ loss_bbox=dict(type=SmoothL1Loss, beta=1.0, loss_weight=1.0))
119
+ ],
120
+ mask_roi_extractor=dict(
121
+ type=SingleRoIExtractor,
122
+ roi_layer=dict(type=RoIAlign, output_size=14, sampling_ratio=0),
123
+ out_channels=256,
124
+ featmap_strides=[4, 8, 16, 32]),
125
+ mask_head=dict(
126
+ type=FCNMaskHead,
127
+ num_convs=4,
128
+ in_channels=256,
129
+ conv_out_channels=256,
130
+ num_classes=80,
131
+ loss_mask=dict(
132
+ type=CrossEntropyLoss, use_mask=True, loss_weight=1.0))),
133
+ # model training and testing settings
134
+ train_cfg=dict(
135
+ rpn=dict(
136
+ assigner=dict(
137
+ type=MaxIoUAssigner,
138
+ pos_iou_thr=0.7,
139
+ neg_iou_thr=0.3,
140
+ min_pos_iou=0.3,
141
+ match_low_quality=True,
142
+ ignore_iof_thr=-1),
143
+ sampler=dict(
144
+ type=RandomSampler,
145
+ num=256,
146
+ pos_fraction=0.5,
147
+ neg_pos_ub=-1,
148
+ add_gt_as_proposals=False),
149
+ allowed_border=0,
150
+ pos_weight=-1,
151
+ debug=False),
152
+ rpn_proposal=dict(
153
+ nms_pre=2000,
154
+ max_per_img=2000,
155
+ nms=dict(type=nms, iou_threshold=0.7),
156
+ min_bbox_size=0),
157
+ rcnn=[
158
+ dict(
159
+ assigner=dict(
160
+ type=MaxIoUAssigner,
161
+ pos_iou_thr=0.5,
162
+ neg_iou_thr=0.5,
163
+ min_pos_iou=0.5,
164
+ match_low_quality=False,
165
+ ignore_iof_thr=-1),
166
+ sampler=dict(
167
+ type=RandomSampler,
168
+ num=512,
169
+ pos_fraction=0.25,
170
+ neg_pos_ub=-1,
171
+ add_gt_as_proposals=True),
172
+ mask_size=28,
173
+ pos_weight=-1,
174
+ debug=False),
175
+ dict(
176
+ assigner=dict(
177
+ type=MaxIoUAssigner,
178
+ pos_iou_thr=0.6,
179
+ neg_iou_thr=0.6,
180
+ min_pos_iou=0.6,
181
+ match_low_quality=False,
182
+ ignore_iof_thr=-1),
183
+ sampler=dict(
184
+ type=RandomSampler,
185
+ num=512,
186
+ pos_fraction=0.25,
187
+ neg_pos_ub=-1,
188
+ add_gt_as_proposals=True),
189
+ mask_size=28,
190
+ pos_weight=-1,
191
+ debug=False),
192
+ dict(
193
+ assigner=dict(
194
+ type=MaxIoUAssigner,
195
+ pos_iou_thr=0.7,
196
+ neg_iou_thr=0.7,
197
+ min_pos_iou=0.7,
198
+ match_low_quality=False,
199
+ ignore_iof_thr=-1),
200
+ sampler=dict(
201
+ type=RandomSampler,
202
+ num=512,
203
+ pos_fraction=0.25,
204
+ neg_pos_ub=-1,
205
+ add_gt_as_proposals=True),
206
+ mask_size=28,
207
+ pos_weight=-1,
208
+ debug=False)
209
+ ]),
210
+ test_cfg=dict(
211
+ rpn=dict(
212
+ nms_pre=1000,
213
+ max_per_img=1000,
214
+ nms=dict(type=nms, iou_threshold=0.7),
215
+ min_bbox_size=0),
216
+ rcnn=dict(
217
+ score_thr=0.05,
218
+ nms=dict(type=nms, iou_threshold=0.5),
219
+ max_per_img=100,
220
+ mask_thr_binary=0.5)))
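The three `bbox_head` stages above share one structure and differ only in their regression `target_stds` and positive-IoU thresholds, so dataset-specific overrides have to touch every stage. A hedged sketch (the 3-class value is purely illustrative):

```python
# Hypothetical derived config adapting the cascade model above to a 3-class
# dataset: each cascade stage and the mask head carry their own num_classes.
from mmengine.config import read_base

with read_base():
    from .._base_.models.cascade_mask_rcnn_r50_fpn import *  # noqa: F401,F403

num_classes = 3  # illustration value
for bbox_head in model['roi_head']['bbox_head']:
    bbox_head['num_classes'] = num_classes
model['roi_head']['mask_head']['num_classes'] = num_classes
```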
mmdet/configs/_base_/models/cascade_rcnn_r50_fpn.py ADDED
@@ -0,0 +1,201 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from mmcv.ops import RoIAlign, nms
3
+ from torch.nn import BatchNorm2d
4
+
5
+ from mmdet.models.backbones.resnet import ResNet
6
+ from mmdet.models.data_preprocessors.data_preprocessor import \
7
+ DetDataPreprocessor
8
+ from mmdet.models.dense_heads.rpn_head import RPNHead
9
+ from mmdet.models.detectors.cascade_rcnn import CascadeRCNN
10
+ from mmdet.models.losses.cross_entropy_loss import CrossEntropyLoss
11
+ from mmdet.models.losses.smooth_l1_loss import SmoothL1Loss
12
+ from mmdet.models.necks.fpn import FPN
13
+ from mmdet.models.roi_heads.bbox_heads.convfc_bbox_head import \
14
+ Shared2FCBBoxHead
15
+ from mmdet.models.roi_heads.cascade_roi_head import CascadeRoIHead
16
+ from mmdet.models.roi_heads.roi_extractors.single_level_roi_extractor import \
17
+ SingleRoIExtractor
18
+ from mmdet.models.task_modules.assigners.max_iou_assigner import MaxIoUAssigner
19
+ from mmdet.models.task_modules.coders.delta_xywh_bbox_coder import \
20
+ DeltaXYWHBBoxCoder
21
+ from mmdet.models.task_modules.prior_generators.anchor_generator import \
22
+ AnchorGenerator
23
+ from mmdet.models.task_modules.samplers.random_sampler import RandomSampler
24
+
25
+ # model settings
26
+ model = dict(
27
+ type=CascadeRCNN,
28
+ data_preprocessor=dict(
29
+ type=DetDataPreprocessor,
30
+ mean=[123.675, 116.28, 103.53],
31
+ std=[58.395, 57.12, 57.375],
32
+ bgr_to_rgb=True,
33
+ pad_size_divisor=32),
34
+ backbone=dict(
35
+ type=ResNet,
36
+ depth=50,
37
+ num_stages=4,
38
+ out_indices=(0, 1, 2, 3),
39
+ frozen_stages=1,
40
+ norm_cfg=dict(type=BatchNorm2d, requires_grad=True),
41
+ norm_eval=True,
42
+ style='pytorch',
43
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
44
+ neck=dict(
45
+ type=FPN,
46
+ in_channels=[256, 512, 1024, 2048],
47
+ out_channels=256,
48
+ num_outs=5),
49
+ rpn_head=dict(
50
+ type=RPNHead,
51
+ in_channels=256,
52
+ feat_channels=256,
53
+ anchor_generator=dict(
54
+ type=AnchorGenerator,
55
+ scales=[8],
56
+ ratios=[0.5, 1.0, 2.0],
57
+ strides=[4, 8, 16, 32, 64]),
58
+ bbox_coder=dict(
59
+ type=DeltaXYWHBBoxCoder,
60
+ target_means=[.0, .0, .0, .0],
61
+ target_stds=[1.0, 1.0, 1.0, 1.0]),
62
+ loss_cls=dict(
63
+ type=CrossEntropyLoss, use_sigmoid=True, loss_weight=1.0),
64
+ loss_bbox=dict(type=SmoothL1Loss, beta=1.0 / 9.0, loss_weight=1.0)),
65
+ roi_head=dict(
66
+ type=CascadeRoIHead,
67
+ num_stages=3,
68
+ stage_loss_weights=[1, 0.5, 0.25],
69
+ bbox_roi_extractor=dict(
70
+ type=SingleRoIExtractor,
71
+ roi_layer=dict(type=RoIAlign, output_size=7, sampling_ratio=0),
72
+ out_channels=256,
73
+ featmap_strides=[4, 8, 16, 32]),
74
+ bbox_head=[
75
+ dict(
76
+ type=Shared2FCBBoxHead,
77
+ in_channels=256,
78
+ fc_out_channels=1024,
79
+ roi_feat_size=7,
80
+ num_classes=80,
81
+ bbox_coder=dict(
82
+ type=DeltaXYWHBBoxCoder,
83
+ target_means=[0., 0., 0., 0.],
84
+ target_stds=[0.1, 0.1, 0.2, 0.2]),
85
+ reg_class_agnostic=True,
86
+ loss_cls=dict(
87
+ type=CrossEntropyLoss, use_sigmoid=False, loss_weight=1.0),
88
+ loss_bbox=dict(type=SmoothL1Loss, beta=1.0, loss_weight=1.0)),
89
+ dict(
90
+ type=Shared2FCBBoxHead,
91
+ in_channels=256,
92
+ fc_out_channels=1024,
93
+ roi_feat_size=7,
94
+ num_classes=80,
95
+ bbox_coder=dict(
96
+ type=DeltaXYWHBBoxCoder,
97
+ target_means=[0., 0., 0., 0.],
98
+ target_stds=[0.05, 0.05, 0.1, 0.1]),
99
+ reg_class_agnostic=True,
100
+ loss_cls=dict(
101
+ type=CrossEntropyLoss, use_sigmoid=False, loss_weight=1.0),
102
+ loss_bbox=dict(type=SmoothL1Loss, beta=1.0, loss_weight=1.0)),
103
+ dict(
104
+ type=Shared2FCBBoxHead,
105
+ in_channels=256,
106
+ fc_out_channels=1024,
107
+ roi_feat_size=7,
108
+ num_classes=80,
109
+ bbox_coder=dict(
110
+ type=DeltaXYWHBBoxCoder,
111
+ target_means=[0., 0., 0., 0.],
112
+ target_stds=[0.033, 0.033, 0.067, 0.067]),
113
+ reg_class_agnostic=True,
114
+ loss_cls=dict(
115
+ type=CrossEntropyLoss, use_sigmoid=False, loss_weight=1.0),
116
+ loss_bbox=dict(type=SmoothL1Loss, beta=1.0, loss_weight=1.0))
117
+ ]),
118
+ # model training and testing settings
119
+ train_cfg=dict(
120
+ rpn=dict(
121
+ assigner=dict(
122
+ type=MaxIoUAssigner,
123
+ pos_iou_thr=0.7,
124
+ neg_iou_thr=0.3,
125
+ min_pos_iou=0.3,
126
+ match_low_quality=True,
127
+ ignore_iof_thr=-1),
128
+ sampler=dict(
129
+ type=RandomSampler,
130
+ num=256,
131
+ pos_fraction=0.5,
132
+ neg_pos_ub=-1,
133
+ add_gt_as_proposals=False),
134
+ allowed_border=0,
135
+ pos_weight=-1,
136
+ debug=False),
137
+ rpn_proposal=dict(
138
+ nms_pre=2000,
139
+ max_per_img=2000,
140
+ nms=dict(type=nms, iou_threshold=0.7),
141
+ min_bbox_size=0),
142
+ rcnn=[
143
+ dict(
144
+ assigner=dict(
145
+ type=MaxIoUAssigner,
146
+ pos_iou_thr=0.5,
147
+ neg_iou_thr=0.5,
148
+ min_pos_iou=0.5,
149
+ match_low_quality=False,
150
+ ignore_iof_thr=-1),
151
+ sampler=dict(
152
+ type=RandomSampler,
153
+ num=512,
154
+ pos_fraction=0.25,
155
+ neg_pos_ub=-1,
156
+ add_gt_as_proposals=True),
157
+ pos_weight=-1,
158
+ debug=False),
159
+ dict(
160
+ assigner=dict(
161
+ type=MaxIoUAssigner,
162
+ pos_iou_thr=0.6,
163
+ neg_iou_thr=0.6,
164
+ min_pos_iou=0.6,
165
+ match_low_quality=False,
166
+ ignore_iof_thr=-1),
167
+ sampler=dict(
168
+ type=RandomSampler,
169
+ num=512,
170
+ pos_fraction=0.25,
171
+ neg_pos_ub=-1,
172
+ add_gt_as_proposals=True),
173
+ pos_weight=-1,
174
+ debug=False),
175
+ dict(
176
+ assigner=dict(
177
+ type=MaxIoUAssigner,
178
+ pos_iou_thr=0.7,
179
+ neg_iou_thr=0.7,
180
+ min_pos_iou=0.7,
181
+ match_low_quality=False,
182
+ ignore_iof_thr=-1),
183
+ sampler=dict(
184
+ type=RandomSampler,
185
+ num=512,
186
+ pos_fraction=0.25,
187
+ neg_pos_ub=-1,
188
+ add_gt_as_proposals=True),
189
+ pos_weight=-1,
190
+ debug=False)
191
+ ]),
192
+ test_cfg=dict(
193
+ rpn=dict(
194
+ nms_pre=1000,
195
+ max_per_img=1000,
196
+ nms=dict(type=nms, iou_threshold=0.7),
197
+ min_bbox_size=0),
198
+ rcnn=dict(
199
+ score_thr=0.05,
200
+ nms=dict(type=nms, iou_threshold=0.5),
201
+ max_per_img=100)))
mmdet/configs/_base_/models/faster_rcnn_r50_fpn.py ADDED
@@ -0,0 +1,138 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from mmcv.ops import RoIAlign, nms
3
+ from torch.nn import BatchNorm2d
4
+
5
+ from mmdet.models.backbones.resnet import ResNet
6
+ from mmdet.models.data_preprocessors.data_preprocessor import \
7
+ DetDataPreprocessor
8
+ from mmdet.models.dense_heads.rpn_head import RPNHead
9
+ from mmdet.models.detectors.faster_rcnn import FasterRCNN
10
+ from mmdet.models.losses.cross_entropy_loss import CrossEntropyLoss
11
+ from mmdet.models.losses.smooth_l1_loss import L1Loss
12
+ from mmdet.models.necks.fpn import FPN
13
+ from mmdet.models.roi_heads.bbox_heads.convfc_bbox_head import \
14
+ Shared2FCBBoxHead
15
+ from mmdet.models.roi_heads.roi_extractors.single_level_roi_extractor import \
16
+ SingleRoIExtractor
17
+ from mmdet.models.roi_heads.standard_roi_head import StandardRoIHead
18
+ from mmdet.models.task_modules.assigners.max_iou_assigner import MaxIoUAssigner
19
+ from mmdet.models.task_modules.coders.delta_xywh_bbox_coder import \
20
+ DeltaXYWHBBoxCoder
21
+ from mmdet.models.task_modules.prior_generators.anchor_generator import \
22
+ AnchorGenerator
23
+ from mmdet.models.task_modules.samplers.random_sampler import RandomSampler
24
+
25
+ # model settings
26
+ model = dict(
27
+ type=FasterRCNN,
28
+ data_preprocessor=dict(
29
+ type=DetDataPreprocessor,
30
+ mean=[123.675, 116.28, 103.53],
31
+ std=[58.395, 57.12, 57.375],
32
+ bgr_to_rgb=True,
33
+ pad_size_divisor=32),
34
+ backbone=dict(
35
+ type=ResNet,
36
+ depth=50,
37
+ num_stages=4,
38
+ out_indices=(0, 1, 2, 3),
39
+ frozen_stages=1,
40
+ norm_cfg=dict(type=BatchNorm2d, requires_grad=True),
41
+ norm_eval=True,
42
+ style='pytorch',
43
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
44
+ neck=dict(
45
+ type=FPN,
46
+ in_channels=[256, 512, 1024, 2048],
47
+ out_channels=256,
48
+ num_outs=5),
49
+ rpn_head=dict(
50
+ type=RPNHead,
51
+ in_channels=256,
52
+ feat_channels=256,
53
+ anchor_generator=dict(
54
+ type=AnchorGenerator,
55
+ scales=[8],
56
+ ratios=[0.5, 1.0, 2.0],
57
+ strides=[4, 8, 16, 32, 64]),
58
+ bbox_coder=dict(
59
+ type=DeltaXYWHBBoxCoder,
60
+ target_means=[.0, .0, .0, .0],
61
+ target_stds=[1.0, 1.0, 1.0, 1.0]),
62
+ loss_cls=dict(
63
+ type=CrossEntropyLoss, use_sigmoid=True, loss_weight=1.0),
64
+ loss_bbox=dict(type=L1Loss, loss_weight=1.0)),
65
+ roi_head=dict(
66
+ type=StandardRoIHead,
67
+ bbox_roi_extractor=dict(
68
+ type=SingleRoIExtractor,
69
+ roi_layer=dict(type=RoIAlign, output_size=7, sampling_ratio=0),
70
+ out_channels=256,
71
+ featmap_strides=[4, 8, 16, 32]),
72
+ bbox_head=dict(
73
+ type=Shared2FCBBoxHead,
74
+ in_channels=256,
75
+ fc_out_channels=1024,
76
+ roi_feat_size=7,
77
+ num_classes=80,
78
+ bbox_coder=dict(
79
+ type=DeltaXYWHBBoxCoder,
80
+ target_means=[0., 0., 0., 0.],
81
+ target_stds=[0.1, 0.1, 0.2, 0.2]),
82
+ reg_class_agnostic=False,
83
+ loss_cls=dict(
84
+ type=CrossEntropyLoss, use_sigmoid=False, loss_weight=1.0),
85
+ loss_bbox=dict(type=L1Loss, loss_weight=1.0))),
86
+ # model training and testing settings
87
+ train_cfg=dict(
88
+ rpn=dict(
89
+ assigner=dict(
90
+ type=MaxIoUAssigner,
91
+ pos_iou_thr=0.7,
92
+ neg_iou_thr=0.3,
93
+ min_pos_iou=0.3,
94
+ match_low_quality=True,
95
+ ignore_iof_thr=-1),
96
+ sampler=dict(
97
+ type=RandomSampler,
98
+ num=256,
99
+ pos_fraction=0.5,
100
+ neg_pos_ub=-1,
101
+ add_gt_as_proposals=False),
102
+ allowed_border=-1,
103
+ pos_weight=-1,
104
+ debug=False),
105
+ rpn_proposal=dict(
106
+ nms_pre=2000,
107
+ max_per_img=1000,
108
+ nms=dict(type=nms, iou_threshold=0.7),
109
+ min_bbox_size=0),
110
+ rcnn=dict(
111
+ assigner=dict(
112
+ type=MaxIoUAssigner,
113
+ pos_iou_thr=0.5,
114
+ neg_iou_thr=0.5,
115
+ min_pos_iou=0.5,
116
+ match_low_quality=False,
117
+ ignore_iof_thr=-1),
118
+ sampler=dict(
119
+ type=RandomSampler,
120
+ num=512,
121
+ pos_fraction=0.25,
122
+ neg_pos_ub=-1,
123
+ add_gt_as_proposals=True),
124
+ pos_weight=-1,
125
+ debug=False)),
126
+ test_cfg=dict(
127
+ rpn=dict(
128
+ nms_pre=1000,
129
+ max_per_img=1000,
130
+ nms=dict(type=nms, iou_threshold=0.7),
131
+ min_bbox_size=0),
132
+ rcnn=dict(
133
+ score_thr=0.05,
134
+ nms=dict(type=nms, iou_threshold=0.5),
135
+ max_per_img=100)
136
+ # soft-nms is also supported for rcnn testing
137
+ # e.g., nms=dict(type='soft_nms', iou_threshold=0.5, min_score=0.05)
138
+ ))
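Both the RPN and the RoI head above regress boxes through `DeltaXYWHBBoxCoder`. The following plain-Python sketch (not the mmdet implementation) shows the encoding that the configured `target_means`/`target_stds` act on:

```python
# Minimal sketch of delta-xywh box encoding: a ground-truth box is expressed
# as normalized center/size offsets relative to a proposal box.
import math

def encode(proposal, gt, means=(0., 0., 0., 0.), stds=(0.1, 0.1, 0.2, 0.2)):
    """proposal / gt are (x1, y1, x2, y2) boxes."""
    px, py = (proposal[0] + proposal[2]) / 2, (proposal[1] + proposal[3]) / 2
    pw, ph = proposal[2] - proposal[0], proposal[3] - proposal[1]
    gx, gy = (gt[0] + gt[2]) / 2, (gt[1] + gt[3]) / 2
    gw, gh = gt[2] - gt[0], gt[3] - gt[1]
    deltas = ((gx - px) / pw, (gy - py) / ph,
              math.log(gw / pw), math.log(gh / ph))
    return [(d - m) / s for d, m, s in zip(deltas, means, stds)]

print(encode((100, 100, 200, 200), (110, 95, 215, 205)))
```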
mmdet/configs/_base_/models/mask_rcnn_r50_caffe_c4.py ADDED
@@ -0,0 +1,158 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from mmcv.ops import RoIAlign, nms
3
+ from mmengine.model.weight_init import PretrainedInit
4
+ from torch.nn import BatchNorm2d
5
+
6
+ from mmdet.models.backbones.resnet import ResNet
7
+ from mmdet.models.data_preprocessors.data_preprocessor import \
8
+ DetDataPreprocessor
9
+ from mmdet.models.dense_heads.rpn_head import RPNHead
10
+ from mmdet.models.detectors.mask_rcnn import MaskRCNN
11
+ from mmdet.models.layers import ResLayer
12
+ from mmdet.models.losses.cross_entropy_loss import CrossEntropyLoss
13
+ from mmdet.models.losses.smooth_l1_loss import L1Loss
14
+ from mmdet.models.roi_heads.bbox_heads.bbox_head import BBoxHead
15
+ from mmdet.models.roi_heads.mask_heads.fcn_mask_head import FCNMaskHead
16
+ from mmdet.models.roi_heads.roi_extractors.single_level_roi_extractor import \
17
+ SingleRoIExtractor
18
+ from mmdet.models.roi_heads.standard_roi_head import StandardRoIHead
19
+ from mmdet.models.task_modules.assigners.max_iou_assigner import MaxIoUAssigner
20
+ from mmdet.models.task_modules.coders.delta_xywh_bbox_coder import \
21
+ DeltaXYWHBBoxCoder
22
+ from mmdet.models.task_modules.prior_generators.anchor_generator import \
23
+ AnchorGenerator
24
+ from mmdet.models.task_modules.samplers.random_sampler import RandomSampler
25
+
26
+ # model settings
27
+ norm_cfg = dict(type=BatchNorm2d, requires_grad=False)
28
+ # model settings
29
+ model = dict(
30
+ type=MaskRCNN,
31
+ data_preprocessor=dict(
32
+ type=DetDataPreprocessor,
33
+ mean=[103.530, 116.280, 123.675],
34
+ std=[1.0, 1.0, 1.0],
35
+ bgr_to_rgb=False,
36
+ pad_mask=True,
37
+ pad_size_divisor=32),
38
+ backbone=dict(
39
+ type=ResNet,
40
+ depth=50,
41
+ num_stages=3,
42
+ strides=(1, 2, 2),
43
+ dilations=(1, 1, 1),
44
+ out_indices=(2, ),
45
+ frozen_stages=1,
46
+ norm_cfg=dict(type=BatchNorm2d, requires_grad=True),
47
+ norm_eval=True,
48
+ style='caffe',
49
+ init_cfg=dict(
50
+ type=PretrainedInit,
51
+ checkpoint='open-mmlab://detectron2/resnet50_caffe')),
52
+ rpn_head=dict(
53
+ type=RPNHead,
54
+ in_channels=1024,
55
+ feat_channels=1024,
56
+ anchor_generator=dict(
57
+ type=AnchorGenerator,
58
+ scales=[2, 4, 8, 16, 32],
59
+ ratios=[0.5, 1.0, 2.0],
60
+ strides=[16]),
61
+ bbox_coder=dict(
62
+ type=DeltaXYWHBBoxCoder,
63
+ target_means=[.0, .0, .0, .0],
64
+ target_stds=[1.0, 1.0, 1.0, 1.0]),
65
+ loss_cls=dict(
66
+ type=CrossEntropyLoss, use_sigmoid=True, loss_weight=1.0),
67
+ loss_bbox=dict(type=L1Loss, loss_weight=1.0)),
68
+ roi_head=dict(
69
+ type=StandardRoIHead,
70
+ shared_head=dict(
71
+ type=ResLayer,
72
+ depth=50,
73
+ stage=3,
74
+ stride=2,
75
+ dilation=1,
76
+ style='caffe',
77
+ norm_cfg=norm_cfg,
78
+ norm_eval=True),
79
+ bbox_roi_extractor=dict(
80
+ type=SingleRoIExtractor,
81
+ roi_layer=dict(type=RoIAlign, output_size=14, sampling_ratio=0),
82
+ out_channels=1024,
83
+ featmap_strides=[16]),
84
+ bbox_head=dict(
85
+ type=BBoxHead,
86
+ with_avg_pool=True,
87
+ roi_feat_size=7,
88
+ in_channels=2048,
89
+ num_classes=80,
90
+ bbox_coder=dict(
91
+ type=DeltaXYWHBBoxCoder,
92
+ target_means=[0., 0., 0., 0.],
93
+ target_stds=[0.1, 0.1, 0.2, 0.2]),
94
+ reg_class_agnostic=False,
95
+ loss_cls=dict(
96
+ type=CrossEntropyLoss, use_sigmoid=False, loss_weight=1.0),
97
+ loss_bbox=dict(type=L1Loss, loss_weight=1.0)),
98
+ mask_roi_extractor=None,
99
+ mask_head=dict(
100
+ type=FCNMaskHead,
101
+ num_convs=0,
102
+ in_channels=2048,
103
+ conv_out_channels=256,
104
+ num_classes=80,
105
+ loss_mask=dict(
106
+ type=CrossEntropyLoss, use_mask=True, loss_weight=1.0))),
107
+ # model training and testing settings
108
+ train_cfg=dict(
109
+ rpn=dict(
110
+ assigner=dict(
111
+ type=MaxIoUAssigner,
112
+ pos_iou_thr=0.7,
113
+ neg_iou_thr=0.3,
114
+ min_pos_iou=0.3,
115
+ match_low_quality=True,
116
+ ignore_iof_thr=-1),
117
+ sampler=dict(
118
+ type=RandomSampler,
119
+ num=256,
120
+ pos_fraction=0.5,
121
+ neg_pos_ub=-1,
122
+ add_gt_as_proposals=False),
123
+ allowed_border=0,
124
+ pos_weight=-1,
125
+ debug=False),
126
+ rpn_proposal=dict(
127
+ nms_pre=12000,
128
+ max_per_img=2000,
129
+ nms=dict(type=nms, iou_threshold=0.7),
130
+ min_bbox_size=0),
131
+ rcnn=dict(
132
+ assigner=dict(
133
+ type=MaxIoUAssigner,
134
+ pos_iou_thr=0.5,
135
+ neg_iou_thr=0.5,
136
+ min_pos_iou=0.5,
137
+ match_low_quality=False,
138
+ ignore_iof_thr=-1),
139
+ sampler=dict(
140
+ type=RandomSampler,
141
+ num=512,
142
+ pos_fraction=0.25,
143
+ neg_pos_ub=-1,
144
+ add_gt_as_proposals=True),
145
+ mask_size=14,
146
+ pos_weight=-1,
147
+ debug=False)),
148
+ test_cfg=dict(
149
+ rpn=dict(
150
+ nms_pre=6000,
151
+ max_per_img=1000,
152
+ nms=dict(type=nms, iou_threshold=0.7),
153
+ min_bbox_size=0),
154
+ rcnn=dict(
155
+ score_thr=0.05,
156
+ nms=dict(type=nms, iou_threshold=0.5),
157
+ max_per_img=100,
158
+ mask_thr_binary=0.5)))
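The caffe-style `data_preprocessor` above keeps the BGR channel order (`bgr_to_rgb=False`) and only subtracts per-channel means, since `std=[1.0, 1.0, 1.0]`. A small NumPy sketch of that normalization on a stand-in image:

```python
# Sketch of the caffe-style normalization configured above, applied to a
# random array standing in for a BGR image.
import numpy as np

img_bgr = np.random.randint(0, 256, (480, 640, 3)).astype(np.float32)
mean = np.array([103.530, 116.280, 123.675], dtype=np.float32)  # BGR means
std = np.array([1.0, 1.0, 1.0], dtype=np.float32)               # no scaling
normalized = (img_bgr - mean) / std
print(normalized.mean(axis=(0, 1)))  # roughly centered around zero
```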
mmdet/configs/_base_/models/mask_rcnn_r50_fpn.py ADDED
@@ -0,0 +1,154 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from mmcv.ops import RoIAlign, nms
3
+ from mmengine.model.weight_init import PretrainedInit
4
+ from torch.nn import BatchNorm2d
5
+
6
+ from mmdet.models.backbones.resnet import ResNet
7
+ from mmdet.models.data_preprocessors.data_preprocessor import \
8
+ DetDataPreprocessor
9
+ from mmdet.models.dense_heads.rpn_head import RPNHead
10
+ from mmdet.models.detectors.mask_rcnn import MaskRCNN
11
+ from mmdet.models.losses.cross_entropy_loss import CrossEntropyLoss
12
+ from mmdet.models.losses.smooth_l1_loss import L1Loss
13
+ from mmdet.models.necks.fpn import FPN
14
+ from mmdet.models.roi_heads.bbox_heads.convfc_bbox_head import \
15
+ Shared2FCBBoxHead
16
+ from mmdet.models.roi_heads.mask_heads.fcn_mask_head import FCNMaskHead
17
+ from mmdet.models.roi_heads.roi_extractors.single_level_roi_extractor import \
18
+ SingleRoIExtractor
19
+ from mmdet.models.roi_heads.standard_roi_head import StandardRoIHead
20
+ from mmdet.models.task_modules.assigners.max_iou_assigner import MaxIoUAssigner
21
+ from mmdet.models.task_modules.coders.delta_xywh_bbox_coder import \
22
+ DeltaXYWHBBoxCoder
23
+ from mmdet.models.task_modules.prior_generators.anchor_generator import \
24
+ AnchorGenerator
25
+ from mmdet.models.task_modules.samplers.random_sampler import RandomSampler
26
+
27
+ # model settings
28
+ model = dict(
29
+ type=MaskRCNN,
30
+ data_preprocessor=dict(
31
+ type=DetDataPreprocessor,
32
+ mean=[123.675, 116.28, 103.53],
33
+ std=[58.395, 57.12, 57.375],
34
+ bgr_to_rgb=True,
35
+ pad_mask=True,
36
+ pad_size_divisor=32),
37
+ backbone=dict(
38
+ type=ResNet,
39
+ depth=50,
40
+ num_stages=4,
41
+ out_indices=(0, 1, 2, 3),
42
+ frozen_stages=1,
43
+ norm_cfg=dict(type=BatchNorm2d, requires_grad=True),
44
+ norm_eval=True,
45
+ style='pytorch',
46
+ init_cfg=dict(
47
+ type=PretrainedInit, checkpoint='torchvision://resnet50')),
48
+ neck=dict(
49
+ type=FPN,
50
+ in_channels=[256, 512, 1024, 2048],
51
+ out_channels=256,
52
+ num_outs=5),
53
+ rpn_head=dict(
54
+ type=RPNHead,
55
+ in_channels=256,
56
+ feat_channels=256,
57
+ anchor_generator=dict(
58
+ type=AnchorGenerator,
59
+ scales=[8],
60
+ ratios=[0.5, 1.0, 2.0],
61
+ strides=[4, 8, 16, 32, 64]),
62
+ bbox_coder=dict(
63
+ type=DeltaXYWHBBoxCoder,
64
+ target_means=[.0, .0, .0, .0],
65
+ target_stds=[1.0, 1.0, 1.0, 1.0]),
66
+ loss_cls=dict(
67
+ type=CrossEntropyLoss, use_sigmoid=True, loss_weight=1.0),
68
+ loss_bbox=dict(type=L1Loss, loss_weight=1.0)),
69
+ roi_head=dict(
70
+ type=StandardRoIHead,
71
+ bbox_roi_extractor=dict(
72
+ type=SingleRoIExtractor,
73
+ roi_layer=dict(type=RoIAlign, output_size=7, sampling_ratio=0),
74
+ out_channels=256,
75
+ featmap_strides=[4, 8, 16, 32]),
76
+ bbox_head=dict(
77
+ type=Shared2FCBBoxHead,
78
+ in_channels=256,
79
+ fc_out_channels=1024,
80
+ roi_feat_size=7,
81
+ num_classes=80,
82
+ bbox_coder=dict(
83
+ type=DeltaXYWHBBoxCoder,
84
+ target_means=[0., 0., 0., 0.],
85
+ target_stds=[0.1, 0.1, 0.2, 0.2]),
86
+ reg_class_agnostic=False,
87
+ loss_cls=dict(
88
+ type=CrossEntropyLoss, use_sigmoid=False, loss_weight=1.0),
89
+ loss_bbox=dict(type=L1Loss, loss_weight=1.0)),
90
+ mask_roi_extractor=dict(
91
+ type=SingleRoIExtractor,
92
+ roi_layer=dict(type=RoIAlign, output_size=14, sampling_ratio=0),
93
+ out_channels=256,
94
+ featmap_strides=[4, 8, 16, 32]),
95
+ mask_head=dict(
96
+ type=FCNMaskHead,
97
+ num_convs=4,
98
+ in_channels=256,
99
+ conv_out_channels=256,
100
+ num_classes=80,
101
+ loss_mask=dict(
102
+ type=CrossEntropyLoss, use_mask=True, loss_weight=1.0))),
103
+ # model training and testing settings
104
+ train_cfg=dict(
105
+ rpn=dict(
106
+ assigner=dict(
107
+ type=MaxIoUAssigner,
108
+ pos_iou_thr=0.7,
109
+ neg_iou_thr=0.3,
110
+ min_pos_iou=0.3,
111
+ match_low_quality=True,
112
+ ignore_iof_thr=-1),
113
+ sampler=dict(
114
+ type=RandomSampler,
115
+ num=256,
116
+ pos_fraction=0.5,
117
+ neg_pos_ub=-1,
118
+ add_gt_as_proposals=False),
119
+ allowed_border=-1,
120
+ pos_weight=-1,
121
+ debug=False),
122
+ rpn_proposal=dict(
123
+ nms_pre=2000,
124
+ max_per_img=1000,
125
+ nms=dict(type=nms, iou_threshold=0.7),
126
+ min_bbox_size=0),
127
+ rcnn=dict(
128
+ assigner=dict(
129
+ type=MaxIoUAssigner,
130
+ pos_iou_thr=0.5,
131
+ neg_iou_thr=0.5,
132
+ min_pos_iou=0.5,
133
+ match_low_quality=True,
134
+ ignore_iof_thr=-1),
135
+ sampler=dict(
136
+ type=RandomSampler,
137
+ num=512,
138
+ pos_fraction=0.25,
139
+ neg_pos_ub=-1,
140
+ add_gt_as_proposals=True),
141
+ mask_size=28,
142
+ pos_weight=-1,
143
+ debug=False)),
144
+ test_cfg=dict(
145
+ rpn=dict(
146
+ nms_pre=1000,
147
+ max_per_img=1000,
148
+ nms=dict(type=nms, iou_threshold=0.7),
149
+ min_bbox_size=0),
150
+ rcnn=dict(
151
+ score_thr=0.05,
152
+ nms=dict(type=nms, iou_threshold=0.5),
153
+ max_per_img=100,
154
+ mask_thr_binary=0.5)))
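`pad_size_divisor=32` in the preprocessor above pads every batch so the FPN strides divide the input evenly. A quick sketch of the resulting padded shape:

```python
# Compute the padded input shape enforced by pad_size_divisor=32.
import math

def padded_shape(h, w, divisor=32):
    return math.ceil(h / divisor) * divisor, math.ceil(w / divisor) * divisor

print(padded_shape(800, 1333))  # -> (800, 1344)
```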
mmdet/configs/_base_/models/retinanet_r50_fpn.py ADDED
@@ -0,0 +1,77 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from mmcv.ops import nms
3
+ from torch.nn import BatchNorm2d
4
+
5
+ from mmdet.models import (FPN, DetDataPreprocessor, FocalLoss, L1Loss, ResNet,
6
+ RetinaHead, RetinaNet)
7
+ from mmdet.models.task_modules import (AnchorGenerator, DeltaXYWHBBoxCoder,
8
+ MaxIoUAssigner, PseudoSampler)
9
+
10
+ # model settings
11
+ model = dict(
12
+ type=RetinaNet,
13
+ data_preprocessor=dict(
14
+ type=DetDataPreprocessor,
15
+ mean=[123.675, 116.28, 103.53],
16
+ std=[58.395, 57.12, 57.375],
17
+ bgr_to_rgb=True,
18
+ pad_size_divisor=32),
19
+ backbone=dict(
20
+ type=ResNet,
21
+ depth=50,
22
+ num_stages=4,
23
+ out_indices=(0, 1, 2, 3),
24
+ frozen_stages=1,
25
+ norm_cfg=dict(type=BatchNorm2d, requires_grad=True),
26
+ norm_eval=True,
27
+ style='pytorch',
28
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
29
+ neck=dict(
30
+ type=FPN,
31
+ in_channels=[256, 512, 1024, 2048],
32
+ out_channels=256,
33
+ start_level=1,
34
+ add_extra_convs='on_input',
35
+ num_outs=5),
36
+ bbox_head=dict(
37
+ type=RetinaHead,
38
+ num_classes=80,
39
+ in_channels=256,
40
+ stacked_convs=4,
41
+ feat_channels=256,
42
+ anchor_generator=dict(
43
+ type=AnchorGenerator,
44
+ octave_base_scale=4,
45
+ scales_per_octave=3,
46
+ ratios=[0.5, 1.0, 2.0],
47
+ strides=[8, 16, 32, 64, 128]),
48
+ bbox_coder=dict(
49
+ type=DeltaXYWHBBoxCoder,
50
+ target_means=[.0, .0, .0, .0],
51
+ target_stds=[1.0, 1.0, 1.0, 1.0]),
52
+ loss_cls=dict(
53
+ type=FocalLoss,
54
+ use_sigmoid=True,
55
+ gamma=2.0,
56
+ alpha=0.25,
57
+ loss_weight=1.0),
58
+ loss_bbox=dict(type=L1Loss, loss_weight=1.0)),
59
+ # model training and testing settings
60
+ train_cfg=dict(
61
+ assigner=dict(
62
+ type=MaxIoUAssigner,
63
+ pos_iou_thr=0.5,
64
+ neg_iou_thr=0.4,
65
+ min_pos_iou=0,
66
+ ignore_iof_thr=-1),
67
+ sampler=dict(
68
+ type=PseudoSampler), # Focal loss should use PseudoSampler
69
+ allowed_border=-1,
70
+ pos_weight=-1,
71
+ debug=False),
72
+ test_cfg=dict(
73
+ nms_pre=1000,
74
+ min_bbox_size=0,
75
+ score_thr=0.05,
76
+ nms=dict(type=nms, iou_threshold=0.5),
77
+ max_per_img=100))
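The classification branch above uses sigmoid focal loss with `gamma=2.0` and `alpha=0.25`. A plain-Python sketch of that formula (not the mmdet implementation) makes the down-weighting of easy examples concrete:

```python
# Focal loss for a single binary prediction: FL = -alpha_t * (1 - p_t)^gamma * log(p_t)
import math

def focal_loss(p, target, gamma=2.0, alpha=0.25):
    """p: predicted probability of the positive class; target: 0 or 1."""
    p_t = p if target == 1 else 1 - p
    alpha_t = alpha if target == 1 else 1 - alpha
    return -alpha_t * (1 - p_t) ** gamma * math.log(p_t)

print(focal_loss(0.9, 1))  # easy positive -> tiny loss
print(focal_loss(0.1, 1))  # hard positive -> much larger loss
```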
mmdet/configs/_base_/schedules/schedule_1x.py ADDED
@@ -0,0 +1,33 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from mmengine.optim.optimizer.optimizer_wrapper import OptimWrapper
3
+ from mmengine.optim.scheduler.lr_scheduler import LinearLR, MultiStepLR
4
+ from mmengine.runner.loops import EpochBasedTrainLoop, TestLoop, ValLoop
5
+ from torch.optim.sgd import SGD
6
+
7
+ # training schedule for 1x
8
+ train_cfg = dict(type=EpochBasedTrainLoop, max_epochs=12, val_interval=1)
9
+ val_cfg = dict(type=ValLoop)
10
+ test_cfg = dict(type=TestLoop)
11
+
12
+ # learning rate
13
+ param_scheduler = [
14
+ dict(type=LinearLR, start_factor=0.001, by_epoch=False, begin=0, end=500),
15
+ dict(
16
+ type=MultiStepLR,
17
+ begin=0,
18
+ end=12,
19
+ by_epoch=True,
20
+ milestones=[8, 11],
21
+ gamma=0.1)
22
+ ]
23
+
24
+ # optimizer
25
+ optim_wrapper = dict(
26
+ type=OptimWrapper,
27
+ optimizer=dict(type=SGD, lr=0.02, momentum=0.9, weight_decay=0.0001))
28
+
29
+ # Default setting for scaling LR automatically
30
+ # - `enable` means enable scaling LR automatically
31
+ # or not by default.
32
+ # - `base_batch_size` = (8 GPUs) x (2 samples per GPU).
33
+ auto_scale_lr = dict(enable=False, base_batch_size=16)
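The 1x schedule above combines a 500-iteration linear warmup with step decays at epochs 8 and 11. A rough sketch of the resulting LR curve, assuming a hypothetical 7330 iterations per epoch (roughly COCO at total batch size 16); this approximates, rather than reproduces, mmengine's schedulers:

```python
# Approximate LR produced by LinearLR(start_factor=0.001, end=500 iters)
# followed by MultiStepLR(milestones=[8, 11], gamma=0.1) on a 0.02 base LR.
def lr_at(epoch, iter_in_epoch, base_lr=0.02, iters_per_epoch=7330):
    it = epoch * iters_per_epoch + iter_in_epoch
    warmup = min(it / 500, 1.0)
    lr = base_lr * (0.001 + (1 - 0.001) * warmup)
    for milestone in (8, 11):
        if epoch >= milestone:
            lr *= 0.1
    return lr

print(lr_at(0, 0), lr_at(0, 500), lr_at(8, 0), lr_at(11, 0))
```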
mmdet/configs/_base_/schedules/schedule_2x.py ADDED
@@ -0,0 +1,33 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from mmengine.optim.optimizer.optimizer_wrapper import OptimWrapper
3
+ from mmengine.optim.scheduler.lr_scheduler import LinearLR, MultiStepLR
4
+ from mmengine.runner.loops import EpochBasedTrainLoop, TestLoop, ValLoop
5
+ from torch.optim.sgd import SGD
6
+
7
+ # training schedule for 2x
8
+ train_cfg = dict(type=EpochBasedTrainLoop, max_epochs=24, val_interval=1)
9
+ val_cfg = dict(type=ValLoop)
10
+ test_cfg = dict(type=TestLoop)
11
+
12
+ # learning rate
13
+ param_scheduler = [
14
+ dict(type=LinearLR, start_factor=0.001, by_epoch=False, begin=0, end=500),
15
+ dict(
16
+ type=MultiStepLR,
17
+ begin=0,
18
+ end=24,
19
+ by_epoch=True,
20
+ milestones=[16, 22],
21
+ gamma=0.1)
22
+ ]
23
+
24
+ # optimizer
25
+ optim_wrapper = dict(
26
+ type=OptimWrapper,
27
+ optimizer=dict(type=SGD, lr=0.02, momentum=0.9, weight_decay=0.0001))
28
+
29
+ # Default setting for scaling LR automatically
30
+ # - `enable` means enable scaling LR automatically
31
+ # or not by default.
32
+ # - `base_batch_size` = (8 GPUs) x (2 samples per GPU).
33
+ auto_scale_lr = dict(enable=False, base_batch_size=16)
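`auto_scale_lr` in both schedules implements the linear scaling rule: when enabled, the 0.02 base LR (tuned for 8 GPUs x 2 images each, i.e. `base_batch_size=16`) is rescaled by the actual total batch size. A sketch of the arithmetic:

```python
# Linear LR scaling: lr = base_lr * (total batch size / base_batch_size).
def scaled_lr(base_lr=0.02, base_batch_size=16, num_gpus=8, samples_per_gpu=2):
    return base_lr * (num_gpus * samples_per_gpu) / base_batch_size

print(scaled_lr(num_gpus=4))   # 0.01 when training on 4 GPUs
print(scaled_lr(num_gpus=16))  # 0.04 when training on 16 GPUs
```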
mmdet/configs/cascade_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco.py ADDED
@@ -0,0 +1,13 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+
3
+ # Please refer to https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta for more details. # noqa
4
+ # mmcv >= 2.0.1
5
+ # mmengine >= 0.8.0
6
+
7
+ from mmengine.config import read_base
8
+
9
+ with read_base():
10
+ from .._base_.datasets.coco_instance import *
11
+ from .._base_.default_runtime import *
12
+ from .._base_.models.cascade_mask_rcnn_r50_fpn import *
13
+ from .._base_.schedules.schedule_1x import *
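This file only composes the four bases through `read_base`; it is consumed like any other MMDetection config, typically via `python tools/train.py <config>`. A sketch of driving it programmatically with mmengine instead, assuming `Config.fromfile` can parse the pure-Python config style described in the linked tutorial (the work directory is illustrative):

```python
# Sketch: load the composed config above and launch training with mmengine.
from mmengine.config import Config
from mmengine.runner import Runner

cfg = Config.fromfile(
    'mmdet/configs/cascade_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco.py')
cfg.work_dir = './work_dirs/cascade_mask_rcnn_r50_fpn_1x_coco'
runner = Runner.from_cfg(cfg)
runner.train()
```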
mmdet/configs/cascade_rcnn/cascade_rcnn_r50_fpn_1x_coco.py ADDED
@@ -0,0 +1,13 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+
3
+ # Please refer to https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta for more details. # noqa
4
+ # mmcv >= 2.0.1
5
+ # mmengine >= 0.8.0
6
+
7
+ from mmengine.config import read_base
8
+
9
+ with read_base():
10
+ from .._base_.datasets.coco_detection import *
11
+ from .._base_.default_runtime import *
12
+ from .._base_.models.cascade_rcnn_r50_fpn import *
13
+ from .._base_.schedules.schedule_1x import *
mmdet/configs/common/lsj_100e_coco_detection.py ADDED
@@ -0,0 +1,134 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+
3
+ # Please refer to https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta for more details. # noqa
4
+ # mmcv >= 2.0.1
5
+ # mmengine >= 0.8.0
6
+
7
+ from mmengine.config import read_base
8
+
9
+ with read_base():
10
+ from .._base_.default_runtime import *
11
+
12
+ from mmengine.dataset.sampler import DefaultSampler
13
+ from mmengine.optim import OptimWrapper
14
+ from mmengine.optim.scheduler.lr_scheduler import LinearLR, MultiStepLR
15
+ from mmengine.runner.loops import EpochBasedTrainLoop, TestLoop, ValLoop
16
+ from torch.optim import SGD
17
+
18
+ from mmdet.datasets import CocoDataset, RepeatDataset
19
+ from mmdet.datasets.transforms.formatting import PackDetInputs
20
+ from mmdet.datasets.transforms.loading import (FilterAnnotations,
21
+ LoadAnnotations,
22
+ LoadImageFromFile)
23
+ from mmdet.datasets.transforms.transforms import (CachedMixUp, CachedMosaic,
24
+ Pad, RandomCrop, RandomFlip,
25
+ RandomResize, Resize)
26
+ from mmdet.evaluation import CocoMetric
27
+
28
+ # dataset settings
29
+ dataset_type = CocoDataset
30
+ data_root = 'data/coco/'
31
+ image_size = (1024, 1024)
32
+
33
+ backend_args = None
34
+
35
+ train_pipeline = [
36
+ dict(type=LoadImageFromFile, backend_args=backend_args),
37
+ dict(type=LoadAnnotations, with_bbox=True, with_mask=True),
38
+ dict(
39
+ type=RandomResize,
40
+ scale=image_size,
41
+ ratio_range=(0.1, 2.0),
42
+ keep_ratio=True),
43
+ dict(
44
+ type=RandomCrop,
45
+ crop_type='absolute_range',
46
+ crop_size=image_size,
47
+ recompute_bbox=True,
48
+ allow_negative_crop=True),
49
+ dict(type=FilterAnnotations, min_gt_bbox_wh=(1e-2, 1e-2)),
50
+ dict(type=RandomFlip, prob=0.5),
51
+ dict(type=PackDetInputs)
52
+ ]
53
+ test_pipeline = [
54
+ dict(type=LoadImageFromFile, backend_args=backend_args),
55
+ dict(type=Resize, scale=(1333, 800), keep_ratio=True),
56
+ dict(type=LoadAnnotations, with_bbox=True),
57
+ dict(
58
+ type=PackDetInputs,
59
+ meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
60
+ 'scale_factor'))
61
+ ]
62
+
63
+ # Use RepeatDataset to speed up training
64
+ train_dataloader = dict(
65
+ batch_size=2,
66
+ num_workers=2,
67
+ persistent_workers=True,
68
+ sampler=dict(type=DefaultSampler, shuffle=True),
69
+ dataset=dict(
70
+ type=RepeatDataset,
71
+ times=4, # set this anywhere from 2 to 16 for 50e - 400e training.
72
+ dataset=dict(
73
+ type=dataset_type,
74
+ data_root=data_root,
75
+ ann_file='annotations/instances_train2017.json',
76
+ data_prefix=dict(img='train2017/'),
77
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
78
+ pipeline=train_pipeline,
79
+ backend_args=backend_args)))
80
+ val_dataloader = dict(
81
+ batch_size=1,
82
+ num_workers=2,
83
+ persistent_workers=True,
84
+ drop_last=False,
85
+ sampler=dict(type=DefaultSampler, shuffle=False),
86
+ dataset=dict(
87
+ type=dataset_type,
88
+ data_root=data_root,
89
+ ann_file='annotations/instances_val2017.json',
90
+ data_prefix=dict(img='val2017/'),
91
+ test_mode=True,
92
+ pipeline=test_pipeline,
93
+ backend_args=backend_args))
94
+ test_dataloader = val_dataloader
95
+
96
+ val_evaluator = dict(
97
+ type=CocoMetric,
98
+ ann_file=data_root + 'annotations/instances_val2017.json',
99
+ metric=['bbox', 'segm'],
100
+ format_only=False,
101
+ backend_args=backend_args)
102
+ test_evaluator = val_evaluator
103
+
104
+ max_epochs = 25
105
+
106
+ train_cfg = dict(
107
+ type=EpochBasedTrainLoop, max_epochs=max_epochs, val_interval=5)
108
+ val_cfg = dict(type=ValLoop)
109
+ test_cfg = dict(type=TestLoop)
110
+
111
+ # optimizer assumes bs=64
112
+ optim_wrapper = dict(
113
+ type=OptimWrapper,
114
+ optimizer=dict(type=SGD, lr=0.1, momentum=0.9, weight_decay=0.00004))
115
+
116
+ # learning rate
117
+ param_scheduler = [
118
+ dict(type=LinearLR, start_factor=0.067, by_epoch=False, begin=0, end=500),
119
+ dict(
120
+ type=MultiStepLR,
121
+ begin=0,
122
+ end=max_epochs,
123
+ by_epoch=True,
124
+ milestones=[22, 24],
125
+ gamma=0.1)
126
+ ]
127
+
128
+ # only keep latest 2 checkpoints
129
+ default_hooks.update(dict(checkpoint=dict(max_keep_ckpts=2)))
130
+
131
+ # NOTE: `auto_scale_lr` is for automatically scaling LR,
132
+ # USER SHOULD NOT CHANGE ITS VALUES.
133
+ # base_batch_size = (32 GPUs) x (2 samples per GPU)
134
+ auto_scale_lr = dict(base_batch_size=64)
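The "100e" in the file name comes from `RepeatDataset`: 25 real epochs over a dataset repeated 4 times. Changing `times` rescales the effective schedule, which is exactly what the 200e variants below do. The arithmetic:

```python
# Effective epochs under the LSJ schedules: RepeatDataset(times) x 25 epochs.
def effective_epochs(times, max_epochs=25):
    return times * max_epochs

for times in (2, 4, 8, 16):
    print(f'times={times}: {effective_epochs(times)}e')
# times=2: 50e, times=4: 100e, times=8: 200e, times=16: 400e
```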
mmdet/configs/common/lsj_100e_coco_instance.py ADDED
@@ -0,0 +1,134 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+
3
+ # Please refer to https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta for more details. # noqa
4
+ # mmcv >= 2.0.1
5
+ # mmengine >= 0.8.0
6
+
7
+ from mmengine.config import read_base
8
+
9
+ with read_base():
10
+ from .._base_.default_runtime import *
11
+
12
+ from mmengine.dataset.sampler import DefaultSampler
13
+ from mmengine.optim import OptimWrapper
14
+ from mmengine.optim.scheduler.lr_scheduler import LinearLR, MultiStepLR
15
+ from mmengine.runner.loops import EpochBasedTrainLoop, TestLoop, ValLoop
16
+ from torch.optim import SGD
17
+
18
+ from mmdet.datasets import CocoDataset, RepeatDataset
19
+ from mmdet.datasets.transforms.formatting import PackDetInputs
20
+ from mmdet.datasets.transforms.loading import (FilterAnnotations,
21
+ LoadAnnotations,
22
+ LoadImageFromFile)
23
+ from mmdet.datasets.transforms.transforms import (CachedMixUp, CachedMosaic,
24
+ Pad, RandomCrop, RandomFlip,
25
+ RandomResize, Resize)
26
+ from mmdet.evaluation import CocoMetric
27
+
28
+ # dataset settings
29
+ dataset_type = CocoDataset
30
+ data_root = 'data/coco/'
31
+ image_size = (1024, 1024)
32
+
33
+ backend_args = None
34
+
35
+ train_pipeline = [
36
+ dict(type=LoadImageFromFile, backend_args=backend_args),
37
+ dict(type=LoadAnnotations, with_bbox=True, with_mask=True),
38
+ dict(
39
+ type=RandomResize,
40
+ scale=image_size,
41
+ ratio_range=(0.1, 2.0),
42
+ keep_ratio=True),
43
+ dict(
44
+ type=RandomCrop,
45
+ crop_type='absolute_range',
46
+ crop_size=image_size,
47
+ recompute_bbox=True,
48
+ allow_negative_crop=True),
49
+ dict(type=FilterAnnotations, min_gt_bbox_wh=(1e-2, 1e-2)),
50
+ dict(type=RandomFlip, prob=0.5),
51
+ dict(type=PackDetInputs)
52
+ ]
53
+ test_pipeline = [
54
+ dict(type=LoadImageFromFile, backend_args=backend_args),
55
+ dict(type=Resize, scale=(1333, 800), keep_ratio=True),
56
+ dict(type=LoadAnnotations, with_bbox=True, with_mask=True),
57
+ dict(
58
+ type=PackDetInputs,
59
+ meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
60
+ 'scale_factor'))
61
+ ]
62
+
63
+ # Use RepeatDataset to speed up training
64
+ train_dataloader = dict(
65
+ batch_size=2,
66
+ num_workers=2,
67
+ persistent_workers=True,
68
+ sampler=dict(type=DefaultSampler, shuffle=True),
69
+ dataset=dict(
70
+ type=RepeatDataset,
71
+ times=4, # simply change this from 2 to 16 for 50e - 400e training.
72
+ dataset=dict(
73
+ type=dataset_type,
74
+ data_root=data_root,
75
+ ann_file='annotations/instances_train2017.json',
76
+ data_prefix=dict(img='train2017/'),
77
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
78
+ pipeline=train_pipeline,
79
+ backend_args=backend_args)))
80
+ val_dataloader = dict(
81
+ batch_size=1,
82
+ num_workers=2,
83
+ persistent_workers=True,
84
+ drop_last=False,
85
+ sampler=dict(type=DefaultSampler, shuffle=False),
86
+ dataset=dict(
87
+ type=dataset_type,
88
+ data_root=data_root,
89
+ ann_file='annotations/instances_val2017.json',
90
+ data_prefix=dict(img='val2017/'),
91
+ test_mode=True,
92
+ pipeline=test_pipeline,
93
+ backend_args=backend_args))
94
+ test_dataloader = val_dataloader
95
+
96
+ val_evaluator = dict(
97
+ type=CocoMetric,
98
+ ann_file=data_root + 'annotations/instances_val2017.json',
99
+ metric=['bbox', 'segm'],
100
+ format_only=False,
101
+ backend_args=backend_args)
102
+ test_evaluator = val_evaluator
103
+
104
+ max_epochs = 25
105
+
106
+ train_cfg = dict(
107
+ type=EpochBasedTrainLoop, max_epochs=max_epochs, val_interval=5)
108
+ val_cfg = dict(type=ValLoop)
109
+ test_cfg = dict(type=TestLoop)
110
+
111
+ # optimizer assumes bs=64
112
+ optim_wrapper = dict(
113
+ type=OptimWrapper,
114
+ optimizer=dict(type=SGD, lr=0.1, momentum=0.9, weight_decay=0.00004))
115
+
116
+ # learning rate
117
+ param_scheduler = [
118
+ dict(type=LinearLR, start_factor=0.067, by_epoch=False, begin=0, end=500),
119
+ dict(
120
+ type=MultiStepLR,
121
+ begin=0,
122
+ end=max_epochs,
123
+ by_epoch=True,
124
+ milestones=[22, 24],
125
+ gamma=0.1)
126
+ ]
127
+
128
+ # only keep latest 2 checkpoints
129
+ default_hooks.update(dict(checkpoint=dict(max_keep_ckpts=2)))
130
+
131
+ # NOTE: `auto_scale_lr` is for automatically scaling LR,
132
+ # USER SHOULD NOT CHANGE ITS VALUES.
133
+ # base_batch_size = (32 GPUs) x (2 samples per GPU)
134
+ auto_scale_lr = dict(base_batch_size=64)
mmdet/configs/common/lsj_200e_coco_detection.py ADDED
@@ -0,0 +1,25 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+
3
+ # Please refer to https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta for more details. # noqa
4
+ # mmcv >= 2.0.1
5
+ # mmengine >= 0.8.0
6
+
7
+ from mmengine.config import read_base
8
+
9
+ with read_base():
10
+ from .lsj_100e_coco_detection import *
11
+
12
+ # 8x25=200e
13
+ train_dataloader.update(dict(dataset=dict(times=8)))
14
+
15
+ # learning rate
16
+ param_scheduler = [
17
+ dict(type=LinearLR, start_factor=0.067, by_epoch=False, begin=0, end=1000),
18
+ dict(
19
+ type=MultiStepLR,
20
+ begin=0,
21
+ end=25,
22
+ by_epoch=True,
23
+ milestones=[22, 24],
24
+ gamma=0.1)
25
+ ]
mmdet/configs/common/lsj_200e_coco_instance.py ADDED
@@ -0,0 +1,25 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+
3
+ # Please refer to https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta for more details. # noqa
4
+ # mmcv >= 2.0.1
5
+ # mmengine >= 0.8.0
6
+
7
+ from mmengine.config import read_base
8
+
9
+ with read_base():
10
+ from .lsj_100e_coco_instance import *
11
+
12
+ # 8x25=200e
13
+ train_dataloader.update(dict(dataset=dict(times=8)))
14
+
15
+ # learning rate
16
+ param_scheduler = [
17
+ dict(type=LinearLR, start_factor=0.067, by_epoch=False, begin=0, end=1000),
18
+ dict(
19
+ type=MultiStepLR,
20
+ begin=0,
21
+ end=25,
22
+ by_epoch=True,
23
+ milestones=[22, 24],
24
+ gamma=0.1)
25
+ ]
mmdet/configs/common/ms_3x_coco.py ADDED
@@ -0,0 +1,130 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+
3
+ # Please refer to https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta for more details. # noqa
4
+ # mmcv >= 2.0.1
5
+ # mmengine >= 0.8.0
6
+
7
+ from mmengine.config import read_base
8
+
9
+ with read_base():
10
+ from .._base_.default_runtime import *
11
+
12
+ from mmcv.transforms import RandomResize
13
+ from mmengine.dataset import RepeatDataset
14
+ from mmengine.dataset.sampler import DefaultSampler
15
+ from mmengine.optim import OptimWrapper
16
+ from mmengine.optim.scheduler.lr_scheduler import LinearLR, MultiStepLR
17
+ from mmengine.runner.loops import EpochBasedTrainLoop, TestLoop, ValLoop
18
+ from torch.optim import SGD
19
+
20
+ from mmdet.datasets import AspectRatioBatchSampler, CocoDataset
21
+ from mmdet.datasets.transforms.formatting import PackDetInputs
22
+ from mmdet.datasets.transforms.loading import (LoadAnnotations,
23
+ LoadImageFromFile)
24
+ from mmdet.datasets.transforms.transforms import RandomFlip, Resize
25
+ from mmdet.evaluation import CocoMetric
26
+
27
+ # dataset settings
28
+ dataset_type = CocoDataset
29
+ data_root = 'data/coco/'
30
+
31
+ # Example to use different file client
32
+ # Method 1: simply set the data root and let the file I/O module
33
+ # automatically infer it from the prefix (LMDB and Memcached are not supported yet)
34
+
35
+ # data_root = 's3://openmmlab/datasets/detection/coco/'
36
+
37
+ # Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6
38
+ # backend_args = dict(
39
+ # backend='petrel',
40
+ # path_mapping=dict({
41
+ # './data/': 's3://openmmlab/datasets/detection/',
42
+ # 'data/': 's3://openmmlab/datasets/detection/'
43
+ # }))
44
+ backend_args = None
45
+
46
+ # In mstrain 3x config, img_scale=[(1333, 640), (1333, 800)],
47
+ # multiscale_mode='range'
48
+ train_pipeline = [
49
+ dict(type=LoadImageFromFile, backend_args=backend_args),
50
+ dict(type=LoadAnnotations, with_bbox=True),
51
+ dict(type=RandomResize, scale=[(1333, 640), (1333, 800)], keep_ratio=True),
52
+ dict(type=RandomFlip, prob=0.5),
53
+ dict(type=PackDetInputs)
54
+ ]
55
+ test_pipeline = [
56
+ dict(type=LoadImageFromFile, backend_args=backend_args),
57
+ dict(type=Resize, scale=(1333, 800), keep_ratio=True),
58
+ dict(type=LoadAnnotations, with_bbox=True),
59
+ dict(
60
+ type=PackDetInputs,
61
+ meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
62
+ 'scale_factor'))
63
+ ]
64
+ train_dataloader = dict(
65
+ batch_size=2,
66
+ num_workers=2,
67
+ persistent_workers=True,
68
+ pin_memory=True,
69
+ sampler=dict(type=DefaultSampler, shuffle=True),
70
+ batch_sampler=dict(type=AspectRatioBatchSampler),
71
+ dataset=dict(
72
+ type=RepeatDataset,
73
+ times=3,
74
+ dataset=dict(
75
+ type=dataset_type,
76
+ data_root=data_root,
77
+ ann_file='annotations/instances_train2017.json',
78
+ data_prefix=dict(img='train2017/'),
79
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
80
+ pipeline=train_pipeline,
81
+ backend_args=backend_args)))
82
+ val_dataloader = dict(
83
+ batch_size=1,
84
+ num_workers=2,
85
+ persistent_workers=True,
86
+ drop_last=False,
87
+ sampler=dict(type=DefaultSampler, shuffle=False),
88
+ dataset=dict(
89
+ type=dataset_type,
90
+ data_root=data_root,
91
+ ann_file='annotations/instances_val2017.json',
92
+ data_prefix=dict(img='val2017/'),
93
+ test_mode=True,
94
+ pipeline=test_pipeline,
95
+ backend_args=backend_args))
96
+ test_dataloader = val_dataloader
97
+
98
+ val_evaluator = dict(
99
+ type=CocoMetric,
100
+ ann_file=data_root + 'annotations/instances_val2017.json',
101
+ metric='bbox',
102
+ backend_args=backend_args)
103
+ test_evaluator = val_evaluator
104
+
105
+ # training schedule for 3x with `RepeatDataset`
106
+ train_cfg = dict(type=EpochBasedTrainLoop, max_epochs=12, val_interval=1)
107
+ val_cfg = dict(type=ValLoop)
108
+ test_cfg = dict(type=TestLoop)
109
+
110
+ # learning rate
111
+ param_scheduler = [
112
+ dict(type=LinearLR, start_factor=0.001, by_epoch=False, begin=0, end=500),
113
+ dict(
114
+ type=MultiStepLR,
115
+ begin=0,
116
+ end=12,
117
+ by_epoch=True,
118
+ milestones=[9, 11],
119
+ gamma=0.1)
120
+ ]
121
+
122
+ # optimizer
123
+ optim_wrapper = dict(
124
+ type=OptimWrapper,
125
+ optimizer=dict(type=SGD, lr=0.02, momentum=0.9, weight_decay=0.0001))
126
+ # Default setting for scaling LR automatically
127
+ # - `enable`: whether to enable automatic LR scaling by default.
129
+ # - `base_batch_size` = (8 GPUs) x (2 samples per GPU).
130
+ auto_scale_lr = dict(enable=False, base_batch_size=16)
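
For reference, the arithmetic behind the "3x with `RepeatDataset`" schedule above, as a minimal plain-Python sketch (illustrative only, not part of the config): `times=3` makes the 12 nominal epochs equal to 36 passes over COCO train2017.

    # RepeatDataset(times=3) makes one training epoch traverse COCO three times,
    # so 12 epochs over the repeated dataset equal 36 passes over train2017.
    times = 3
    max_epochs = 12          # epochs over the repeated dataset (see train_cfg above)
    milestones = [9, 11]     # LR-drop epochs over the repeated dataset

    print(times * max_epochs)               # 36 effective epochs -> the usual "3x" schedule
    print([times * m for m in milestones])  # LR drops after effective epochs 27 and 33
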
mmdet/configs/common/ms_3x_coco_instance.py ADDED
@@ -0,0 +1,136 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+
3
+ # Please refer to https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta for more details. # noqa
4
+ # mmcv >= 2.0.1
5
+ # mmengine >= 0.8.0
6
+
7
+ from mmengine.config import read_base
8
+
9
+ with read_base():
10
+ from .._base_.default_runtime import *
11
+
12
+ from mmcv.transforms import RandomChoiceResize
13
+ from mmengine.dataset import RepeatDataset
14
+ from mmengine.dataset.sampler import DefaultSampler, InfiniteSampler
15
+ from mmengine.optim import OptimWrapper
16
+ from mmengine.optim.scheduler.lr_scheduler import LinearLR, MultiStepLR
17
+ from mmengine.runner.loops import EpochBasedTrainLoop, TestLoop, ValLoop
18
+ from torch.optim import SGD
19
+
20
+ from mmdet.datasets import AspectRatioBatchSampler, CocoDataset
21
+ from mmdet.datasets.transforms.formatting import PackDetInputs
22
+ from mmdet.datasets.transforms.loading import (FilterAnnotations,
23
+ LoadAnnotations,
24
+ LoadImageFromFile)
25
+ from mmdet.datasets.transforms.transforms import (CachedMixUp, CachedMosaic,
26
+ Pad, RandomCrop, RandomFlip,
27
+ RandomResize, Resize)
28
+ from mmdet.evaluation import CocoMetric
29
+
30
+ # dataset settings
31
+ dataset_type = CocoDataset
32
+ data_root = 'data/coco/'
33
+
34
+ # Example to use different file client
35
+ # Method 1: simply set the data root and let the file I/O module
36
+ # automatically infer from prefix (LMDB and Memcache are not supported yet)
37
+
38
+ # data_root = 's3://openmmlab/datasets/detection/coco/'
39
+
40
+ # Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6
41
+ # backend_args = dict(
42
+ # backend='petrel',
43
+ # path_mapping=dict({
44
+ # './data/': 's3://openmmlab/datasets/detection/',
45
+ # 'data/': 's3://openmmlab/datasets/detection/'
46
+ # }))
47
+ backend_args = None
48
+
49
+ train_pipeline = [
50
+ dict(type=LoadImageFromFile, backend_args=backend_args),
51
+ dict(type=LoadAnnotations, with_bbox=True, with_mask=True),
52
+ dict(
53
+ type=RandomResize, scale=[(1333, 640), (1333, 800)],
54
+ keep_ratio=True),
55
+ dict(type=RandomFlip, prob=0.5),
56
+ dict(type=PackDetInputs)
57
+ ]
58
+ test_pipeline = [
59
+ dict(type=LoadImageFromFile, backend_args=backend_args),
60
+ dict(type=Resize, scale=(1333, 800), keep_ratio=True),
61
+ dict(type=LoadAnnotations, with_bbox=True, with_mask=True),
62
+ dict(
63
+ type=PackDetInputs,
64
+ meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
65
+ 'scale_factor'))
66
+ ]
67
+ train_dataloader.update(
68
+ dict(
69
+ batch_size=2,
70
+ num_workers=2,
71
+ persistent_workers=True,
72
+ sampler=dict(type=DefaultSampler, shuffle=True),
73
+ batch_sampler=dict(type=AspectRatioBatchSampler),
74
+ dataset=dict(
75
+ type=RepeatDataset,
76
+ times=3,
77
+ dataset=dict(
78
+ type=dataset_type,
79
+ data_root=data_root,
80
+ ann_file='annotations/instances_train2017.json',
81
+ data_prefix=dict(img='train2017/'),
82
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
83
+ pipeline=train_pipeline,
84
+ backend_args=backend_args))))
85
+ val_dataloader.update(
86
+ dict(
87
+ batch_size=1,
88
+ num_workers=2,
89
+ persistent_workers=True,
90
+ drop_last=False,
91
+ sampler=dict(type=DefaultSampler, shuffle=False),
92
+ dataset=dict(
93
+ type=dataset_type,
94
+ data_root=data_root,
95
+ ann_file='annotations/instances_val2017.json',
96
+ data_prefix=dict(img='val2017/'),
97
+ test_mode=True,
98
+ pipeline=test_pipeline,
99
+ backend_args=backend_args)))
100
+ test_dataloader = val_dataloader
101
+
102
+ val_evaluator.update(
103
+ dict(
104
+ type=CocoMetric,
105
+ ann_file=data_root + 'annotations/instances_val2017.json',
106
+ metric='bbox',
107
+ backend_args=backend_args))
108
+ test_evaluator = val_evaluator
109
+
110
+ # training schedule for 3x with `RepeatDataset`
111
+ train_cfg.update(dict(type=EpochBasedTrainLoop, max_epochs=12, val_interval=1))
112
+ val_cfg.update(dict(type=ValLoop))
113
+ test_cfg.update(dict(type=TestLoop))
114
+
115
+ # learning rate
116
+ param_scheduler = [
117
+ dict(type=LinearLR, start_factor=0.001, by_epoch=False, begin=0, end=500),
118
+ dict(
119
+ type=MultiStepLR,
120
+ begin=0,
121
+ end=12,
122
+ by_epoch=False,
123
+ milestones=[9, 11],
124
+ gamma=0.1)
125
+ ]
126
+
127
+ # optimizer
128
+ optim_wrapper.update(
129
+ dict(
130
+ type=OptimWrapper,
131
+ optimizer=dict(type=SGD, lr=0.02, momentum=0.9, weight_decay=0.0001)))
132
+ # Default setting for scaling LR automatically
133
+ # - `enable`: whether to enable automatic LR scaling by default.
135
+ # - `base_batch_size` = (8 GPUs) x (2 samples per GPU).
136
+ auto_scale_lr.update(dict(enable=False, base_batch_size=16))
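
This file defines only data, schedule and optimizer settings. A hypothetical downstream config (sketch only; the file name and its location under `mmdet/configs` are assumptions) would combine it with a model base from this upload via `read_base()`:

    # my_mask_rcnn_ms_3x_coco.py (hypothetical) -- combine a model base with the
    # multi-scale 3x instance-segmentation data/schedule base defined above.
    from mmengine.config import read_base

    with read_base():
        from .._base_.models.mask_rcnn_r50_fpn import *
        from ..common.ms_3x_coco_instance import *

    # Individual fields can then be overridden in place, e.g. a smaller batch size:
    train_dataloader.update(dict(batch_size=1, num_workers=1))
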
mmdet/configs/common/ms_90k_coco.py ADDED
@@ -0,0 +1,151 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+
3
+ # Please refer to https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta for more details. # noqa
4
+ # mmcv >= 2.0.1
5
+ # mmengine >= 0.8.0
6
+
7
+ from mmengine.config import read_base
8
+
9
+ with read_base():
10
+ from .._base_.default_runtime import *
11
+
12
+ from mmcv.transforms import RandomChoiceResize
13
+ from mmengine.dataset import RepeatDataset
14
+ from mmengine.dataset.sampler import DefaultSampler, InfiniteSampler
15
+ from mmengine.optim import OptimWrapper
16
+ from mmengine.optim.scheduler.lr_scheduler import LinearLR, MultiStepLR
17
+ from mmengine.runner.loops import IterBasedTrainLoop, TestLoop, ValLoop
18
+ from torch.optim import SGD
19
+
20
+ from mmdet.datasets import AspectRatioBatchSampler, CocoDataset
21
+ from mmdet.datasets.transforms.formatting import PackDetInputs
22
+ from mmdet.datasets.transforms.loading import (FilterAnnotations,
23
+ LoadAnnotations,
24
+ LoadImageFromFile)
25
+ from mmdet.datasets.transforms.transforms import (CachedMixUp, CachedMosaic,
26
+ Pad, RandomCrop, RandomFlip,
27
+ RandomResize, Resize)
28
+ from mmdet.evaluation import CocoMetric
29
+
30
+ # dataset settings
31
+ dataset_type = CocoDataset
32
+ data_root = 'data/coco/'
33
+ # Example to use different file client
34
+ # Method 1: simply set the data root and let the file I/O module
35
+ # automatically infer from prefix (LMDB and Memcache are not supported yet)
36
+
37
+ # data_root = 's3://openmmlab/datasets/detection/coco/'
38
+
39
+ # Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6
40
+ # backend_args = dict(
41
+ # backend='petrel',
42
+ # path_mapping=dict({
43
+ # './data/': 's3://openmmlab/datasets/detection/',
44
+ # 'data/': 's3://openmmlab/datasets/detection/'
45
+ # }))
46
+ backend_args = None
47
+
48
+ # Align with Detectron2
49
+ backend = 'pillow'
50
+ train_pipeline = [
51
+ dict(
52
+ type=LoadImageFromFile,
53
+ backend_args=backend_args,
54
+ imdecode_backend=backend),
55
+ dict(type=LoadAnnotations, with_bbox=True),
56
+ dict(
57
+ type=RandomChoiceResize,
58
+ scales=[(1333, 640), (1333, 672), (1333, 704), (1333, 736),
59
+ (1333, 768), (1333, 800)],
60
+ keep_ratio=True,
61
+ backend=backend),
62
+ dict(type=RandomFlip, prob=0.5),
63
+ dict(type=PackDetInputs)
64
+ ]
65
+ test_pipeline = [
66
+ dict(
67
+ type=LoadImageFromFile,
68
+ backend_args=backend_args,
69
+ imdecode_backend=backend),
70
+ dict(type=Resize, scale=(1333, 800), keep_ratio=True, backend=backend),
71
+ dict(type=LoadAnnotations, with_bbox=True),
72
+ dict(
73
+ type=PackDetInputs,
74
+ meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
75
+ 'scale_factor'))
76
+ ]
77
+ train_dataloader.update(
78
+ dict(
79
+ batch_size=2,
80
+ num_workers=2,
81
+ persistent_workers=True,
82
+ pin_memory=True,
83
+ sampler=dict(type=InfiniteSampler, shuffle=True),
84
+ batch_sampler=dict(type=AspectRatioBatchSampler),
85
+ dataset=dict(
86
+ type=dataset_type,
87
+ data_root=data_root,
88
+ ann_file='annotations/instances_train2017.json',
89
+ data_prefix=dict(img='train2017/'),
90
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
91
+ pipeline=train_pipeline,
92
+ backend_args=backend_args)))
93
+ val_dataloader.update(
94
+ dict(
95
+ batch_size=1,
96
+ num_workers=2,
97
+ persistent_workers=True,
98
+ drop_last=False,
99
+ pin_memory=True,
100
+ sampler=dict(type=DefaultSampler, shuffle=False),
101
+ dataset=dict(
102
+ type=dataset_type,
103
+ data_root=data_root,
104
+ ann_file='annotations/instances_val2017.json',
105
+ data_prefix=dict(img='val2017/'),
106
+ test_mode=True,
107
+ pipeline=test_pipeline,
108
+ backend_args=backend_args)))
109
+ test_dataloader = val_dataloader
110
+
111
+ val_evaluator.update(
112
+ dict(
113
+ type=CocoMetric,
114
+ ann_file=data_root + 'annotations/instances_val2017.json',
115
+ metric='bbox',
116
+ format_only=False,
117
+ backend_args=backend_args))
118
+ test_evaluator = val_evaluator
119
+
120
+ # training schedule for 90k
121
+ max_iter = 90000
122
+ train_cfg.update(
123
+ dict(type=IterBasedTrainLoop, max_iters=max_iter, val_interval=10000))
124
+ val_cfg.update(dict(type=ValLoop))
125
+ test_cfg.update(dict(type=TestLoop))
126
+
127
+ # learning rate
128
+ param_scheduler = [
129
+ dict(type=LinearLR, start_factor=0.001, by_epoch=False, begin=0, end=1000),
130
+ dict(
131
+ type=MultiStepLR,
132
+ begin=0,
133
+ end=max_iter,
134
+ by_epoch=False,
135
+ milestones=[60000, 80000],
136
+ gamma=0.1)
137
+ ]
138
+
139
+ # optimizer
140
+ optim_wrapper.update(
141
+ dict(
142
+ type=OptimWrapper,
143
+ optimizer=dict(type=SGD, lr=0.02, momentum=0.9, weight_decay=0.0001)))
144
+ # Default setting for scaling LR automatically
145
+ # - `enable`: whether to enable automatic LR scaling by default.
147
+ # - `base_batch_size` = (8 GPUs) x (2 samples per GPU).
148
+ auto_scale_lr.update(dict(enable=False, base_batch_size=16))
149
+
150
+ default_hooks.update(dict(checkpoint=dict(by_epoch=False, interval=10000)))
151
+ log_processor.update(dict(by_epoch=False))
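
For a rough sense of scale, the 90k-iteration schedule above can be converted to approximate epochs, assuming the usual ~118k images in COCO train2017 and the 16-image global batch implied by `auto_scale_lr` (a back-of-the-envelope sketch, not part of the config):

    num_train_images = 118_287            # approximate size of COCO train2017
    total_batch = 16                      # 8 GPUs x 2 samples per GPU
    iters_per_epoch = num_train_images / total_batch   # ~7393 iterations per epoch

    print(90_000 / iters_per_epoch)       # ~12.2 epochs in total
    print(60_000 / iters_per_epoch)       # first LR drop around epoch 8
    print(80_000 / iters_per_epoch)       # second LR drop around epoch 11
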
mmdet/configs/common/ms_poly_3x_coco_instance.py ADDED
@@ -0,0 +1,138 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+
3
+ # Please refer to https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta for more details. # noqa
4
+ # mmcv >= 2.0.1
5
+ # mmengine >= 0.8.0
6
+
7
+ from mmengine.config import read_base
8
+
9
+ with read_base():
10
+ from .._base_.default_runtime import *
11
+
12
+ from mmcv.transforms import RandomChoiceResize
13
+ from mmengine.dataset import RepeatDataset
14
+ from mmengine.dataset.sampler import DefaultSampler, InfiniteSampler
15
+ from mmengine.optim import OptimWrapper
16
+ from mmengine.optim.scheduler.lr_scheduler import LinearLR, MultiStepLR
17
+ from mmengine.runner.loops import EpochBasedTrainLoop, TestLoop, ValLoop
18
+ from torch.optim import SGD
19
+
20
+ from mmdet.datasets import AspectRatioBatchSampler, CocoDataset
21
+ from mmdet.datasets.transforms.formatting import PackDetInputs
22
+ from mmdet.datasets.transforms.loading import (FilterAnnotations,
23
+ LoadAnnotations,
24
+ LoadImageFromFile)
25
+ from mmdet.datasets.transforms.transforms import (CachedMixUp, CachedMosaic,
26
+ Pad, RandomCrop, RandomFlip,
27
+ RandomResize, Resize)
28
+ from mmdet.evaluation import CocoMetric
29
+
30
+ # dataset settings
31
+ dataset_type = CocoDataset
32
+ data_root = 'data/coco/'
33
+ # Example to use different file client
34
+ # Method 1: simply set the data root and let the file I/O module
35
+ # automatically infer from prefix (LMDB and Memcache are not supported yet)
36
+
37
+ # data_root = 's3://openmmlab/datasets/detection/coco/'
38
+
39
+ # Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6
40
+ # backend_args = dict(
41
+ # backend='petrel',
42
+ # path_mapping=dict({
43
+ # './data/': 's3://openmmlab/datasets/detection/',
44
+ # 'data/': 's3://openmmlab/datasets/detection/'
45
+ # }))
46
+ backend_args = None
47
+
48
+ # In mstrain 3x config, img_scale=[(1333, 640), (1333, 800)],
49
+ # multiscale_mode='range'
50
+ train_pipeline = [
51
+ dict(type=LoadImageFromFile, backend_args=backend_args),
52
+ dict(
53
+ type=LoadAnnotations, with_bbox=True, with_mask=True, poly2mask=False),
54
+ dict(
55
+ type=RandomResize, scale=[(1333, 640), (1333, 800)],
56
+ keep_ratio=True),
57
+ dict(type=RandomFlip, prob=0.5),
58
+ dict(type=PackDetInputs)
59
+ ]
60
+ test_pipeline = [
61
+ dict(type=LoadImageFromFile, backend_args=backend_args),
62
+ dict(type=Resize, scale=(1333, 800), keep_ratio=True),
63
+ dict(
64
+ type=LoadAnnotations, with_bbox=True, with_mask=True, poly2mask=False),
65
+ dict(
66
+ type=PackDetInputs,
67
+ meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
68
+ 'scale_factor'))
69
+ ]
70
+ train_dataloader.update(
71
+ dict(
72
+ batch_size=2,
73
+ num_workers=2,
74
+ persistent_workers=True,
75
+ pin_memory=True,
76
+ sampler=dict(type=DefaultSampler, shuffle=True),
77
+ batch_sampler=dict(type=AspectRatioBatchSampler),
78
+ dataset=dict(
+ type=RepeatDataset,
+ times=3,
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ ann_file='annotations/instances_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
+ pipeline=train_pipeline,
+ backend_args=backend_args))))
86
+ val_dataloader.update(
87
+ dict(
88
+ batch_size=2,
89
+ num_workers=2,
90
+ persistent_workers=True,
91
+ drop_last=False,
92
+ pin_memory=True,
93
+ sampler=dict(type=DefaultSampler, shuffle=False),
94
+ dataset=dict(
95
+ type=dataset_type,
96
+ data_root=data_root,
97
+ ann_file='annotations/instances_val2017.json',
98
+ data_prefix=dict(img='val2017/'),
99
+ test_mode=True,
100
+ pipeline=test_pipeline,
101
+ backend_args=backend_args)))
102
+ test_dataloader = val_dataloader
103
+
104
+ val_evaluator.update(
105
+ dict(
106
+ type=CocoMetric,
107
+ ann_file=data_root + 'annotations/instances_val2017.json',
108
+ metric=['bbox', 'segm'],
109
+ backend_args=backend_args))
110
+ test_evaluator = val_evaluator
111
+
112
+ # training schedule for 3x with `RepeatDataset`
113
+ train_cfg.update(dict(type=EpochBasedTrainLoop, max_epochs=12, val_interval=1))
114
+ val_cfg.update(dict(type=ValLoop))
115
+ test_cfg.update(dict(type=TestLoop))
116
+
117
+ # learning rate
118
+ param_scheduler = [
119
+ dict(type=LinearLR, start_factor=0.001, by_epoch=False, begin=0, end=500),
120
+ dict(
121
+ type=MultiStepLR,
122
+ begin=0,
123
+ end=12,
124
+ by_epoch=True,
125
+ milestones=[9, 11],
126
+ gamma=0.1)
127
+ ]
128
+
129
+ # optimizer
130
+ optim_wrapper.update(
131
+ dict(
132
+ type=OptimWrapper,
133
+ optimizer=dict(type=SGD, lr=0.02, momentum=0.9, weight_decay=0.0001)))
134
+ # Default setting for scaling LR automatically
135
+ # - `enable`: whether to enable automatic LR scaling by default.
137
+ # - `base_batch_size` = (8 GPUs) x (2 samples per GPU).
138
+ auto_scale_lr.update(dict(enable=False, base_batch_size=16))
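
The `auto_scale_lr` block above encodes the linear LR scaling rule. A small illustration of the value a runner would use when `enable=True` and the actual global batch differs from `base_batch_size` (a sketch of the rule, not mmengine's implementation):

    def scaled_lr(base_lr: float, num_gpus: int, samples_per_gpu: int,
                  base_batch_size: int = 16) -> float:
        # Linear scaling rule: the LR grows or shrinks with the ratio of the
        # actual total batch size to the reference one (8 GPUs x 2 samples = 16).
        return base_lr * (num_gpus * samples_per_gpu) / base_batch_size

    print(scaled_lr(0.02, 8, 2))   # 0.02 -> reference setting, unchanged
    print(scaled_lr(0.02, 4, 2))   # 0.01 -> half the batch, half the LR
    print(scaled_lr(0.02, 16, 2))  # 0.04 -> double the batch, double the LR
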
mmdet/configs/common/ms_poly_90k_coco_instance.py ADDED
@@ -0,0 +1,153 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+
3
+ # Please refer to https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta for more details. # noqa
4
+ # mmcv >= 2.0.1
5
+ # mmengine >= 0.8.0
6
+
7
+ from mmengine.config import read_base
8
+
9
+ with read_base():
10
+ from .._base_.default_runtime import *
11
+
12
+ from mmcv.transforms import RandomChoiceResize
13
+ from mmengine.dataset import RepeatDataset
14
+ from mmengine.dataset.sampler import DefaultSampler, InfiniteSampler
15
+ from mmengine.optim import OptimWrapper
16
+ from mmengine.optim.scheduler.lr_scheduler import LinearLR, MultiStepLR
17
+ from mmengine.runner.loops import IterBasedTrainLoop, TestLoop, ValLoop
18
+ from torch.optim import SGD
19
+
20
+ from mmdet.datasets import AspectRatioBatchSampler, CocoDataset
21
+ from mmdet.datasets.transforms.formatting import PackDetInputs
22
+ from mmdet.datasets.transforms.loading import (FilterAnnotations,
23
+ LoadAnnotations,
24
+ LoadImageFromFile)
25
+ from mmdet.datasets.transforms.transforms import (CachedMixUp, CachedMosaic,
26
+ Pad, RandomCrop, RandomFlip,
27
+ RandomResize, Resize)
28
+ from mmdet.evaluation import CocoMetric
29
+
30
+ # dataset settings
31
+ dataset_type = CocoDataset
32
+ data_root = 'data/coco/'
33
+ # Example to use different file client
34
+ # Method 1: simply set the data root and let the file I/O module
35
+ # automatically infer from prefix (LMDB and Memcache are not supported yet)
36
+
37
+ # data_root = 's3://openmmlab/datasets/detection/coco/'
38
+
39
+ # Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6
40
+ # backend_args = dict(
41
+ # backend='petrel',
42
+ # path_mapping=dict({
43
+ # './data/': 's3://openmmlab/datasets/detection/',
44
+ # 'data/': 's3://openmmlab/datasets/detection/'
45
+ # }))
46
+ backend_args = None
47
+
48
+ # Align with Detectron2
49
+ backend = 'pillow'
50
+ train_pipeline = [
51
+ dict(
52
+ type=LoadImageFromFile,
53
+ backend_args=backend_args,
54
+ imdecode_backend=backend),
55
+ dict(
56
+ type=LoadAnnotations, with_bbox=True, with_mask=True, poly2mask=False),
57
+ dict(
58
+ type=RandomChoiceResize,
59
+ scales=[(1333, 640), (1333, 672), (1333, 704), (1333, 736),
60
+ (1333, 768), (1333, 800)],
61
+ keep_ratio=True,
62
+ backend=backend),
63
+ dict(type=RandomFlip, prob=0.5),
64
+ dict(type=PackDetInputs)
65
+ ]
66
+ test_pipeline = [
67
+ dict(
68
+ type=LoadImageFromFile,
69
+ backend_args=backend_args,
70
+ imdecode_backend=backend),
71
+ dict(type=Resize, scale=(1333, 800), keep_ratio=True, backend=backend),
72
+ dict(
73
+ type=LoadAnnotations, with_bbox=True, with_mask=True, poly2mask=False),
74
+ dict(
75
+ type=PackDetInputs,
76
+ meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
77
+ 'scale_factor'))
78
+ ]
79
+ train_dataloader.update(
80
+ dict(
81
+ batch_size=2,
82
+ num_workers=2,
83
+ persistent_workers=True,
84
+ pin_memory=True,
85
+ sampler=dict(type=InfiniteSampler, shuffle=True),
86
+ batch_sampler=dict(type=AspectRatioBatchSampler),
87
+ dataset=dict(
88
+ type=dataset_type,
89
+ data_root=data_root,
90
+ ann_file='annotations/instances_train2017.json',
91
+ data_prefix=dict(img='train2017/'),
92
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
93
+ pipeline=train_pipeline,
94
+ backend_args=backend_args)))
95
+ val_dataloader.update(
96
+ dict(
97
+ batch_size=1,
98
+ num_workers=2,
99
+ persistent_workers=True,
100
+ drop_last=False,
101
+ pin_memory=True,
102
+ sampler=dict(type=DefaultSampler, shuffle=False),
103
+ dataset=dict(
104
+ type=dataset_type,
105
+ data_root=data_root,
106
+ ann_file='annotations/instances_val2017.json',
107
+ data_prefix=dict(img='val2017/'),
108
+ test_mode=True,
109
+ pipeline=test_pipeline,
110
+ backend_args=backend_args)))
111
+ test_dataloader = val_dataloader
112
+
113
+ val_evaluator.update(
114
+ dict(
115
+ type=CocoMetric,
116
+ ann_file=data_root + 'annotations/instances_val2017.json',
117
+ metric=['bbox', 'segm'],
118
+ format_only=False,
119
+ backend_args=backend_args))
120
+ test_evaluator = val_evaluator
121
+
122
+ # training schedule for 90k
123
+ max_iter = 90000
124
+ train_cfg.update(
125
+ dict(type=IterBasedTrainLoop, max_iters=max_iter, val_interval=10000))
126
+ val_cfg.update(dict(type=ValLoop))
127
+ test_cfg.update(dict(type=TestLoop))
128
+
129
+ # learning rate
130
+ param_scheduler = [
131
+ dict(type=LinearLR, start_factor=0.001, by_epoch=False, begin=0, end=1000),
132
+ dict(
133
+ type=MultiStepLR,
134
+ begin=0,
135
+ end=max_iter,
136
+ by_epoch=False,
137
+ milestones=[60000, 80000],
138
+ gamma=0.1)
139
+ ]
140
+
141
+ # optimizer
142
+ optim_wrapper.update(
143
+ dict(
144
+ type=OptimWrapper,
145
+ optimizer=dict(type=SGD, lr=0.02, momentum=0.9, weight_decay=0.0001)))
146
+ # Default setting for scaling LR automatically
147
+ # - `enable`: whether to enable automatic LR scaling by default.
149
+ # - `base_batch_size` = (8 GPUs) x (2 samples per GPU).
150
+ auto_scale_lr.update(dict(enable=False, base_batch_size=16))
151
+
152
+ default_hooks.update(dict(checkpoint=dict(by_epoch=False, interval=10000)))
153
+ log_processor.update(dict(by_epoch=False))
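
Taken together, the `LinearLR` warmup and `MultiStepLR` steps above yield the usual warmup-then-step-decay curve. A compact sketch that approximates the resulting learning rate at a given iteration of the 90k schedule (illustrative only, not mmengine's code):

    def lr_at(iteration: int, base_lr: float = 0.02) -> float:
        if iteration < 1000:                      # LinearLR: warm up from 0.1% of base_lr
            factor = 0.001 + (1.0 - 0.001) * iteration / 1000
            return base_lr * factor
        lr = base_lr
        for milestone in (60_000, 80_000):        # MultiStepLR: decay by gamma=0.1
            if iteration >= milestone:
                lr *= 0.1
        return lr

    print(lr_at(0), lr_at(1_000), lr_at(70_000), lr_at(85_000))
    # ~2e-05, 0.02, 0.002, 0.0002
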