diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..fd2e17704bf6729f8846798c8d4b4e1834990416 Binary files /dev/null and b/.DS_Store differ diff --git a/app.py b/app.py new file mode 100644 index 0000000000000000000000000000000000000000..c7cc1e3563d973efc92986861b66b25640073719 --- /dev/null +++ b/app.py @@ -0,0 +1,63 @@ +import os + +import torch +import torch.nn.functional as F +import torchvision.transforms as T +from mmseg.apis import init_segmentor, inference_segmentor, show_result_pyplot +from mmseg.core.evaluation import get_palette +import mmcv + +import gradio as gr +from huggingface_hub import hf_hub_download + +# Device on which to run the model +# Set to cuda to load on GPU +device = "cpu" +checkpoint_file = hf_hub_download(repo_id="Andy1621/uniformer", filename="upernet_global_small.pth") +config_file = './exp/upernet_hybrid_small/config.py' +# init segmentor +# build the model from a config file and a checkpoint file +model = init_segmentor(config_file, checkpoint_file, device=device) + + +def set_example_image(example: list) -> dict: + return gr.Image.update(value=example[0]) + + +def inference(img): + result = inference_segmentor(model, img) + res_img = show_result_pyplot(model, img, result, get_palette('cityscapes')) + return res_img + + +demo = gr.Blocks() +with demo: + gr.Markdown( + """ + # UniFormer-S + Gradio demo for UniFormer: To use it, simply upload your image, or click one of the examples to load it. Read more at the links below. + """ + ) + + with gr.Box(): + with gr.Row(): + with gr.Column(): + with gr.Row(): + input_image = gr.Image(label='Input Image', type='numpy') + with gr.Row(): + submit_button = gr.Button('Submit') + with gr.Column(): + res_image = gr.Image(type='numpy', label='Segmentation Results') + with gr.Row(): + example_images = gr.Dataset(components=[input_image], samples=[['demo.jpg']]) + + gr.Markdown( + """ +
[UniFormer: Unifying Convolution and Self-attention for Visual Recognition](https://arxiv.org/abs/2201.09450) | [Github Repo](https://github.com/Sense-X/UniFormer)
+ """ + ) + + submit_button.click(fn=inference, inputs=input_image, outputs=res_image) + example_images.click(fn=set_example_image, inputs=example_images, outputs=example_images.components) + +demo.launch(enable_queue=True) \ No newline at end of file diff --git a/configs/.DS_Store b/configs/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..44cfcc73b53ac540788a34f869001cc282b48591 Binary files /dev/null and b/configs/.DS_Store differ diff --git a/configs/_base_/.DS_Store b/configs/_base_/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..7bdaa5f089348020d5ea7a9fea61d945d5b77788 Binary files /dev/null and b/configs/_base_/.DS_Store differ diff --git a/configs/_base_/datasets/ade20k.py b/configs/_base_/datasets/ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..efc8b4bb20c981f3db6df7eb52b3dc0744c94cc0 --- /dev/null +++ b/configs/_base_/datasets/ade20k.py @@ -0,0 +1,54 @@ +# dataset settings +dataset_type = 'ADE20KDataset' +data_root = 'data/ade/ADEChallengeData2016' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +crop_size = (512, 512) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', reduce_zero_label=True), + dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(2048, 512), + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=4, + workers_per_gpu=4, + train=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/training', + ann_dir='annotations/training', + pipeline=train_pipeline), + val=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/validation', + ann_dir='annotations/validation', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/validation', + ann_dir='annotations/validation', + pipeline=test_pipeline)) diff --git a/configs/_base_/datasets/chase_db1.py b/configs/_base_/datasets/chase_db1.py new file mode 100644 index 0000000000000000000000000000000000000000..298594ea925f87f22b37094a2ec50e370aec96a0 --- /dev/null +++ b/configs/_base_/datasets/chase_db1.py @@ -0,0 +1,59 @@ +# dataset settings +dataset_type = 'ChaseDB1Dataset' +data_root = 'data/CHASE_DB1' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +img_scale = (960, 999) +crop_size = (128, 128) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + 
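# DefaultFormatBundle above converts the image and label to tensors wrapped in
# DataContainers; the Collect step below then picks which keys are handed to
# the model (here the image and its semantic segmentation map).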
dict(type='Collect', keys=['img', 'gt_semantic_seg']) +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=img_scale, + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) +] + +data = dict( + samples_per_gpu=4, + workers_per_gpu=4, + train=dict( + type='RepeatDataset', + times=40000, + dataset=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/training', + ann_dir='annotations/training', + pipeline=train_pipeline)), + val=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/validation', + ann_dir='annotations/validation', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/validation', + ann_dir='annotations/validation', + pipeline=test_pipeline)) diff --git a/configs/_base_/datasets/cityscapes.py b/configs/_base_/datasets/cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..f21867c63e1835f6fceb61f066e802fd8fd2a735 --- /dev/null +++ b/configs/_base_/datasets/cityscapes.py @@ -0,0 +1,54 @@ +# dataset settings +dataset_type = 'CityscapesDataset' +data_root = 'data/cityscapes/' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +crop_size = (512, 1024) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=(2048, 1024), ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(2048, 1024), + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=2, + workers_per_gpu=2, + train=dict( + type=dataset_type, + data_root=data_root, + img_dir='leftImg8bit/train', + ann_dir='gtFine/train', + pipeline=train_pipeline), + val=dict( + type=dataset_type, + data_root=data_root, + img_dir='leftImg8bit/val', + ann_dir='gtFine/val', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + data_root=data_root, + img_dir='leftImg8bit/val', + ann_dir='gtFine/val', + pipeline=test_pipeline)) diff --git a/configs/_base_/datasets/cityscapes_769x769.py b/configs/_base_/datasets/cityscapes_769x769.py new file mode 100644 index 0000000000000000000000000000000000000000..336c7b254fe392b4703039fec86a83acdbd2e1a5 --- /dev/null +++ b/configs/_base_/datasets/cityscapes_769x769.py @@ -0,0 +1,35 @@ +_base_ = './cityscapes.py' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +crop_size = (769, 769) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=(2049, 1025), ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + 
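# Multi-scale training: Resize above rescales to a random ratio in [0.5, 2.0]
# of img_scale before RandomCrop cuts a fixed 769x769 patch; cat_max_ratio=0.75
# re-samples crops in which a single class would cover more than 75% of pixels.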
dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(2049, 1025), + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) diff --git a/configs/_base_/datasets/drive.py b/configs/_base_/datasets/drive.py new file mode 100644 index 0000000000000000000000000000000000000000..06e8ff606e0d2a4514ec8b7d2c6c436a32efcbf4 --- /dev/null +++ b/configs/_base_/datasets/drive.py @@ -0,0 +1,59 @@ +# dataset settings +dataset_type = 'DRIVEDataset' +data_root = 'data/DRIVE' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +img_scale = (584, 565) +crop_size = (64, 64) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']) +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=img_scale, + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) +] + +data = dict( + samples_per_gpu=4, + workers_per_gpu=4, + train=dict( + type='RepeatDataset', + times=40000, + dataset=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/training', + ann_dir='annotations/training', + pipeline=train_pipeline)), + val=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/validation', + ann_dir='annotations/validation', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/validation', + ann_dir='annotations/validation', + pipeline=test_pipeline)) diff --git a/configs/_base_/datasets/hrf.py b/configs/_base_/datasets/hrf.py new file mode 100644 index 0000000000000000000000000000000000000000..242d790eb1b83e75cf6b7eaa7a35c674099311ad --- /dev/null +++ b/configs/_base_/datasets/hrf.py @@ -0,0 +1,59 @@ +# dataset settings +dataset_type = 'HRFDataset' +data_root = 'data/HRF' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +img_scale = (2336, 3504) +crop_size = (256, 256) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + 
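# Pad below fills image pixels with 0 and label pixels with 255; 255 is the
# default ignore index, so padded regions contribute nothing to the loss.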
dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']) +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=img_scale, + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) +] + +data = dict( + samples_per_gpu=4, + workers_per_gpu=4, + train=dict( + type='RepeatDataset', + times=40000, + dataset=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/training', + ann_dir='annotations/training', + pipeline=train_pipeline)), + val=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/validation', + ann_dir='annotations/validation', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/validation', + ann_dir='annotations/validation', + pipeline=test_pipeline)) diff --git a/configs/_base_/datasets/pascal_context.py b/configs/_base_/datasets/pascal_context.py new file mode 100644 index 0000000000000000000000000000000000000000..ff65bad1b86d7e3a5980bb5b9fc55798dc8df5f4 --- /dev/null +++ b/configs/_base_/datasets/pascal_context.py @@ -0,0 +1,60 @@ +# dataset settings +dataset_type = 'PascalContextDataset' +data_root = 'data/VOCdevkit/VOC2010/' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) + +img_scale = (520, 520) +crop_size = (480, 480) + +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=img_scale, + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=4, + workers_per_gpu=4, + train=dict( + type=dataset_type, + data_root=data_root, + img_dir='JPEGImages', + ann_dir='SegmentationClassContext', + split='ImageSets/SegmentationContext/train.txt', + pipeline=train_pipeline), + val=dict( + type=dataset_type, + data_root=data_root, + img_dir='JPEGImages', + ann_dir='SegmentationClassContext', + split='ImageSets/SegmentationContext/val.txt', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + data_root=data_root, + img_dir='JPEGImages', + ann_dir='SegmentationClassContext', + split='ImageSets/SegmentationContext/val.txt', + pipeline=test_pipeline)) diff --git a/configs/_base_/datasets/pascal_context_59.py b/configs/_base_/datasets/pascal_context_59.py new file mode 100644 index 0000000000000000000000000000000000000000..37585abab89834b95cd5bdd993b994fca1db65f6 --- /dev/null +++ b/configs/_base_/datasets/pascal_context_59.py @@ -0,0 +1,60 @@ +# dataset settings +dataset_type = 'PascalContextDataset59' +data_root = 
'data/VOCdevkit/VOC2010/' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) + +img_scale = (520, 520) +crop_size = (480, 480) + +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', reduce_zero_label=True), + dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=img_scale, + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=4, + workers_per_gpu=4, + train=dict( + type=dataset_type, + data_root=data_root, + img_dir='JPEGImages', + ann_dir='SegmentationClassContext', + split='ImageSets/SegmentationContext/train.txt', + pipeline=train_pipeline), + val=dict( + type=dataset_type, + data_root=data_root, + img_dir='JPEGImages', + ann_dir='SegmentationClassContext', + split='ImageSets/SegmentationContext/val.txt', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + data_root=data_root, + img_dir='JPEGImages', + ann_dir='SegmentationClassContext', + split='ImageSets/SegmentationContext/val.txt', + pipeline=test_pipeline)) diff --git a/configs/_base_/datasets/pascal_voc12.py b/configs/_base_/datasets/pascal_voc12.py new file mode 100644 index 0000000000000000000000000000000000000000..ba1d42d0c5781f56dc177d860d856bb34adce555 --- /dev/null +++ b/configs/_base_/datasets/pascal_voc12.py @@ -0,0 +1,57 @@ +# dataset settings +dataset_type = 'PascalVOCDataset' +data_root = 'data/VOCdevkit/VOC2012' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +crop_size = (512, 512) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(2048, 512), + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=4, + workers_per_gpu=4, + train=dict( + type=dataset_type, + data_root=data_root, + img_dir='JPEGImages', + ann_dir='SegmentationClass', + split='ImageSets/Segmentation/train.txt', + pipeline=train_pipeline), + val=dict( + type=dataset_type, + data_root=data_root, + img_dir='JPEGImages', + ann_dir='SegmentationClass', + split='ImageSets/Segmentation/val.txt', + pipeline=test_pipeline), + 
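# No held-out test annotations are used here; the test split below simply
# re-evaluates the validation set.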
test=dict( + type=dataset_type, + data_root=data_root, + img_dir='JPEGImages', + ann_dir='SegmentationClass', + split='ImageSets/Segmentation/val.txt', + pipeline=test_pipeline)) diff --git a/configs/_base_/datasets/pascal_voc12_aug.py b/configs/_base_/datasets/pascal_voc12_aug.py new file mode 100644 index 0000000000000000000000000000000000000000..3f23b6717d53ad29f02dd15046802a2631a5076b --- /dev/null +++ b/configs/_base_/datasets/pascal_voc12_aug.py @@ -0,0 +1,9 @@ +_base_ = './pascal_voc12.py' +# dataset settings +data = dict( + train=dict( + ann_dir=['SegmentationClass', 'SegmentationClassAug'], + split=[ + 'ImageSets/Segmentation/train.txt', + 'ImageSets/Segmentation/aug.txt' + ])) diff --git a/configs/_base_/datasets/stare.py b/configs/_base_/datasets/stare.py new file mode 100644 index 0000000000000000000000000000000000000000..3f71b25488cc11a6b4d582ac52b5a24e1ad1cf8e --- /dev/null +++ b/configs/_base_/datasets/stare.py @@ -0,0 +1,59 @@ +# dataset settings +dataset_type = 'STAREDataset' +data_root = 'data/STARE' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +img_scale = (605, 700) +crop_size = (128, 128) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']) +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=img_scale, + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) +] + +data = dict( + samples_per_gpu=4, + workers_per_gpu=4, + train=dict( + type='RepeatDataset', + times=40000, + dataset=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/training', + ann_dir='annotations/training', + pipeline=train_pipeline)), + val=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/validation', + ann_dir='annotations/validation', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/validation', + ann_dir='annotations/validation', + pipeline=test_pipeline)) diff --git a/configs/_base_/default_runtime.py b/configs/_base_/default_runtime.py new file mode 100644 index 0000000000000000000000000000000000000000..b564cc4e7e7d9a67dacaaddecb100e4d8f5c005b --- /dev/null +++ b/configs/_base_/default_runtime.py @@ -0,0 +1,14 @@ +# yapf:disable +log_config = dict( + interval=50, + hooks=[ + dict(type='TextLoggerHook', by_epoch=False), + # dict(type='TensorboardLoggerHook') + ]) +# yapf:enable +dist_params = dict(backend='nccl') +log_level = 'INFO' +load_from = None +resume_from = None +workflow = [('train', 1)] +cudnn_benchmark = True diff --git a/configs/_base_/models/ann_r50-d8.py b/configs/_base_/models/ann_r50-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..a2cb653827e44e6015b3b83bc578003e614a6aa1 --- /dev/null +++ b/configs/_base_/models/ann_r50-d8.py @@ -0,0 +1,46 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) 
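# SyncBN synchronizes BatchNorm statistics across GPUs and assumes distributed
# training; for single-GPU or CPU runs it is commonly swapped for plain BN, e.g.:
# norm_cfg = dict(type='BN', requires_grad=True)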
+model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='ANNHead', + in_channels=[1024, 2048], + in_index=[2, 3], + channels=512, + project_channels=256, + query_scales=(1, ), + key_pool_scales=(1, 3, 6, 8), + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/configs/_base_/models/apcnet_r50-d8.py b/configs/_base_/models/apcnet_r50-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..c8f5316cbcf3896ba9de7ca2c801eba512f01d5e --- /dev/null +++ b/configs/_base_/models/apcnet_r50-d8.py @@ -0,0 +1,44 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='APCHead', + in_channels=2048, + in_index=3, + channels=512, + pool_scales=(1, 2, 3, 6), + dropout_ratio=0.1, + num_classes=19, + norm_cfg=dict(type='SyncBN', requires_grad=True), + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/configs/_base_/models/ccnet_r50-d8.py b/configs/_base_/models/ccnet_r50-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..794148f576b9e215c3c6963e73dffe98204b7717 --- /dev/null +++ b/configs/_base_/models/ccnet_r50-d8.py @@ -0,0 +1,44 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='CCHead', + in_channels=2048, + in_index=3, + channels=512, + recurrence=2, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + 
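# The auxiliary FCN head taps stage-3 features (in_index=2) for deep
# supervision; its loss is down-weighted to 0.4 below, and the head is
# discarded at inference time.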
loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/configs/_base_/models/cgnet.py b/configs/_base_/models/cgnet.py new file mode 100644 index 0000000000000000000000000000000000000000..eff8d9458c877c5db894957e0b1b4597e40da6ab --- /dev/null +++ b/configs/_base_/models/cgnet.py @@ -0,0 +1,35 @@ +# model settings +norm_cfg = dict(type='SyncBN', eps=1e-03, requires_grad=True) +model = dict( + type='EncoderDecoder', + backbone=dict( + type='CGNet', + norm_cfg=norm_cfg, + in_channels=3, + num_channels=(32, 64, 128), + num_blocks=(3, 21), + dilations=(2, 4), + reductions=(8, 16)), + decode_head=dict( + type='FCNHead', + in_channels=256, + in_index=2, + channels=256, + num_convs=0, + concat_input=False, + dropout_ratio=0, + num_classes=19, + norm_cfg=norm_cfg, + loss_decode=dict( + type='CrossEntropyLoss', + use_sigmoid=False, + loss_weight=1.0, + class_weight=[ + 2.5959933, 6.7415504, 3.5354059, 9.8663225, 9.690899, 9.369352, + 10.289121, 9.953208, 4.3097677, 9.490387, 7.674431, 9.396905, + 10.347791, 6.3927646, 10.226669, 10.241062, 10.280587, + 10.396974, 10.055647 + ])), + # model training and testing settings + train_cfg=dict(sampler=None), + test_cfg=dict(mode='whole')) diff --git a/configs/_base_/models/danet_r50-d8.py b/configs/_base_/models/danet_r50-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..2c934939fac48525f22ad86f489a041dd7db7d09 --- /dev/null +++ b/configs/_base_/models/danet_r50-d8.py @@ -0,0 +1,44 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='DAHead', + in_channels=2048, + in_index=3, + channels=512, + pam_channels=64, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/configs/_base_/models/deeplabv3_r50-d8.py b/configs/_base_/models/deeplabv3_r50-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..d7a43bee01422ad4795dd27874e0cd4bb6cbfecf --- /dev/null +++ b/configs/_base_/models/deeplabv3_r50-d8.py @@ -0,0 +1,44 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='ASPPHead', + in_channels=2048, + in_index=3, + channels=512, + dilations=(1, 12, 24, 36), + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + 
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/configs/_base_/models/deeplabv3_unet_s5-d16.py b/configs/_base_/models/deeplabv3_unet_s5-d16.py new file mode 100644 index 0000000000000000000000000000000000000000..0cd262999d8b2cb8e14a5c32190ae73f479d8e81 --- /dev/null +++ b/configs/_base_/models/deeplabv3_unet_s5-d16.py @@ -0,0 +1,50 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained=None, + backbone=dict( + type='UNet', + in_channels=3, + base_channels=64, + num_stages=5, + strides=(1, 1, 1, 1, 1), + enc_num_convs=(2, 2, 2, 2, 2), + dec_num_convs=(2, 2, 2, 2), + downsamples=(True, True, True, True), + enc_dilations=(1, 1, 1, 1, 1), + dec_dilations=(1, 1, 1, 1), + with_cp=False, + conv_cfg=None, + norm_cfg=norm_cfg, + act_cfg=dict(type='ReLU'), + upsample_cfg=dict(type='InterpConv'), + norm_eval=False), + decode_head=dict( + type='ASPPHead', + in_channels=64, + in_index=4, + channels=16, + dilations=(1, 12, 24, 36), + dropout_ratio=0.1, + num_classes=2, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=128, + in_index=3, + channels=64, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=2, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='slide', crop_size=256, stride=170)) diff --git a/configs/_base_/models/deeplabv3plus_r50-d8.py b/configs/_base_/models/deeplabv3plus_r50-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..050e39e091d816df9028d23aa3ecf9db74e441e1 --- /dev/null +++ b/configs/_base_/models/deeplabv3plus_r50-d8.py @@ -0,0 +1,46 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='DepthwiseSeparableASPPHead', + in_channels=2048, + in_index=3, + channels=512, + dilations=(1, 12, 24, 36), + c1_in_channels=256, + c1_channels=48, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/configs/_base_/models/dmnet_r50-d8.py b/configs/_base_/models/dmnet_r50-d8.py new file mode 100644 index 
0000000000000000000000000000000000000000..d22ba52640bebd805b3b8d07025e276dfb023759 --- /dev/null +++ b/configs/_base_/models/dmnet_r50-d8.py @@ -0,0 +1,44 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='DMHead', + in_channels=2048, + in_index=3, + channels=512, + filter_sizes=(1, 3, 5, 7), + dropout_ratio=0.1, + num_classes=19, + norm_cfg=dict(type='SyncBN', requires_grad=True), + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/configs/_base_/models/dnl_r50-d8.py b/configs/_base_/models/dnl_r50-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..edb4c174c51e34c103737ba39bfc48bf831e561d --- /dev/null +++ b/configs/_base_/models/dnl_r50-d8.py @@ -0,0 +1,46 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='DNLHead', + in_channels=2048, + in_index=3, + channels=512, + dropout_ratio=0.1, + reduction=2, + use_scale=True, + mode='embedded_gaussian', + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/configs/_base_/models/emanet_r50-d8.py b/configs/_base_/models/emanet_r50-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..26adcd430926de0862204a71d345f2543167f27b --- /dev/null +++ b/configs/_base_/models/emanet_r50-d8.py @@ -0,0 +1,47 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='EMAHead', + in_channels=2048, + in_index=3, + channels=256, + ema_channels=512, + num_bases=64, + num_stages=3, + momentum=0.1, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', 
use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/configs/_base_/models/encnet_r50-d8.py b/configs/_base_/models/encnet_r50-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..be777123a886503172a95fe0719e956a147bbd68 --- /dev/null +++ b/configs/_base_/models/encnet_r50-d8.py @@ -0,0 +1,48 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='EncHead', + in_channels=[512, 1024, 2048], + in_index=(1, 2, 3), + channels=512, + num_codes=32, + use_se_loss=True, + add_lateral=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_se_decode=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.2)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/configs/_base_/models/fast_scnn.py b/configs/_base_/models/fast_scnn.py new file mode 100644 index 0000000000000000000000000000000000000000..32fdeb659355a5ce5ef2cc7c2f30742703811cdf --- /dev/null +++ b/configs/_base_/models/fast_scnn.py @@ -0,0 +1,57 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True, momentum=0.01) +model = dict( + type='EncoderDecoder', + backbone=dict( + type='FastSCNN', + downsample_dw_channels=(32, 48), + global_in_channels=64, + global_block_channels=(64, 96, 128), + global_block_strides=(2, 2, 1), + global_out_channels=128, + higher_in_channels=64, + lower_in_channels=128, + fusion_out_channels=128, + out_indices=(0, 1, 2), + norm_cfg=norm_cfg, + align_corners=False), + decode_head=dict( + type='DepthwiseSeparableFCNHead', + in_channels=128, + channels=128, + concat_input=False, + num_classes=19, + in_index=-1, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.4)), + auxiliary_head=[ + dict( + type='FCNHead', + in_channels=128, + channels=32, + num_convs=1, + num_classes=19, + in_index=-2, + norm_cfg=norm_cfg, + concat_input=False, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.4)), + dict( + type='FCNHead', + in_channels=64, + channels=32, + num_convs=1, + num_classes=19, + in_index=-3, + norm_cfg=norm_cfg, + concat_input=False, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.4)), + ], + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git 
a/configs/_base_/models/fcn_hr18.py b/configs/_base_/models/fcn_hr18.py new file mode 100644 index 0000000000000000000000000000000000000000..c3e299bc89ada56ca14bbffcbdb08a586b8ed9e9 --- /dev/null +++ b/configs/_base_/models/fcn_hr18.py @@ -0,0 +1,52 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://msra/hrnetv2_w18', + backbone=dict( + type='HRNet', + norm_cfg=norm_cfg, + norm_eval=False, + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(18, 36)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(18, 36, 72)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(18, 36, 72, 144)))), + decode_head=dict( + type='FCNHead', + in_channels=[18, 36, 72, 144], + in_index=(0, 1, 2, 3), + channels=sum([18, 36, 72, 144]), + input_transform='resize_concat', + kernel_size=1, + num_convs=1, + concat_input=False, + dropout_ratio=-1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/configs/_base_/models/fcn_r50-d8.py b/configs/_base_/models/fcn_r50-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..5e98f6cc918b6146fc6d613c6918e825ef1355c3 --- /dev/null +++ b/configs/_base_/models/fcn_r50-d8.py @@ -0,0 +1,45 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='FCNHead', + in_channels=2048, + in_index=3, + channels=512, + num_convs=2, + concat_input=True, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/configs/_base_/models/fcn_unet_s5-d16.py b/configs/_base_/models/fcn_unet_s5-d16.py new file mode 100644 index 0000000000000000000000000000000000000000..a33e7972877f902d0e7d18401ca675e3e4e60a18 --- /dev/null +++ b/configs/_base_/models/fcn_unet_s5-d16.py @@ -0,0 +1,51 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained=None, + backbone=dict( + type='UNet', + in_channels=3, + base_channels=64, + num_stages=5, + strides=(1, 1, 1, 1, 1), + enc_num_convs=(2, 2, 2, 2, 2), + dec_num_convs=(2, 2, 2, 2), + downsamples=(True, True, True, True), + enc_dilations=(1, 1, 1, 1, 1), + dec_dilations=(1, 1, 1, 1), + with_cp=False, + conv_cfg=None, + norm_cfg=norm_cfg, + 
act_cfg=dict(type='ReLU'), + upsample_cfg=dict(type='InterpConv'), + norm_eval=False), + decode_head=dict( + type='FCNHead', + in_channels=64, + in_index=4, + channels=64, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=2, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=128, + in_index=3, + channels=64, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=2, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='slide', crop_size=256, stride=170)) diff --git a/configs/_base_/models/fpn_r50.py b/configs/_base_/models/fpn_r50.py new file mode 100644 index 0000000000000000000000000000000000000000..86ab327db92e44c14822d65f1c9277cb007f17c1 --- /dev/null +++ b/configs/_base_/models/fpn_r50.py @@ -0,0 +1,36 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 1, 1), + strides=(1, 2, 2, 2), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + num_outs=4), + decode_head=dict( + type='FPNHead', + in_channels=[256, 256, 256, 256], + in_index=[0, 1, 2, 3], + feature_strides=[4, 8, 16, 32], + channels=128, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/configs/_base_/models/fpn_uniformer.py b/configs/_base_/models/fpn_uniformer.py new file mode 100644 index 0000000000000000000000000000000000000000..8aae98c5991055bfcc08e82ccdc09f8b1d9f8a8d --- /dev/null +++ b/configs/_base_/models/fpn_uniformer.py @@ -0,0 +1,35 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + backbone=dict( + type='UniFormer', + embed_dim=[64, 128, 320, 512], + layers=[3, 4, 8, 3], + head_dim=64, + mlp_ratio=4., + qkv_bias=True, + drop_rate=0., + attn_drop_rate=0., + drop_path_rate=0.1), + neck=dict( + type='FPN', + in_channels=[64, 128, 320, 512], + out_channels=256, + num_outs=4), + decode_head=dict( + type='FPNHead', + in_channels=[256, 256, 256, 256], + in_index=[0, 1, 2, 3], + feature_strides=[4, 8, 16, 32], + channels=128, + dropout_ratio=0.1, + num_classes=150, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole') +) diff --git a/configs/_base_/models/gcnet_r50-d8.py b/configs/_base_/models/gcnet_r50-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..3d2ad69f5c22adfe79d5fdabf920217628987166 --- /dev/null +++ b/configs/_base_/models/gcnet_r50-d8.py @@ -0,0 +1,46 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 
3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='GCHead', + in_channels=2048, + in_index=3, + channels=512, + ratio=1 / 4., + pooling_type='att', + fusion_types=('channel_add', ), + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/configs/_base_/models/lraspp_m-v3-d8.py b/configs/_base_/models/lraspp_m-v3-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..93258242a90695cc94a7c6bd41562d6a75988771 --- /dev/null +++ b/configs/_base_/models/lraspp_m-v3-d8.py @@ -0,0 +1,25 @@ +# model settings +norm_cfg = dict(type='SyncBN', eps=0.001, requires_grad=True) +model = dict( + type='EncoderDecoder', + backbone=dict( + type='MobileNetV3', + arch='large', + out_indices=(1, 3, 16), + norm_cfg=norm_cfg), + decode_head=dict( + type='LRASPPHead', + in_channels=(16, 24, 960), + in_index=(0, 1, 2), + channels=128, + input_transform='multiple_select', + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + act_cfg=dict(type='ReLU'), + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/configs/_base_/models/nonlocal_r50-d8.py b/configs/_base_/models/nonlocal_r50-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..5674a39854cafd1f2e363bac99c58ccae62f24da --- /dev/null +++ b/configs/_base_/models/nonlocal_r50-d8.py @@ -0,0 +1,46 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='NLHead', + in_channels=2048, + in_index=3, + channels=512, + dropout_ratio=0.1, + reduction=2, + use_scale=True, + mode='embedded_gaussian', + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/configs/_base_/models/ocrnet_hr18.py b/configs/_base_/models/ocrnet_hr18.py new file mode 100644 index 0000000000000000000000000000000000000000..c60f62a7cdf3f5c5096a7a7e725e8268fddcb057 --- /dev/null +++ b/configs/_base_/models/ocrnet_hr18.py @@ -0,0 +1,68 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='CascadeEncoderDecoder', + 
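# CascadeEncoderDecoder runs its decode heads in sequence: the FCN head
# produces a coarse prediction that the OCR head refines using object
# context pooling.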
num_stages=2, + pretrained='open-mmlab://msra/hrnetv2_w18', + backbone=dict( + type='HRNet', + norm_cfg=norm_cfg, + norm_eval=False, + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(18, 36)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(18, 36, 72)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(18, 36, 72, 144)))), + decode_head=[ + dict( + type='FCNHead', + in_channels=[18, 36, 72, 144], + channels=sum([18, 36, 72, 144]), + in_index=(0, 1, 2, 3), + input_transform='resize_concat', + kernel_size=1, + num_convs=1, + concat_input=False, + dropout_ratio=-1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='OCRHead', + in_channels=[18, 36, 72, 144], + in_index=(0, 1, 2, 3), + input_transform='resize_concat', + channels=512, + ocr_channels=256, + dropout_ratio=-1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + ], + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/configs/_base_/models/ocrnet_r50-d8.py b/configs/_base_/models/ocrnet_r50-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..615aa3ff703942b6c22b2d6e9642504dd3e41ebd --- /dev/null +++ b/configs/_base_/models/ocrnet_r50-d8.py @@ -0,0 +1,47 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='CascadeEncoderDecoder', + num_stages=2, + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=[ + dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='OCRHead', + in_channels=2048, + in_index=3, + channels=512, + ocr_channels=256, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)) + ], + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/configs/_base_/models/pointrend_r50.py b/configs/_base_/models/pointrend_r50.py new file mode 100644 index 0000000000000000000000000000000000000000..9d323dbf9466d41e0800aa57ef84045f3d874bdf --- /dev/null +++ b/configs/_base_/models/pointrend_r50.py @@ -0,0 +1,56 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='CascadeEncoderDecoder', + num_stages=2, + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 1, 1), + strides=(1, 2, 2, 2), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + 
num_outs=4), + decode_head=[ + dict( + type='FPNHead', + in_channels=[256, 256, 256, 256], + in_index=[0, 1, 2, 3], + feature_strides=[4, 8, 16, 32], + channels=128, + dropout_ratio=-1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + dict( + type='PointHead', + in_channels=[256], + in_index=[0], + channels=256, + num_fcs=3, + coarse_pred_each_layer=True, + dropout_ratio=-1, + num_classes=19, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)) + ], + # model training and testing settings + train_cfg=dict( + num_points=2048, oversample_ratio=3, importance_sample_ratio=0.75), + test_cfg=dict( + mode='whole', + subdivision_steps=2, + subdivision_num_points=8196, + scale_factor=2)) diff --git a/configs/_base_/models/psanet_r50-d8.py b/configs/_base_/models/psanet_r50-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..689513fa9d2a40f14bf0ae4ae61f38f0dcc1b3da --- /dev/null +++ b/configs/_base_/models/psanet_r50-d8.py @@ -0,0 +1,49 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='PSAHead', + in_channels=2048, + in_index=3, + channels=512, + mask_size=(97, 97), + psa_type='bi-direction', + compact=False, + shrink_factor=2, + normalization_factor=1.0, + psa_softmax=True, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/configs/_base_/models/pspnet_r50-d8.py b/configs/_base_/models/pspnet_r50-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..f451e08ad2eb0732dcb806b1851eb978d4acf136 --- /dev/null +++ b/configs/_base_/models/pspnet_r50-d8.py @@ -0,0 +1,44 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='PSPHead', + in_channels=2048, + in_index=3, + channels=512, + pool_scales=(1, 2, 3, 6), + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing 
settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/configs/_base_/models/pspnet_unet_s5-d16.py b/configs/_base_/models/pspnet_unet_s5-d16.py new file mode 100644 index 0000000000000000000000000000000000000000..fcff9ec4f41fad158344ecd77313dc14564f3682 --- /dev/null +++ b/configs/_base_/models/pspnet_unet_s5-d16.py @@ -0,0 +1,50 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained=None, + backbone=dict( + type='UNet', + in_channels=3, + base_channels=64, + num_stages=5, + strides=(1, 1, 1, 1, 1), + enc_num_convs=(2, 2, 2, 2, 2), + dec_num_convs=(2, 2, 2, 2), + downsamples=(True, True, True, True), + enc_dilations=(1, 1, 1, 1, 1), + dec_dilations=(1, 1, 1, 1), + with_cp=False, + conv_cfg=None, + norm_cfg=norm_cfg, + act_cfg=dict(type='ReLU'), + upsample_cfg=dict(type='InterpConv'), + norm_eval=False), + decode_head=dict( + type='PSPHead', + in_channels=64, + in_index=4, + channels=16, + pool_scales=(1, 2, 3, 6), + dropout_ratio=0.1, + num_classes=2, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=128, + in_index=3, + channels=64, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=2, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='slide', crop_size=(256, 256), stride=(170, 170))) diff --git a/configs/_base_/models/upernet_r50.py b/configs/_base_/models/upernet_r50.py new file mode 100644 index 0000000000000000000000000000000000000000..10974962fdd7136031fd06de1700f497d355ceaa --- /dev/null +++ b/configs/_base_/models/upernet_r50.py @@ -0,0 +1,44 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 1, 1), + strides=(1, 2, 2, 2), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='UPerHead', + in_channels=[256, 512, 1024, 2048], + in_index=[0, 1, 2, 3], + pool_scales=(1, 2, 3, 6), + channels=512, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/configs/_base_/models/upernet_uniformer.py b/configs/_base_/models/upernet_uniformer.py new file mode 100644 index 0000000000000000000000000000000000000000..6a0dad04b62c8f799fa4a34fde129303099df40c --- /dev/null +++ b/configs/_base_/models/upernet_uniformer.py @@ -0,0 +1,43 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained=None, + backbone=dict( + type='UniFormer', + embed_dim=[64, 128, 320, 512], + layers=[3, 4, 8, 3], + head_dim=64, + mlp_ratio=4., + qkv_bias=True, + drop_rate=0., + attn_drop_rate=0., +
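# stochastic depth (drop-path) rate applied across the backbone blocks +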
drop_path_rate=0.1), + decode_head=dict( + type='UPerHead', + in_channels=[64, 128, 320, 512], + in_index=[0, 1, 2, 3], + pool_scales=(1, 2, 3, 6), + channels=512, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=320, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) \ No newline at end of file diff --git a/configs/_base_/schedules/schedule_160k.py b/configs/_base_/schedules/schedule_160k.py new file mode 100644 index 0000000000000000000000000000000000000000..52603890b10f25faf8eec9f9e5a4468fae09b811 --- /dev/null +++ b/configs/_base_/schedules/schedule_160k.py @@ -0,0 +1,9 @@ +# optimizer +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) +optimizer_config = dict() +# learning policy +lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False) +# runtime settings +runner = dict(type='IterBasedRunner', max_iters=160000) +checkpoint_config = dict(by_epoch=False, interval=16000) +evaluation = dict(interval=16000, metric='mIoU') diff --git a/configs/_base_/schedules/schedule_20k.py b/configs/_base_/schedules/schedule_20k.py new file mode 100644 index 0000000000000000000000000000000000000000..bf780a1b6f6521833c6a5859675147824efa599d --- /dev/null +++ b/configs/_base_/schedules/schedule_20k.py @@ -0,0 +1,9 @@ +# optimizer +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) +optimizer_config = dict() +# learning policy +lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False) +# runtime settings +runner = dict(type='IterBasedRunner', max_iters=20000) +checkpoint_config = dict(by_epoch=False, interval=2000) +evaluation = dict(interval=2000, metric='mIoU') diff --git a/configs/_base_/schedules/schedule_40k.py b/configs/_base_/schedules/schedule_40k.py new file mode 100644 index 0000000000000000000000000000000000000000..cdbf841abcb26eed87bf76ab816aff4bae0630ee --- /dev/null +++ b/configs/_base_/schedules/schedule_40k.py @@ -0,0 +1,9 @@ +# optimizer +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) +optimizer_config = dict() +# learning policy +lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False) +# runtime settings +runner = dict(type='IterBasedRunner', max_iters=40000) +checkpoint_config = dict(by_epoch=False, interval=4000) +evaluation = dict(interval=4000, metric='mIoU') diff --git a/configs/_base_/schedules/schedule_80k.py b/configs/_base_/schedules/schedule_80k.py new file mode 100644 index 0000000000000000000000000000000000000000..c190cee6bdc7922b688ea75dc8f152fa15c24617 --- /dev/null +++ b/configs/_base_/schedules/schedule_80k.py @@ -0,0 +1,9 @@ +# optimizer +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) +optimizer_config = dict() +# learning policy +lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False) +# runtime settings +runner = dict(type='IterBasedRunner', max_iters=80000) +checkpoint_config = dict(by_epoch=False, interval=8000) +evaluation = dict(interval=8000, metric='mIoU') diff --git a/configs/ann/README.md b/configs/ann/README.md new file mode 100644 index 
0000000000000000000000000000000000000000..7b166152fdfc5464fb7dd5e39c678cd735294b27 --- /dev/null +++ b/configs/ann/README.md @@ -0,0 +1,52 @@ +# Asymmetric Non-local Neural Networks for Semantic Segmentation + +## Introduction + + + +```latex +@inproceedings{annn, + author = {Zhen Zhu and + Mengde Xu and + Song Bai and + Tengteng Huang and + Xiang Bai}, + title = {Asymmetric Non-local Neural Networks for Semantic Segmentation}, + booktitle={International Conference on Computer Vision}, + year = {2019}, + url = {http://arxiv.org/abs/1908.07678}, +} +``` + +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | --------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| ANN | R-50-D8 | 512x1024 | 40000 | 6 | 3.71 | 77.40 | 78.57 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ann/ann_r50-d8_512x1024_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r50-d8_512x1024_40k_cityscapes/ann_r50-d8_512x1024_40k_cityscapes_20200605_095211-049fc292.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r50-d8_512x1024_40k_cityscapes/ann_r50-d8_512x1024_40k_cityscapes_20200605_095211.log.json) | +| ANN | R-101-D8 | 512x1024 | 40000 | 9.5 | 2.55 | 76.55 | 78.85 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ann/ann_r101-d8_512x1024_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r101-d8_512x1024_40k_cityscapes/ann_r101-d8_512x1024_40k_cityscapes_20200605_095243-adf6eece.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r101-d8_512x1024_40k_cityscapes/ann_r101-d8_512x1024_40k_cityscapes_20200605_095243.log.json) | +| ANN | R-50-D8 | 769x769 | 40000 | 6.8 | 1.70 | 78.89 | 80.46 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ann/ann_r50-d8_769x769_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r50-d8_769x769_40k_cityscapes/ann_r50-d8_769x769_40k_cityscapes_20200530_025712-2b46b04d.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r50-d8_769x769_40k_cityscapes/ann_r50-d8_769x769_40k_cityscapes_20200530_025712.log.json) | +| ANN | R-101-D8 | 769x769 | 40000 | 10.7 | 1.15 | 79.32 | 80.94 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ann/ann_r101-d8_769x769_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r101-d8_769x769_40k_cityscapes/ann_r101-d8_769x769_40k_cityscapes_20200530_025720-059bff28.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r101-d8_769x769_40k_cityscapes/ann_r101-d8_769x769_40k_cityscapes_20200530_025720.log.json) | +| ANN | R-50-D8 | 512x1024 | 80000 | - | - | 77.34 | 78.65 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ann/ann_r50-d8_512x1024_80k_cityscapes.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r50-d8_512x1024_80k_cityscapes/ann_r50-d8_512x1024_80k_cityscapes_20200607_101911-5a9ad545.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r50-d8_512x1024_80k_cityscapes/ann_r50-d8_512x1024_80k_cityscapes_20200607_101911.log.json) | +| ANN | R-101-D8 | 512x1024 | 80000 | - | - | 77.14 | 78.81 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ann/ann_r101-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r101-d8_512x1024_80k_cityscapes/ann_r101-d8_512x1024_80k_cityscapes_20200607_013728-aceccc6e.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r101-d8_512x1024_80k_cityscapes/ann_r101-d8_512x1024_80k_cityscapes_20200607_013728.log.json) | +| ANN | R-50-D8 | 769x769 | 80000 | - | - | 78.88 | 80.57 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ann/ann_r50-d8_769x769_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r50-d8_769x769_80k_cityscapes/ann_r50-d8_769x769_80k_cityscapes_20200607_044426-cc7ff323.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r50-d8_769x769_80k_cityscapes/ann_r50-d8_769x769_80k_cityscapes_20200607_044426.log.json) | +| ANN | R-101-D8 | 769x769 | 80000 | - | - | 78.80 | 80.34 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ann/ann_r101-d8_769x769_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r101-d8_769x769_80k_cityscapes/ann_r101-d8_769x769_80k_cityscapes_20200607_013713-a9d4be8d.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r101-d8_769x769_80k_cityscapes/ann_r101-d8_769x769_80k_cityscapes_20200607_013713.log.json) | + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ----------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| ANN | R-50-D8 | 512x512 | 80000 | 9.1 | 21.01 | 41.01 | 42.30 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ann/ann_r50-d8_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r50-d8_512x512_80k_ade20k/ann_r50-d8_512x512_80k_ade20k_20200615_014818-26f75e11.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r50-d8_512x512_80k_ade20k/ann_r50-d8_512x512_80k_ade20k_20200615_014818.log.json) | +| ANN | R-101-D8 | 512x512 | 80000 | 12.5 | 14.12 | 42.94 | 44.18 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ann/ann_r101-d8_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r101-d8_512x512_80k_ade20k/ann_r101-d8_512x512_80k_ade20k_20200615_014818-c0153543.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r101-d8_512x512_80k_ade20k/ann_r101-d8_512x512_80k_ade20k_20200615_014818.log.json) | +| ANN | R-50-D8 | 512x512 | 160000 | - | - | 41.74 | 42.62 | 
[config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ann/ann_r50-d8_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r50-d8_512x512_160k_ade20k/ann_r50-d8_512x512_160k_ade20k_20200615_231733-892247bc.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r50-d8_512x512_160k_ade20k/ann_r50-d8_512x512_160k_ade20k_20200615_231733.log.json) | +| ANN | R-101-D8 | 512x512 | 160000 | - | - | 42.94 | 44.06 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ann/ann_r101-d8_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r101-d8_512x512_160k_ade20k/ann_r101-d8_512x512_160k_ade20k_20200615_231733-955eb1ec.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r101-d8_512x512_160k_ade20k/ann_r101-d8_512x512_160k_ade20k_20200615_231733.log.json) | + +### Pascal VOC 2012 + Aug + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| ANN | R-50-D8 | 512x512 | 20000 | 6 | 20.92 | 74.86 | 76.13 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ann/ann_r50-d8_512x512_20k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r50-d8_512x512_20k_voc12aug/ann_r50-d8_512x512_20k_voc12aug_20200617_222246-dfcb1c62.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r50-d8_512x512_20k_voc12aug/ann_r50-d8_512x512_20k_voc12aug_20200617_222246.log.json) | +| ANN | R-101-D8 | 512x512 | 20000 | 9.5 | 13.94 | 77.47 | 78.70 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ann/ann_r101-d8_512x512_20k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r101-d8_512x512_20k_voc12aug/ann_r101-d8_512x512_20k_voc12aug_20200617_222246-2fad0042.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r101-d8_512x512_20k_voc12aug/ann_r101-d8_512x512_20k_voc12aug_20200617_222246.log.json) | +| ANN | R-50-D8 | 512x512 | 40000 | - | - | 76.56 | 77.51 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ann/ann_r50-d8_512x512_40k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r50-d8_512x512_40k_voc12aug/ann_r50-d8_512x512_40k_voc12aug_20200613_231314-b5dac322.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r50-d8_512x512_40k_voc12aug/ann_r50-d8_512x512_40k_voc12aug_20200613_231314.log.json) | +| ANN | R-101-D8 | 512x512 | 40000 | - | - | 76.70 | 78.06 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ann/ann_r101-d8_512x512_40k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r101-d8_512x512_40k_voc12aug/ann_r101-d8_512x512_40k_voc12aug_20200613_231314-bd205bbe.pth) | 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r101-d8_512x512_40k_voc12aug/ann_r101-d8_512x512_40k_voc12aug_20200613_231314.log.json) | diff --git a/configs/ann/ann_r101-d8_512x1024_40k_cityscapes.py b/configs/ann/ann_r101-d8_512x1024_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..d494e07333217e0c6830d36d1bb58fa78b03cfb0 --- /dev/null +++ b/configs/ann/ann_r101-d8_512x1024_40k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './ann_r50-d8_512x1024_40k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/ann/ann_r101-d8_512x1024_80k_cityscapes.py b/configs/ann/ann_r101-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..1eeff0b030cf1db8c6ec9740fa65db44b2026d58 --- /dev/null +++ b/configs/ann/ann_r101-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './ann_r50-d8_512x1024_80k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/ann/ann_r101-d8_512x512_160k_ade20k.py b/configs/ann/ann_r101-d8_512x512_160k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..9e43af541f6e3df3f36479e736bb0c03fc916970 --- /dev/null +++ b/configs/ann/ann_r101-d8_512x512_160k_ade20k.py @@ -0,0 +1,2 @@ +_base_ = './ann_r50-d8_512x512_160k_ade20k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/ann/ann_r101-d8_512x512_20k_voc12aug.py b/configs/ann/ann_r101-d8_512x512_20k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..d854f2e4223731f443369febc500dbccdc524d9d --- /dev/null +++ b/configs/ann/ann_r101-d8_512x512_20k_voc12aug.py @@ -0,0 +1,2 @@ +_base_ = './ann_r50-d8_512x512_20k_voc12aug.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/ann/ann_r101-d8_512x512_40k_voc12aug.py b/configs/ann/ann_r101-d8_512x512_40k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..893c53b1ca4bf9788e4d94f0f53cfe92a93f48ce --- /dev/null +++ b/configs/ann/ann_r101-d8_512x512_40k_voc12aug.py @@ -0,0 +1,2 @@ +_base_ = './ann_r50-d8_512x512_40k_voc12aug.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/ann/ann_r101-d8_512x512_80k_ade20k.py b/configs/ann/ann_r101-d8_512x512_80k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..a64dac670ed4d4632e7b9791ec5f8a334dcea78e --- /dev/null +++ b/configs/ann/ann_r101-d8_512x512_80k_ade20k.py @@ -0,0 +1,2 @@ +_base_ = './ann_r50-d8_512x512_80k_ade20k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/ann/ann_r101-d8_769x769_40k_cityscapes.py b/configs/ann/ann_r101-d8_769x769_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..59508248490b3edbac1c46b4fcc7891f99655b9b --- /dev/null +++ b/configs/ann/ann_r101-d8_769x769_40k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './ann_r50-d8_769x769_40k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/ann/ann_r101-d8_769x769_80k_cityscapes.py b/configs/ann/ann_r101-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..a9c712d1ccfd62ddf6f12ff01ea347ca1995013b --- /dev/null +++ b/configs/ann/ann_r101-d8_769x769_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = 
'./ann_r50-d8_769x769_80k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/ann/ann_r50-d8_512x1024_40k_cityscapes.py b/configs/ann/ann_r50-d8_512x1024_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..00b2594ba8a1c9edc90cca7a6d7c3334fa209edc --- /dev/null +++ b/configs/ann/ann_r50-d8_512x1024_40k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/ann_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] diff --git a/configs/ann/ann_r50-d8_512x1024_80k_cityscapes.py b/configs/ann/ann_r50-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..ef7b369dd9e12b2282a30da14f99dd4547c53a7b --- /dev/null +++ b/configs/ann/ann_r50-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/ann_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] diff --git a/configs/ann/ann_r50-d8_512x512_160k_ade20k.py b/configs/ann/ann_r50-d8_512x512_160k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..ca6bb248ac867d463c274f975c884aa80a57730f --- /dev/null +++ b/configs/ann/ann_r50-d8_512x512_160k_ade20k.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/ann_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +model = dict( + decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) diff --git a/configs/ann/ann_r50-d8_512x512_20k_voc12aug.py b/configs/ann/ann_r50-d8_512x512_20k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..071f190261c4e8f4a80a5da12a88e0cfcdfef0d8 --- /dev/null +++ b/configs/ann/ann_r50-d8_512x512_20k_voc12aug.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/ann_r50-d8.py', '../_base_/datasets/pascal_voc12_aug.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_20k.py' +] +model = dict( + decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21)) diff --git a/configs/ann/ann_r50-d8_512x512_40k_voc12aug.py b/configs/ann/ann_r50-d8_512x512_40k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..82a1c9386c51fb0ada436e51702beb961a534b26 --- /dev/null +++ b/configs/ann/ann_r50-d8_512x512_40k_voc12aug.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/ann_r50-d8.py', '../_base_/datasets/pascal_voc12_aug.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21)) diff --git a/configs/ann/ann_r50-d8_512x512_80k_ade20k.py b/configs/ann/ann_r50-d8_512x512_80k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..5e04aa7c6ac050d119e07b715e2082f692e1a1de --- /dev/null +++ b/configs/ann/ann_r50-d8_512x512_80k_ade20k.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/ann_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) diff --git a/configs/ann/ann_r50-d8_769x769_40k_cityscapes.py b/configs/ann/ann_r50-d8_769x769_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..4912bdb9fb298518ae084eb7df0ad22d3e4ff84f --- /dev/null +++ b/configs/ann/ann_r50-d8_769x769_40k_cityscapes.py @@ 
-0,0 +1,9 @@ +_base_ = [ + '../_base_/models/ann_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/configs/ann/ann_r50-d8_769x769_80k_cityscapes.py b/configs/ann/ann_r50-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..d1cc072b152986102286f503e3d7b92999bf414c --- /dev/null +++ b/configs/ann/ann_r50-d8_769x769_80k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/ann_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/configs/apcnet/README.md b/configs/apcnet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b89ac6d7b2ed2da1788b7400a121f6509774baf8 --- /dev/null +++ b/configs/apcnet/README.md @@ -0,0 +1,39 @@ +# Adaptive Pyramid Context Network for Semantic Segmentation + +## Introduction + + + +```latex +@InProceedings{He_2019_CVPR, +author = {He, Junjun and Deng, Zhongying and Zhou, Lei and Wang, Yali and Qiao, Yu}, +title = {Adaptive Pyramid Context Network for Semantic Segmentation}, +booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, +month = {June}, +year = {2019} +} +``` + +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | --------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| APCNet | R-50-D8 | 512x1024 | 40000 | 7.7 | 3.57 | 78.02 | 79.26 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/apcnet/apcnet_r50-d8_512x1024_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r50-d8_512x1024_40k_cityscapes/apcnet_r50-d8_512x1024_40k_cityscapes_20201214_115717-5e88fa33.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r50-d8_512x1024_40k_cityscapes/apcnet_r50-d8_512x1024_40k_cityscapes-20201214_115717.log.json) | +| APCNet | R-101-D8 | 512x1024 | 40000 | 11.2 | 2.15 | 79.08 | 80.34 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/apcnet/apcnet_r101-d8_512x1024_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r101-d8_512x1024_40k_cityscapes/apcnet_r101-d8_512x1024_40k_cityscapes_20201214_115716-abc9d111.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r101-d8_512x1024_40k_cityscapes/apcnet_r101-d8_512x1024_40k_cityscapes-20201214_115716.log.json) | +| APCNet | R-50-D8 | 769x769 | 40000 | 8.7 | 1.52 | 77.89 | 79.75 | 
[config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/apcnet/apcnet_r50-d8_769x769_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r50-d8_769x769_40k_cityscapes/apcnet_r50-d8_769x769_40k_cityscapes_20201214_115717-2a2628d7.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r50-d8_769x769_40k_cityscapes/apcnet_r50-d8_769x769_40k_cityscapes-20201214_115717.log.json) | +| APCNet | R-101-D8 | 769x769 | 40000 | 12.7 | 1.03 | 77.96 | 79.24 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/apcnet/apcnet_r101-d8_769x769_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r101-d8_769x769_40k_cityscapes/apcnet_r101-d8_769x769_40k_cityscapes_20201214_115718-b650de90.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r101-d8_769x769_40k_cityscapes/apcnet_r101-d8_769x769_40k_cityscapes-20201214_115718.log.json) | +| APCNet | R-50-D8 | 512x1024 | 80000 | - | - | 78.96 | 79.94 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/apcnet/apcnet_r50-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r50-d8_512x1024_80k_cityscapes/apcnet_r50-d8_512x1024_80k_cityscapes_20201214_115716-987f51e3.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r50-d8_512x1024_80k_cityscapes/apcnet_r50-d8_512x1024_80k_cityscapes-20201214_115716.log.json) | +| APCNet | R-101-D8 | 512x1024 | 80000 | - | - | 79.64 | 80.61 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/apcnet/apcnet_r101-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r101-d8_512x1024_80k_cityscapes/apcnet_r101-d8_512x1024_80k_cityscapes_20201214_115705-b1ff208a.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r101-d8_512x1024_80k_cityscapes/apcnet_r101-d8_512x1024_80k_cityscapes-20201214_115705.log.json) | +| APCNet | R-50-D8 | 769x769 | 80000 | - | - | 78.79 | 80.35 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/apcnet/apcnet_r50-d8_769x769_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r50-d8_769x769_80k_cityscapes/apcnet_r50-d8_769x769_80k_cityscapes_20201214_115718-7ea9fa12.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r50-d8_769x769_80k_cityscapes/apcnet_r50-d8_769x769_80k_cityscapes-20201214_115718.log.json) | +| APCNet | R-101-D8 | 769x769 | 80000 | - | - | 78.45 | 79.91 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/apcnet/apcnet_r101-d8_769x769_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r101-d8_769x769_80k_cityscapes/apcnet_r101-d8_769x769_80k_cityscapes_20201214_115716-a7fbc2ab.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r101-d8_769x769_80k_cityscapes/apcnet_r101-d8_769x769_80k_cityscapes-20201214_115716.log.json) | + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ----------------------------------------------------------------------------------------------------------------------- | 
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| APCNet | R-50-D8 | 512x512 | 80000 | 10.1 | 19.61 | 42.20 | 43.30 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/apcnet/apcnet_r50-d8_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r50-d8_512x512_80k_ade20k/apcnet_r50-d8_512x512_80k_ade20k_20201214_115705-a8626293.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r50-d8_512x512_80k_ade20k/apcnet_r50-d8_512x512_80k_ade20k-20201214_115705.log.json) | +| APCNet | R-101-D8 | 512x512 | 80000 | 13.6 | 13.10 | 45.54 | 46.65 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/apcnet/apcnet_r101-d8_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r101-d8_512x512_80k_ade20k/apcnet_r101-d8_512x512_80k_ade20k_20201214_115704-c656c3fb.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r101-d8_512x512_80k_ade20k/apcnet_r101-d8_512x512_80k_ade20k-20201214_115704.log.json) | +| APCNet | R-50-D8 | 512x512 | 160000 | - | - | 43.40 | 43.94 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/apcnet/apcnet_r50-d8_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r50-d8_512x512_160k_ade20k/apcnet_r50-d8_512x512_160k_ade20k_20201214_115706-25fb92c2.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r50-d8_512x512_160k_ade20k/apcnet_r50-d8_512x512_160k_ade20k-20201214_115706.log.json) | +| APCNet | R-101-D8 | 512x512 | 160000 | - | - | 45.41 | 46.63 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/apcnet/apcnet_r101-d8_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r101-d8_512x512_160k_ade20k/apcnet_r101-d8_512x512_160k_ade20k_20201214_115705-73f9a8d7.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r101-d8_512x512_160k_ade20k/apcnet_r101-d8_512x512_160k_ade20k-20201214_115705.log.json) | diff --git a/configs/apcnet/apcnet_r101-d8_512x1024_40k_cityscapes.py b/configs/apcnet/apcnet_r101-d8_512x1024_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..1e1cec67355abae33d518417eb96eae111f16d2b --- /dev/null +++ b/configs/apcnet/apcnet_r101-d8_512x1024_40k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './apcnet_r50-d8_512x1024_40k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/apcnet/apcnet_r101-d8_512x1024_80k_cityscapes.py b/configs/apcnet/apcnet_r101-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..04cb006ba146268e1d3278151bc6ea00a4fb1bfe --- /dev/null +++ b/configs/apcnet/apcnet_r101-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './apcnet_r50-d8_512x1024_80k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/apcnet/apcnet_r101-d8_512x512_160k_ade20k.py b/configs/apcnet/apcnet_r101-d8_512x512_160k_ade20k.py new file mode 100644 index 
0000000000000000000000000000000000000000..1ce2279a0fbfd6fcc7cd20e3f552b1a39f47d943 --- /dev/null +++ b/configs/apcnet/apcnet_r101-d8_512x512_160k_ade20k.py @@ -0,0 +1,2 @@ +_base_ = './apcnet_r50-d8_512x512_160k_ade20k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/apcnet/apcnet_r101-d8_512x512_80k_ade20k.py b/configs/apcnet/apcnet_r101-d8_512x512_80k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..8f10b98406c88256c66d3bbe241c149791d68feb --- /dev/null +++ b/configs/apcnet/apcnet_r101-d8_512x512_80k_ade20k.py @@ -0,0 +1,2 @@ +_base_ = './apcnet_r50-d8_512x512_80k_ade20k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/apcnet/apcnet_r101-d8_769x769_40k_cityscapes.py b/configs/apcnet/apcnet_r101-d8_769x769_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..5c44ebcaf36075e67208c5f033d1e5f9a78dda4e --- /dev/null +++ b/configs/apcnet/apcnet_r101-d8_769x769_40k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './apcnet_r50-d8_769x769_40k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/apcnet/apcnet_r101-d8_769x769_80k_cityscapes.py b/configs/apcnet/apcnet_r101-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..616984575dda73a13fc5870f60ae6ffa30d6b01b --- /dev/null +++ b/configs/apcnet/apcnet_r101-d8_769x769_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './apcnet_r50-d8_769x769_80k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/apcnet/apcnet_r50-d8_512x1024_40k_cityscapes.py b/configs/apcnet/apcnet_r50-d8_512x1024_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..99c61a942e4868315ce4a9404d113f73fed4a4ea --- /dev/null +++ b/configs/apcnet/apcnet_r50-d8_512x1024_40k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/apcnet_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] diff --git a/configs/apcnet/apcnet_r50-d8_512x1024_80k_cityscapes.py b/configs/apcnet/apcnet_r50-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..62a0627ae2e9bb17974068e56ee660093e944e0d --- /dev/null +++ b/configs/apcnet/apcnet_r50-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/apcnet_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] diff --git a/configs/apcnet/apcnet_r50-d8_512x512_160k_ade20k.py b/configs/apcnet/apcnet_r50-d8_512x512_160k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..f7821c559d2f92d23b28e07e040a54cfc425eefc --- /dev/null +++ b/configs/apcnet/apcnet_r50-d8_512x512_160k_ade20k.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/apcnet_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +model = dict( + decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) diff --git a/configs/apcnet/apcnet_r50-d8_512x512_80k_ade20k.py b/configs/apcnet/apcnet_r50-d8_512x512_80k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..daafa5fbc12c3ed6c10b5234d520166f774e0f94 --- /dev/null +++ b/configs/apcnet/apcnet_r50-d8_512x512_80k_ade20k.py @@ -0,0 +1,6 @@ +_base_ = [ 
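+ # keys defined in this file override values from the merged base configs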
+ '../_base_/models/apcnet_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) diff --git a/configs/apcnet/apcnet_r50-d8_769x769_40k_cityscapes.py b/configs/apcnet/apcnet_r50-d8_769x769_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..3db6140cb97da1d202fd464d01f793276effa629 --- /dev/null +++ b/configs/apcnet/apcnet_r50-d8_769x769_40k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/apcnet_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/configs/apcnet/apcnet_r50-d8_769x769_80k_cityscapes.py b/configs/apcnet/apcnet_r50-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..9cac4254f37bc3755bff869a10eb3cb75db4d943 --- /dev/null +++ b/configs/apcnet/apcnet_r50-d8_769x769_80k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/apcnet_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/configs/ccnet/README.md b/configs/ccnet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1c8ba1cdf74c0207683e41ad905361c671577d6d --- /dev/null +++ b/configs/ccnet/README.md @@ -0,0 +1,47 @@ +# CCNet: Criss-Cross Attention for Semantic Segmentation + +## Introduction + + + +```latex +@inproceedings{huang2018ccnet, + title={CCNet: Criss-Cross Attention for Semantic Segmentation}, + author={Huang, Zilong and Wang, Xinggang and Huang, Lichao and Huang, Chang and Wei, Yunchao and Liu, Wenyu}, + booktitle={ICCV}, + year={2019} +} +``` + +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| CCNet | R-50-D8 | 512x1024 | 40000 | 6 | 3.32 | 77.76 | 78.87 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ccnet/ccnet_r50-d8_512x1024_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_512x1024_40k_cityscapes/ccnet_r50-d8_512x1024_40k_cityscapes_20200616_142517-4123f401.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_512x1024_40k_cityscapes/ccnet_r50-d8_512x1024_40k_cityscapes_20200616_142517.log.json) | +| CCNet | R-101-D8 | 512x1024 | 40000 | 9.5 | 2.31 | 76.35 | 78.19 |
[config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ccnet/ccnet_r101-d8_512x1024_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_512x1024_40k_cityscapes/ccnet_r101-d8_512x1024_40k_cityscapes_20200616_142540-a3b84ba6.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_512x1024_40k_cityscapes/ccnet_r101-d8_512x1024_40k_cityscapes_20200616_142540.log.json) | +| CCNet | R-50-D8 | 769x769 | 40000 | 6.8 | 1.43 | 78.46 | 79.93 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ccnet/ccnet_r50-d8_769x769_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_769x769_40k_cityscapes/ccnet_r50-d8_769x769_40k_cityscapes_20200616_145125-76d11884.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_769x769_40k_cityscapes/ccnet_r50-d8_769x769_40k_cityscapes_20200616_145125.log.json) | +| CCNet | R-101-D8 | 769x769 | 40000 | 10.7 | 1.01 | 76.94 | 78.62 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ccnet/ccnet_r101-d8_769x769_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_769x769_40k_cityscapes/ccnet_r101-d8_769x769_40k_cityscapes_20200617_101428-4f57c8d0.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_769x769_40k_cityscapes/ccnet_r101-d8_769x769_40k_cityscapes_20200617_101428.log.json) | +| CCNet | R-50-D8 | 512x1024 | 80000 | - | - | 79.03 | 80.16 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ccnet/ccnet_r50-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_512x1024_80k_cityscapes/ccnet_r50-d8_512x1024_80k_cityscapes_20200617_010421-869a3423.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_512x1024_80k_cityscapes/ccnet_r50-d8_512x1024_80k_cityscapes_20200617_010421.log.json) | +| CCNet | R-101-D8 | 512x1024 | 80000 | - | - | 78.87 | 79.90 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ccnet/ccnet_r101-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_512x1024_80k_cityscapes/ccnet_r101-d8_512x1024_80k_cityscapes_20200617_203935-ffae8917.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_512x1024_80k_cityscapes/ccnet_r101-d8_512x1024_80k_cityscapes_20200617_203935.log.json) | +| CCNet | R-50-D8 | 769x769 | 80000 | - | - | 79.29 | 81.08 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ccnet/ccnet_r50-d8_769x769_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_769x769_80k_cityscapes/ccnet_r50-d8_769x769_80k_cityscapes_20200617_010421-73eed8ca.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_769x769_80k_cityscapes/ccnet_r50-d8_769x769_80k_cityscapes_20200617_010421.log.json) | +| CCNet | R-101-D8 | 769x769 | 80000 | - | - | 79.45 | 80.66 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ccnet/ccnet_r101-d8_769x769_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_769x769_80k_cityscapes/ccnet_r101-d8_769x769_80k_cityscapes_20200618_011502-ad3cd481.pth) | 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_769x769_80k_cityscapes/ccnet_r101-d8_769x769_80k_cityscapes_20200618_011502.log.json) | + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | --------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| CCNet | R-50-D8 | 512x512 | 80000 | 8.8 | 20.89 | 41.78 | 42.98 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ccnet/ccnet_r50-d8_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_512x512_80k_ade20k/ccnet_r50-d8_512x512_80k_ade20k_20200615_014848-aa37f61e.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_512x512_80k_ade20k/ccnet_r50-d8_512x512_80k_ade20k_20200615_014848.log.json) | +| CCNet | R-101-D8 | 512x512 | 80000 | 12.2 | 14.11 | 43.97 | 45.13 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ccnet/ccnet_r101-d8_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_512x512_80k_ade20k/ccnet_r101-d8_512x512_80k_ade20k_20200615_014848-1f4929a3.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_512x512_80k_ade20k/ccnet_r101-d8_512x512_80k_ade20k_20200615_014848.log.json) | +| CCNet | R-50-D8 | 512x512 | 160000 | - | - | 42.08 | 43.13 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ccnet/ccnet_r50-d8_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_512x512_160k_ade20k/ccnet_r50-d8_512x512_160k_ade20k_20200616_084435-7c97193b.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_512x512_160k_ade20k/ccnet_r50-d8_512x512_160k_ade20k_20200616_084435.log.json) | +| CCNet | R-101-D8 | 512x512 | 160000 | - | - | 43.71 | 45.04 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ccnet/ccnet_r101-d8_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_512x512_160k_ade20k/ccnet_r101-d8_512x512_160k_ade20k_20200616_000644-e849e007.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_512x512_160k_ade20k/ccnet_r101-d8_512x512_160k_ade20k_20200616_000644.log.json) | + +### Pascal VOC 2012 + Aug + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ---------------------------------------------------------------------------------------------------------------------- | 
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| CCNet | R-50-D8 | 512x512 | 20000 | 6 | 20.45 | 76.17 | 77.51 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ccnet/ccnet_r50-d8_512x512_20k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_512x512_20k_voc12aug/ccnet_r50-d8_512x512_20k_voc12aug_20200617_193212-fad81784.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_512x512_20k_voc12aug/ccnet_r50-d8_512x512_20k_voc12aug_20200617_193212.log.json) | +| CCNet | R-101-D8 | 512x512 | 20000 | 9.5 | 13.64 | 77.27 | 79.02 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ccnet/ccnet_r101-d8_512x512_20k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_512x512_20k_voc12aug/ccnet_r101-d8_512x512_20k_voc12aug_20200617_193212-0007b61d.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_512x512_20k_voc12aug/ccnet_r101-d8_512x512_20k_voc12aug_20200617_193212.log.json) | +| CCNet | R-50-D8 | 512x512 | 40000 | - | - | 75.96 | 77.04 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ccnet/ccnet_r50-d8_512x512_40k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_512x512_40k_voc12aug/ccnet_r50-d8_512x512_40k_voc12aug_20200613_232127-c2a15f02.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_512x512_40k_voc12aug/ccnet_r50-d8_512x512_40k_voc12aug_20200613_232127.log.json) | +| CCNet | R-101-D8 | 512x512 | 40000 | - | - | 77.87 | 78.90 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ccnet/ccnet_r101-d8_512x512_40k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_512x512_40k_voc12aug/ccnet_r101-d8_512x512_40k_voc12aug_20200613_232127-c30da577.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_512x512_40k_voc12aug/ccnet_r101-d8_512x512_40k_voc12aug_20200613_232127.log.json) | diff --git a/configs/ccnet/ccnet_r101-d8_512x1024_40k_cityscapes.py b/configs/ccnet/ccnet_r101-d8_512x1024_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..d2bac38ca6760af6441ede5a04409ed495ef87f3 --- /dev/null +++ b/configs/ccnet/ccnet_r101-d8_512x1024_40k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './ccnet_r50-d8_512x1024_40k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/ccnet/ccnet_r101-d8_512x1024_80k_cityscapes.py b/configs/ccnet/ccnet_r101-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..989928ab7f98da86e291451040ff85669a9fbddb --- /dev/null +++ b/configs/ccnet/ccnet_r101-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './ccnet_r50-d8_512x1024_80k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/ccnet/ccnet_r101-d8_512x512_160k_ade20k.py b/configs/ccnet/ccnet_r101-d8_512x512_160k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..c32bf48751f0a18983bff0d99310870b71801663 --- /dev/null +++ 
b/configs/ccnet/ccnet_r101-d8_512x512_160k_ade20k.py @@ -0,0 +1,2 @@ +_base_ = './ccnet_r50-d8_512x512_160k_ade20k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/ccnet/ccnet_r101-d8_512x512_20k_voc12aug.py b/configs/ccnet/ccnet_r101-d8_512x512_20k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..53eb77c0cd6690668ee7c2a666bd85b9a5f7e73b --- /dev/null +++ b/configs/ccnet/ccnet_r101-d8_512x512_20k_voc12aug.py @@ -0,0 +1,2 @@ +_base_ = './ccnet_r50-d8_512x512_20k_voc12aug.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/ccnet/ccnet_r101-d8_512x512_40k_voc12aug.py b/configs/ccnet/ccnet_r101-d8_512x512_40k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..d7eb668f39bbd22a1f42628428bc19d1645e9865 --- /dev/null +++ b/configs/ccnet/ccnet_r101-d8_512x512_40k_voc12aug.py @@ -0,0 +1,2 @@ +_base_ = './ccnet_r50-d8_512x512_40k_voc12aug.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/ccnet/ccnet_r101-d8_512x512_80k_ade20k.py b/configs/ccnet/ccnet_r101-d8_512x512_80k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..029c1d525b809b61dc8e548ebe4fb26e5c68a8be --- /dev/null +++ b/configs/ccnet/ccnet_r101-d8_512x512_80k_ade20k.py @@ -0,0 +1,2 @@ +_base_ = './ccnet_r50-d8_512x512_80k_ade20k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/ccnet/ccnet_r101-d8_769x769_40k_cityscapes.py b/configs/ccnet/ccnet_r101-d8_769x769_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..43f05fab05ee4e20c3509a923118fe9818543cbd --- /dev/null +++ b/configs/ccnet/ccnet_r101-d8_769x769_40k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './ccnet_r50-d8_769x769_40k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/ccnet/ccnet_r101-d8_769x769_80k_cityscapes.py b/configs/ccnet/ccnet_r101-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..654f377b6f6152c9bd98d33824a39a41d7510c3f --- /dev/null +++ b/configs/ccnet/ccnet_r101-d8_769x769_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './ccnet_r50-d8_769x769_80k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/ccnet/ccnet_r50-d8_512x1024_40k_cityscapes.py b/configs/ccnet/ccnet_r50-d8_512x1024_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..6a4316dde57206fe369e72fa0d32a529fe1a1932 --- /dev/null +++ b/configs/ccnet/ccnet_r50-d8_512x1024_40k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/ccnet_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] diff --git a/configs/ccnet/ccnet_r50-d8_512x1024_80k_cityscapes.py b/configs/ccnet/ccnet_r50-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..16e34356e9f8566ec73e3c25c771e281d3eeb975 --- /dev/null +++ b/configs/ccnet/ccnet_r50-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/ccnet_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] diff --git a/configs/ccnet/ccnet_r50-d8_512x512_160k_ade20k.py b/configs/ccnet/ccnet_r50-d8_512x512_160k_ade20k.py new file mode 
100644 index 0000000000000000000000000000000000000000..1ad94d8988bb822c1571816255464126d9d5b95d --- /dev/null +++ b/configs/ccnet/ccnet_r50-d8_512x512_160k_ade20k.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/ccnet_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +model = dict( + decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) diff --git a/configs/ccnet/ccnet_r50-d8_512x512_20k_voc12aug.py b/configs/ccnet/ccnet_r50-d8_512x512_20k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..bbcd29ccea8dcf9f67f1cd198dacd5dab380b265 --- /dev/null +++ b/configs/ccnet/ccnet_r50-d8_512x512_20k_voc12aug.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/ccnet_r50-d8.py', + '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_20k.py' +] +model = dict( + decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21)) diff --git a/configs/ccnet/ccnet_r50-d8_512x512_40k_voc12aug.py b/configs/ccnet/ccnet_r50-d8_512x512_40k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..947b8ac8ce1ddf7906ad39788c6992df3b506d29 --- /dev/null +++ b/configs/ccnet/ccnet_r50-d8_512x512_40k_voc12aug.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/ccnet_r50-d8.py', + '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21)) diff --git a/configs/ccnet/ccnet_r50-d8_512x512_80k_ade20k.py b/configs/ccnet/ccnet_r50-d8_512x512_80k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..1a1f49cf6b112afdadf1841571f51b98c010ddf8 --- /dev/null +++ b/configs/ccnet/ccnet_r50-d8_512x512_80k_ade20k.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/ccnet_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) diff --git a/configs/ccnet/ccnet_r50-d8_769x769_40k_cityscapes.py b/configs/ccnet/ccnet_r50-d8_769x769_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..580d59ca6995ea95a9345ef3ea574ea5b57e9cfb --- /dev/null +++ b/configs/ccnet/ccnet_r50-d8_769x769_40k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/ccnet_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/configs/ccnet/ccnet_r50-d8_769x769_80k_cityscapes.py b/configs/ccnet/ccnet_r50-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..c6dac64377bb3f73fdf5c836fa9c38757f75ff76 --- /dev/null +++ b/configs/ccnet/ccnet_r50-d8_769x769_80k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/ccnet_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/configs/cgnet/README.md b/configs/cgnet/README.md new file mode 100644 index 
0000000000000000000000000000000000000000..f1cad2051030184fe3016b2a605bc598a9ae7cec --- /dev/null +++ b/configs/cgnet/README.md @@ -0,0 +1,26 @@ +# CGNet: A Light-weight Context Guided Network for Semantic Segmentation + +## Introduction + + + +```latex +@article{wu2020cgnet, + title={Cgnet: A light-weight context guided network for semantic segmentation}, + author={Wu, Tianyi and Tang, Sheng and Zhang, Rui and Cao, Juan and Zhang, Yongdong}, + journal={IEEE Transactions on Image Processing}, + volume={30}, + pages={1169--1179}, + year={2020}, + publisher={IEEE} +} +``` + +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ----------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| CGNet | M3N21 | 680x680 | 60000 | 7.5 | 30.51 | 65.63 | 68.04 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/cgnet/cgnet_680x680_60k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/cgnet/cgnet_680x680_60k_cityscapes/cgnet_680x680_60k_cityscapes_20201101_110253-4c0b2f2d.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/cgnet/cgnet_680x680_60k_cityscapes/cgnet_680x680_60k_cityscapes-20201101_110253.log.json) | +| CGNet | M3N21 | 512x1024 | 60000 | 8.3 | 31.14 | 68.27 | 70.33 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/cgnet/cgnet_512x1024_60k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/cgnet/cgnet_512x1024_60k_cityscapes/cgnet_512x1024_60k_cityscapes_20201101_110254-124ea03b.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/cgnet/cgnet_512x1024_60k_cityscapes/cgnet_512x1024_60k_cityscapes-20201101_110254.log.json) | diff --git a/configs/cgnet/cgnet_512x1024_60k_cityscapes.py b/configs/cgnet/cgnet_512x1024_60k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..11421ef9d375d01b01c333c3705d6eb6e3348ee8 --- /dev/null +++ b/configs/cgnet/cgnet_512x1024_60k_cityscapes.py @@ -0,0 +1,66 @@ +_base_ = ['../_base_/models/cgnet.py', '../_base_/default_runtime.py'] + +# optimizer +optimizer = dict(type='Adam', lr=0.001, eps=1e-08, weight_decay=0.0005) +optimizer_config = dict() +# learning policy +lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False) +# runtime settings +total_iters = 60000 +checkpoint_config = dict(by_epoch=False, interval=4000) +evaluation = dict(interval=4000, metric='mIoU') + +# dataset settings +dataset_type = 'CityscapesDataset' +data_root = 'data/cityscapes/' +img_norm_cfg = dict( + mean=[72.39239876, 82.90891754, 73.15835921], std=[1, 1, 1], to_rgb=True) +crop_size = (512, 1024) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=(2048, 1024), ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', 
size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(2048, 1024), + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=8, + workers_per_gpu=8, + train=dict( + type=dataset_type, + data_root=data_root, + img_dir='leftImg8bit/train', + ann_dir='gtFine/train', + pipeline=train_pipeline), + val=dict( + type=dataset_type, + data_root=data_root, + img_dir='leftImg8bit/val', + ann_dir='gtFine/val', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + data_root=data_root, + img_dir='leftImg8bit/val', + ann_dir='gtFine/val', + pipeline=test_pipeline)) diff --git a/configs/cgnet/cgnet_680x680_60k_cityscapes.py b/configs/cgnet/cgnet_680x680_60k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..2b2f8eefb7dbecf81fcd2db54644493480825246 --- /dev/null +++ b/configs/cgnet/cgnet_680x680_60k_cityscapes.py @@ -0,0 +1,50 @@ +_base_ = [ + '../_base_/models/cgnet.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py' +] + +# optimizer +optimizer = dict(type='Adam', lr=0.001, eps=1e-08, weight_decay=0.0005) +optimizer_config = dict() +# learning policy +lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False) +# runtime settings +total_iters = 60000 +checkpoint_config = dict(by_epoch=False, interval=4000) +evaluation = dict(interval=4000, metric='mIoU') + +img_norm_cfg = dict( + mean=[72.39239876, 82.90891754, 73.15835921], std=[1, 1, 1], to_rgb=True) +crop_size = (680, 680) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=(2048, 1024), ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(2048, 1024), + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=8, + workers_per_gpu=8, + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) diff --git a/configs/danet/README.md b/configs/danet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..655a845c6ae177c5e18445754f2b4daf823c5c4b --- /dev/null +++ b/configs/danet/README.md @@ -0,0 +1,47 @@ +# Dual Attention Network for Scene Segmentation + +## Introduction + + + +```latex +@inproceedings{fu2018dual, + title={Dual Attention Network for Scene Segmentation}, + author={Jun Fu and Jing Liu and Haijie Tian and Yong Li and Yongjun Bao and Zhiwei Fang and Hanqing Lu}, + booktitle={The IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, + year={2019} +} +``` + +## Results and models + +### Cityscapes + +| 
Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ----: | ------------- | ------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| DANet | R-50-D8 | 512x1024 | 40000 | 7.4 | 2.66 | 78.74 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/danet/danet_r50-d8_512x1024_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r50-d8_512x1024_40k_cityscapes/danet_r50-d8_512x1024_40k_cityscapes_20200605_191324-c0dbfa5f.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r50-d8_512x1024_40k_cityscapes/danet_r50-d8_512x1024_40k_cityscapes_20200605_191324.log.json) | +| DANet | R-101-D8 | 512x1024 | 40000 | 10.9 | 1.99 | 80.52 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/danet/danet_r101-d8_512x1024_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r101-d8_512x1024_40k_cityscapes/danet_r101-d8_512x1024_40k_cityscapes_20200605_200831-c57a7157.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r101-d8_512x1024_40k_cityscapes/danet_r101-d8_512x1024_40k_cityscapes_20200605_200831.log.json) | +| DANet | R-50-D8 | 769x769 | 40000 | 8.8 | 1.56 | 78.88 | 80.62 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/danet/danet_r50-d8_769x769_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r50-d8_769x769_40k_cityscapes/danet_r50-d8_769x769_40k_cityscapes_20200530_025703-76681c60.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r50-d8_769x769_40k_cityscapes/danet_r50-d8_769x769_40k_cityscapes_20200530_025703.log.json) | +| DANet | R-101-D8 | 769x769 | 40000 | 12.8 | 1.07 | 79.88 | 81.47 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/danet/danet_r101-d8_769x769_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r101-d8_769x769_40k_cityscapes/danet_r101-d8_769x769_40k_cityscapes_20200530_025717-dcb7fd4e.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r101-d8_769x769_40k_cityscapes/danet_r101-d8_769x769_40k_cityscapes_20200530_025717.log.json) | +| DANet | R-50-D8 | 512x1024 | 80000 | - | - | 79.34 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/danet/danet_r50-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r50-d8_512x1024_80k_cityscapes/danet_r50-d8_512x1024_80k_cityscapes_20200607_133029-2bfa2293.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r50-d8_512x1024_80k_cityscapes/danet_r50-d8_512x1024_80k_cityscapes_20200607_133029.log.json) | +| DANet | R-101-D8 | 512x1024 | 80000 | - | - | 80.41 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/danet/danet_r101-d8_512x1024_80k_cityscapes.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r101-d8_512x1024_80k_cityscapes/danet_r101-d8_512x1024_80k_cityscapes_20200607_132918-955e6350.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r101-d8_512x1024_80k_cityscapes/danet_r101-d8_512x1024_80k_cityscapes_20200607_132918.log.json) | +| DANet | R-50-D8 | 769x769 | 80000 | - | - | 79.27 | 80.96 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/danet/danet_r50-d8_769x769_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r50-d8_769x769_80k_cityscapes/danet_r50-d8_769x769_80k_cityscapes_20200607_132954-495689b4.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r50-d8_769x769_80k_cityscapes/danet_r50-d8_769x769_80k_cityscapes_20200607_132954.log.json) | +| DANet | R-101-D8 | 769x769 | 80000 | - | - | 80.47 | 82.02 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/danet/danet_r101-d8_769x769_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r101-d8_769x769_80k_cityscapes/danet_r101-d8_769x769_80k_cityscapes_20200607_132918-f3a929e7.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r101-d8_769x769_80k_cityscapes/danet_r101-d8_769x769_80k_cityscapes_20200607_132918.log.json) | + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | --------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| DANet | R-50-D8 | 512x512 | 80000 | 11.5 | 21.20 | 41.66 | 42.90 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/danet/danet_r50-d8_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r50-d8_512x512_80k_ade20k/danet_r50-d8_512x512_80k_ade20k_20200615_015125-edb18e08.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r50-d8_512x512_80k_ade20k/danet_r50-d8_512x512_80k_ade20k_20200615_015125.log.json) | +| DANet | R-101-D8 | 512x512 | 80000 | 15 | 14.18 | 43.64 | 45.19 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/danet/danet_r101-d8_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r101-d8_512x512_80k_ade20k/danet_r101-d8_512x512_80k_ade20k_20200615_015126-d0357c73.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r101-d8_512x512_80k_ade20k/danet_r101-d8_512x512_80k_ade20k_20200615_015126.log.json) | +| DANet | R-50-D8 | 512x512 | 160000 | - | - | 42.45 | 43.25 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/danet/danet_r50-d8_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r50-d8_512x512_160k_ade20k/danet_r50-d8_512x512_160k_ade20k_20200616_082340-9cb35dcd.pth) | 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r50-d8_512x512_160k_ade20k/danet_r50-d8_512x512_160k_ade20k_20200616_082340.log.json) | +| DANet | R-101-D8 | 512x512 | 160000 | - | - | 44.17 | 45.02 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/danet/danet_r101-d8_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r101-d8_512x512_160k_ade20k/danet_r101-d8_512x512_160k_ade20k_20200616_082348-23bf12f9.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r101-d8_512x512_160k_ade20k/danet_r101-d8_512x512_160k_ade20k_20200616_082348.log.json) | + +### Pascal VOC 2012 + Aug + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ---------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| DANet | R-50-D8 | 512x512 | 20000 | 6.5 | 20.94 | 74.45 | 75.69 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/danet/danet_r50-d8_512x512_20k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r50-d8_512x512_20k_voc12aug/danet_r50-d8_512x512_20k_voc12aug_20200618_070026-9e9e3ab3.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r50-d8_512x512_20k_voc12aug/danet_r50-d8_512x512_20k_voc12aug_20200618_070026.log.json) | +| DANet | R-101-D8 | 512x512 | 20000 | 9.9 | 13.76 | 76.02 | 77.23 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/danet/danet_r101-d8_512x512_20k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r101-d8_512x512_20k_voc12aug/danet_r101-d8_512x512_20k_voc12aug_20200618_070026-d48d23b2.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r101-d8_512x512_20k_voc12aug/danet_r101-d8_512x512_20k_voc12aug_20200618_070026.log.json) | +| DANet | R-50-D8 | 512x512 | 40000 | - | - | 76.37 | 77.29 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/danet/danet_r50-d8_512x512_40k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r50-d8_512x512_40k_voc12aug/danet_r50-d8_512x512_40k_voc12aug_20200613_235526-426e3a64.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r50-d8_512x512_40k_voc12aug/danet_r50-d8_512x512_40k_voc12aug_20200613_235526.log.json) | +| DANet | R-101-D8 | 512x512 | 40000 | - | - | 76.51 | 77.32 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/danet/danet_r101-d8_512x512_40k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r101-d8_512x512_40k_voc12aug/danet_r101-d8_512x512_40k_voc12aug_20200613_223031-788e232a.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r101-d8_512x512_40k_voc12aug/danet_r101-d8_512x512_40k_voc12aug_20200613_223031.log.json) | diff --git a/configs/danet/danet_r101-d8_512x1024_40k_cityscapes.py b/configs/danet/danet_r101-d8_512x1024_40k_cityscapes.py new file mode 
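A pattern worth making explicit before the flood of two-line configs that follows: each R-101 file only sets `_base_` to its R-50 counterpart and overrides `pretrained` plus `backbone.depth`; everything else is merged in recursively from the base file. Below is a minimal sketch of that merge semantics — an illustration of the behaviour, not mmcv's actual implementation, and the base values shown are assumed for the example:

```python
def merge_cfg(base: dict, override: dict) -> dict:
    """Recursively merge `override` into `base`, keeping untouched siblings."""
    merged = dict(base)
    for key, value in override.items():
        if isinstance(value, dict) and isinstance(merged.get(key), dict):
            merged[key] = merge_cfg(merged[key], value)
        else:
            merged[key] = value
    return merged

# Assumed base values for illustration; the real ones live in
# ../_base_/models/danet_r50-d8.py, which is not part of this hunk.
base = dict(pretrained='open-mmlab://resnet50_v1c',
            backbone=dict(type='ResNetV1c', depth=50, num_stages=4))
child = dict(pretrained='open-mmlab://resnet101_v1c',
             backbone=dict(depth=101))
# backbone.type and num_stages survive; only depth and pretrained change.
print(merge_cfg(base, child))
```

Because the merge is per-key rather than wholesale, a two-line child config is enough to turn any R-50 recipe into its R-101 twin.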
100644 index 0000000000000000000000000000000000000000..3bfb9bdb3064275c2ac3bf2a057ef8eb79c308df --- /dev/null +++ b/configs/danet/danet_r101-d8_512x1024_40k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './danet_r50-d8_512x1024_40k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/danet/danet_r101-d8_512x1024_80k_cityscapes.py b/configs/danet/danet_r101-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..d80b2ec160ae1c41499d45242713a99122d8adf8 --- /dev/null +++ b/configs/danet/danet_r101-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './danet_r50-d8_512x1024_80k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/danet/danet_r101-d8_512x512_160k_ade20k.py b/configs/danet/danet_r101-d8_512x512_160k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..0f22d0fb6362252ac02f3f152a42997c68b90343 --- /dev/null +++ b/configs/danet/danet_r101-d8_512x512_160k_ade20k.py @@ -0,0 +1,2 @@ +_base_ = './danet_r50-d8_512x512_160k_ade20k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/danet/danet_r101-d8_512x512_20k_voc12aug.py b/configs/danet/danet_r101-d8_512x512_20k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..709f93cba3e3bca6ce0635457ab1823b04123bf8 --- /dev/null +++ b/configs/danet/danet_r101-d8_512x512_20k_voc12aug.py @@ -0,0 +1,2 @@ +_base_ = './danet_r50-d8_512x512_20k_voc12aug.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/danet/danet_r101-d8_512x512_40k_voc12aug.py b/configs/danet/danet_r101-d8_512x512_40k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..5c623eb56836760694b50f3e4e66aa0f1fc069df --- /dev/null +++ b/configs/danet/danet_r101-d8_512x512_40k_voc12aug.py @@ -0,0 +1,2 @@ +_base_ = './danet_r50-d8_512x512_40k_voc12aug.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/danet/danet_r101-d8_512x512_80k_ade20k.py b/configs/danet/danet_r101-d8_512x512_80k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..bd31bc8f283fe8c322ee4876deadb89569dc1743 --- /dev/null +++ b/configs/danet/danet_r101-d8_512x512_80k_ade20k.py @@ -0,0 +1,2 @@ +_base_ = './danet_r50-d8_512x512_80k_ade20k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/danet/danet_r101-d8_769x769_40k_cityscapes.py b/configs/danet/danet_r101-d8_769x769_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..597d76de79610780b03cd91dba5f3a4f10147bcd --- /dev/null +++ b/configs/danet/danet_r101-d8_769x769_40k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './danet_r50-d8_769x769_40k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/danet/danet_r101-d8_769x769_80k_cityscapes.py b/configs/danet/danet_r101-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..70f9b31966128e8d9ec37859f57a7edfd8e6d1b2 --- /dev/null +++ b/configs/danet/danet_r101-d8_769x769_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './danet_r50-d8_769x769_80k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/danet/danet_r50-d8_512x1024_40k_cityscapes.py 
b/configs/danet/danet_r50-d8_512x1024_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..1b70c5b8d49f04661e23604ca4da56a82b1b99c9 --- /dev/null +++ b/configs/danet/danet_r50-d8_512x1024_40k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/danet_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] diff --git a/configs/danet/danet_r50-d8_512x1024_80k_cityscapes.py b/configs/danet/danet_r50-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..03734310d7338c75d48c914cb325500961c04a79 --- /dev/null +++ b/configs/danet/danet_r50-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/danet_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] diff --git a/configs/danet/danet_r50-d8_512x512_160k_ade20k.py b/configs/danet/danet_r50-d8_512x512_160k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..22aaf857c3212d0b36b0b04e7990616025a3ef9b --- /dev/null +++ b/configs/danet/danet_r50-d8_512x512_160k_ade20k.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/danet_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +model = dict( + decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) diff --git a/configs/danet/danet_r50-d8_512x512_20k_voc12aug.py b/configs/danet/danet_r50-d8_512x512_20k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..010f86f1aac1b5c827dec29f692d137dc1c399bf --- /dev/null +++ b/configs/danet/danet_r50-d8_512x512_20k_voc12aug.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/danet_r50-d8.py', + '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_20k.py' +] +model = dict( + decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21)) diff --git a/configs/danet/danet_r50-d8_512x512_40k_voc12aug.py b/configs/danet/danet_r50-d8_512x512_40k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..0cef0f09bfa2290d14fc3a783ea500d6c3da2931 --- /dev/null +++ b/configs/danet/danet_r50-d8_512x512_40k_voc12aug.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/danet_r50-d8.py', + '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21)) diff --git a/configs/danet/danet_r50-d8_512x512_80k_ade20k.py b/configs/danet/danet_r50-d8_512x512_80k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..154e84890ed73fe4813dddc8c321de6cd2854fc1 --- /dev/null +++ b/configs/danet/danet_r50-d8_512x512_80k_ade20k.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/danet_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) diff --git a/configs/danet/danet_r50-d8_769x769_40k_cityscapes.py b/configs/danet/danet_r50-d8_769x769_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..5c5b94e5a27d7f902d4bdea7ef6c4ef0b816bb99 --- /dev/null +++ b/configs/danet/danet_r50-d8_769x769_40k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/danet_r50-d8.py', + 
'../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/configs/danet/danet_r50-d8_769x769_80k_cityscapes.py b/configs/danet/danet_r50-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..c7237ae03c601204dc7c03018ca17ed363090569 --- /dev/null +++ b/configs/danet/danet_r50-d8_769x769_80k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/danet_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/configs/deeplabv3/README.md b/configs/deeplabv3/README.md new file mode 100644 index 0000000000000000000000000000000000000000..02c27753ab536ec7237897a826780aac88d04b0a --- /dev/null +++ b/configs/deeplabv3/README.md @@ -0,0 +1,73 @@ +# Rethinking atrous convolution for semantic image segmentation + +## Introduction + + + +```latex +@article{chen2017rethinking, + title={Rethinking atrous convolution for semantic image segmentation}, + author={Chen, Liang-Chieh and Papandreou, George and Schroff, Florian and Adam, Hartwig}, + journal={arXiv preprint arXiv:1706.05587}, + year={2017} +} +``` + +## Results and models + +Note: `D-8` here corresponds to the output stride 8 setting for the DeepLab series. + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| --------- | --------------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ---------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| DeepLabV3 | R-50-D8 | 512x1024 | 40000 | 6.1 | 2.57 | 79.09 | 80.45 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3/deeplabv3_r50-d8_512x1024_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x1024_40k_cityscapes/deeplabv3_r50-d8_512x1024_40k_cityscapes_20200605_022449-acadc2f8.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x1024_40k_cityscapes/deeplabv3_r50-d8_512x1024_40k_cityscapes_20200605_022449.log.json) | +| DeepLabV3 | R-101-D8 | 512x1024 | 40000 | 9.6 | 1.92 | 77.12 | 79.61 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3/deeplabv3_r101-d8_512x1024_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x1024_40k_cityscapes/deeplabv3_r101-d8_512x1024_40k_cityscapes_20200605_012241-7fd3f799.pth) | 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x1024_40k_cityscapes/deeplabv3_r101-d8_512x1024_40k_cityscapes_20200605_012241.log.json) | +| DeepLabV3 | R-50-D8 | 769x769 | 40000 | 6.9 | 1.11 | 78.58 | 79.89 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3/deeplabv3_r50-d8_769x769_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_769x769_40k_cityscapes/deeplabv3_r50-d8_769x769_40k_cityscapes_20200606_113723-7eda553c.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_769x769_40k_cityscapes/deeplabv3_r50-d8_769x769_40k_cityscapes_20200606_113723.log.json) | +| DeepLabV3 | R-101-D8 | 769x769 | 40000 | 10.9 | 0.83 | 79.27 | 80.11 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3/deeplabv3_r101-d8_769x769_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_769x769_40k_cityscapes/deeplabv3_r101-d8_769x769_40k_cityscapes_20200606_113809-c64f889f.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_769x769_40k_cityscapes/deeplabv3_r101-d8_769x769_40k_cityscapes_20200606_113809.log.json) | +| DeepLabV3 | R-18-D8 | 512x1024 | 80000 | 1.7 | 13.78 | 76.70 | 78.27 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3/deeplabv3_r18-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r18-d8_512x1024_80k_cityscapes/deeplabv3_r18-d8_512x1024_80k_cityscapes_20201225_021506-23dffbe2.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r18-d8_512x1024_80k_cityscapes/deeplabv3_r18-d8_512x1024_80k_cityscapes-20201225_021506.log.json) | +| DeepLabV3 | R-50-D8 | 512x1024 | 80000 | - | - | 79.32 | 80.57 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3/deeplabv3_r50-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x1024_80k_cityscapes/deeplabv3_r50-d8_512x1024_80k_cityscapes_20200606_113404-b92cfdd4.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x1024_80k_cityscapes/deeplabv3_r50-d8_512x1024_80k_cityscapes_20200606_113404.log.json) | +| DeepLabV3 | R-101-D8 | 512x1024 | 80000 | - | - | 80.20 | 81.21 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3/deeplabv3_r101-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x1024_80k_cityscapes/deeplabv3_r101-d8_512x1024_80k_cityscapes_20200606_113503-9e428899.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x1024_80k_cityscapes/deeplabv3_r101-d8_512x1024_80k_cityscapes_20200606_113503.log.json) | +| DeepLabV3 | R-18-D8 | 769x769 | 80000 | 1.9 | 5.55 | 76.60 | 78.26 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3/deeplabv3_r18-d8_769x769_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r18-d8_769x769_80k_cityscapes/deeplabv3_r18-d8_769x769_80k_cityscapes_20201225_021506-6452126a.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r18-d8_769x769_80k_cityscapes/deeplabv3_r18-d8_769x769_80k_cityscapes-20201225_021506.log.json) | +| 
DeepLabV3 | R-50-D8 | 769x769 | 80000 | - | - | 79.89 | 81.06 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3/deeplabv3_r50-d8_769x769_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_769x769_80k_cityscapes/deeplabv3_r50-d8_769x769_80k_cityscapes_20200606_221338-788d6228.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_769x769_80k_cityscapes/deeplabv3_r50-d8_769x769_80k_cityscapes_20200606_221338.log.json) | +| DeepLabV3 | R-101-D8 | 769x769 | 80000 | - | - | 79.67 | 80.81 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3/deeplabv3_r101-d8_769x769_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_769x769_80k_cityscapes/deeplabv3_r101-d8_769x769_80k_cityscapes_20200607_013353-60e95418.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_769x769_80k_cityscapes/deeplabv3_r101-d8_769x769_80k_cityscapes_20200607_013353.log.json) | +| DeepLabV3 | R-101-D16-MG124 | 512x1024 | 40000 | 4.7 | 6.96 | 76.71 | 78.63 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3/deeplabv3_r101-d16-mg124_512x1024_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d16-mg124_512x1024_40k_cityscapes/deeplabv3_r101-d16-mg124_512x1024_40k_cityscapes_20200908_005644-67b0c992.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d16-mg124_512x1024_40k_cityscapes/deeplabv3_r101-d16-mg124_512x1024_40k_cityscapes-20200908_005644.log.json) | +| DeepLabV3 | R-101-D16-MG124 | 512x1024 | 80000 | - | - | 78.36 | 79.84 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3/deeplabv3_r101-d16-mg124_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d16-mg124_512x1024_80k_cityscapes/deeplabv3_r101-d16-mg124_512x1024_80k_cityscapes_20200908_005644-57bb8425.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d16-mg124_512x1024_80k_cityscapes/deeplabv3_r101-d16-mg124_512x1024_80k_cityscapes-20200908_005644.log.json) | +| DeepLabV3 | R-18b-D8 | 512x1024 | 80000 | 1.6 | 13.93 | 76.26 | 77.88 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3/deeplabv3_r18b-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r18b-d8_512x1024_80k_cityscapes/deeplabv3_r18b-d8_512x1024_80k_cityscapes_20201225_094144-46040cef.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r18b-d8_512x1024_80k_cityscapes/deeplabv3_r18b-d8_512x1024_80k_cityscapes-20201225_094144.log.json) | +| DeepLabV3 | R-50b-D8 | 512x1024 | 80000 | 6.0 | 2.74 | 79.63 | 80.98 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3/deeplabv3_r50b-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50b-d8_512x1024_80k_cityscapes/deeplabv3_r50b-d8_512x1024_80k_cityscapes_20201225_155148-ec368954.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50b-d8_512x1024_80k_cityscapes/deeplabv3_r50b-d8_512x1024_80k_cityscapes-20201225_155148.log.json) | +| DeepLabV3 | R-101b-D8 | 512x1024 | 80000 | 9.5 | 1.81 | 80.01 | 81.21 | 
[config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3/deeplabv3_r101b-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101b-d8_512x1024_80k_cityscapes/deeplabv3_r101b-d8_512x1024_80k_cityscapes_20201226_171821-8fd49503.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101b-d8_512x1024_80k_cityscapes/deeplabv3_r101b-d8_512x1024_80k_cityscapes-20201226_171821.log.json) | +| DeepLabV3 | R-18b-D8 | 769x769 | 80000 | 1.8 | 5.79 | 76.63 | 77.51 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3/deeplabv3_r18b-d8_769x769_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r18b-d8_769x769_80k_cityscapes/deeplabv3_r18b-d8_769x769_80k_cityscapes_20201225_094144-fdc985d9.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r18b-d8_769x769_80k_cityscapes/deeplabv3_r18b-d8_769x769_80k_cityscapes-20201225_094144.log.json) | +| DeepLabV3 | R-50b-D8 | 769x769 | 80000 | 6.8 | 1.16 | 78.80 | 80.27 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3/deeplabv3_r50b-d8_769x769_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50b-d8_769x769_80k_cityscapes/deeplabv3_r50b-d8_769x769_80k_cityscapes_20201225_155404-87fb0cf4.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50b-d8_769x769_80k_cityscapes/deeplabv3_r50b-d8_769x769_80k_cityscapes-20201225_155404.log.json) | +| DeepLabV3 | R-101b-D8 | 769x769 | 80000 | 10.7 | 0.82 | 79.41 | 80.73 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3/deeplabv3_r101b-d8_769x769_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101b-d8_769x769_80k_cityscapes/deeplabv3_r101b-d8_769x769_80k_cityscapes_20201226_190843-9142ee57.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101b-d8_769x769_80k_cityscapes/deeplabv3_r101b-d8_769x769_80k_cityscapes-20201226_190843.log.json) | + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| --------- | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ----------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| DeepLabV3 | R-50-D8 | 512x512 | 80000 | 8.9 | 14.76 | 42.42 | 43.28 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3/deeplabv3_r50-d8_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_80k_ade20k/deeplabv3_r50-d8_512x512_80k_ade20k_20200614_185028-0bb3f844.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_80k_ade20k/deeplabv3_r50-d8_512x512_80k_ade20k_20200614_185028.log.json) | +| DeepLabV3 | R-101-D8 | 512x512 | 80000 | 12.4 | 10.14 | 44.08 | 45.19 | 
[config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3/deeplabv3_r101-d8_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_80k_ade20k/deeplabv3_r101-d8_512x512_80k_ade20k_20200615_021256-d89c7fa4.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_80k_ade20k/deeplabv3_r101-d8_512x512_80k_ade20k_20200615_021256.log.json) | +| DeepLabV3 | R-50-D8 | 512x512 | 160000 | - | - | 42.66 | 44.09 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3/deeplabv3_r50-d8_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_160k_ade20k/deeplabv3_r50-d8_512x512_160k_ade20k_20200615_123227-5d0ee427.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_160k_ade20k/deeplabv3_r50-d8_512x512_160k_ade20k_20200615_123227.log.json) | +| DeepLabV3 | R-101-D8 | 512x512 | 160000 | - | - | 45.00 | 46.66 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3/deeplabv3_r101-d8_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_160k_ade20k/deeplabv3_r101-d8_512x512_160k_ade20k_20200615_105816-b1f72b3b.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_160k_ade20k/deeplabv3_r101-d8_512x512_160k_ade20k_20200615_105816.log.json) | + +### Pascal VOC 2012 + Aug + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| --------- | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| DeepLabV3 | R-50-D8 | 512x512 | 20000 | 6.1 | 13.88 | 76.17 | 77.42 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3/deeplabv3_r50-d8_512x512_20k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_20k_voc12aug/deeplabv3_r50-d8_512x512_20k_voc12aug_20200617_010906-596905ef.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_20k_voc12aug/deeplabv3_r50-d8_512x512_20k_voc12aug_20200617_010906.log.json) | +| DeepLabV3 | R-101-D8 | 512x512 | 20000 | 9.6 | 9.81 | 78.70 | 79.95 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3/deeplabv3_r101-d8_512x512_20k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_20k_voc12aug/deeplabv3_r101-d8_512x512_20k_voc12aug_20200617_010932-8d13832f.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_20k_voc12aug/deeplabv3_r101-d8_512x512_20k_voc12aug_20200617_010932.log.json) | +| DeepLabV3 | R-50-D8 | 512x512 | 40000 | - | - | 77.68 | 78.78 | 
[config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3/deeplabv3_r50-d8_512x512_40k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_40k_voc12aug/deeplabv3_r50-d8_512x512_40k_voc12aug_20200613_161546-2ae96e7e.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_40k_voc12aug/deeplabv3_r50-d8_512x512_40k_voc12aug_20200613_161546.log.json) | +| DeepLabV3 | R-101-D8 | 512x512 | 40000 | - | - | 77.92 | 79.18 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3/deeplabv3_r101-d8_512x512_40k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_40k_voc12aug/deeplabv3_r101-d8_512x512_40k_voc12aug_20200613_161432-0017d784.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_40k_voc12aug/deeplabv3_r101-d8_512x512_40k_voc12aug_20200613_161432.log.json) | + +### Pascal Context + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| --------- | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| DeepLabV3 | R-101-D8 | 480x480 | 40000 | 9.2 | 7.09 | 46.55 | 47.81 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3/deeplabv3_r101-d8_480x480_40k_pascal_context.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_480x480_40k_pascal_context/deeplabv3_r101-d8_480x480_40k_pascal_context_20200911_204118-1aa27336.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_480x480_40k_pascal_context/deeplabv3_r101-d8_480x480_40k_pascal_context-20200911_204118.log.json) | +| DeepLabV3 | R-101-D8 | 480x480 | 80000 | - | - | 46.42 | 47.53 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3/deeplabv3_r101-d8_480x480_80k_pascal_context.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_480x480_80k_pascal_context/deeplabv3_r101-d8_480x480_80k_pascal_context_20200911_170155-2a21fff3.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_480x480_80k_pascal_context/deeplabv3_r101-d8_480x480_80k_pascal_context-20200911_170155.log.json) | + +### Pascal Context 59 + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| --------- | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------------------------ | 
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| DeepLabV3 | R-101-D8 | 480x480 | 40000 | - | - | 52.61 | 54.28 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3/deeplabv3_r101-d8_480x480_40k_pascal_context_59.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_480x480_40k_pascal_context_59/deeplabv3_r101-d8_480x480_40k_pascal_context_59_20210416_110332-cb08ea46.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_480x480_40k_pascal_context_59/deeplabv3_r101-d8_480x480_40k_pascal_context_59-20210416_110332.log.json) | +| DeepLabV3 | R-101-D8 | 480x480 | 80000 | - | - | 52.46 | 54.09 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3/deeplabv3_r101-d8_480x480_80k_pascal_context_59.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_480x480_80k_pascal_context_59/deeplabv3_r101-d8_480x480_80k_pascal_context_59_20210416_113002-26303993.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_480x480_80k_pascal_context_59/deeplabv3_r101-d8_480x480_80k_pascal_context_59-20210416_113002.log.json) | diff --git a/configs/deeplabv3/deeplabv3_r101-d16-mg124_512x1024_40k_cityscapes.py b/configs/deeplabv3/deeplabv3_r101-d16-mg124_512x1024_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..f20f260e23a95dfee9dfdceef9badab992246f53 --- /dev/null +++ b/configs/deeplabv3/deeplabv3_r101-d16-mg124_512x1024_40k_cityscapes.py @@ -0,0 +1,11 @@ +_base_ = './deeplabv3_r50-d8_512x1024_40k_cityscapes.py' +model = dict( + pretrained='open-mmlab://resnet101_v1c', + backbone=dict( + depth=101, + dilations=(1, 1, 1, 2), + strides=(1, 2, 2, 1), + multi_grid=(1, 2, 4)), + decode_head=dict( + dilations=(1, 6, 12, 18), + sampler=dict(type='OHEMPixelSampler', min_kept=100000))) diff --git a/configs/deeplabv3/deeplabv3_r101-d16-mg124_512x1024_80k_cityscapes.py b/configs/deeplabv3/deeplabv3_r101-d16-mg124_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..de4a8a5e9f030f1e8a8802596885186163f23eed --- /dev/null +++ b/configs/deeplabv3/deeplabv3_r101-d16-mg124_512x1024_80k_cityscapes.py @@ -0,0 +1,11 @@ +_base_ = './deeplabv3_r50-d8_512x1024_80k_cityscapes.py' +model = dict( + pretrained='open-mmlab://resnet101_v1c', + backbone=dict( + depth=101, + dilations=(1, 1, 1, 2), + strides=(1, 2, 2, 1), + multi_grid=(1, 2, 4)), + decode_head=dict( + dilations=(1, 6, 12, 18), + sampler=dict(type='OHEMPixelSampler', min_kept=100000))) diff --git a/configs/deeplabv3/deeplabv3_r101-d8_480x480_40k_pascal_context.py b/configs/deeplabv3/deeplabv3_r101-d8_480x480_40k_pascal_context.py new file mode 100644 index 0000000000000000000000000000000000000000..0b5256f7b7b053cbe8d9e4ca2ec6139bb02387f6 --- /dev/null +++ b/configs/deeplabv3/deeplabv3_r101-d8_480x480_40k_pascal_context.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3_r50-d8_480x480_40k_pascal_context.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/deeplabv3/deeplabv3_r101-d8_480x480_40k_pascal_context_59.py 
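The `d16-mg124` configs just above are the only ones in this patch that touch the backbone's effective output stride: `strides=(1, 2, 2, 1)` with `dilations=(1, 1, 1, 2)` gives stride 16 rather than the `D-8` default, and `multi_grid=(1, 2, 4)` scales the last stage's dilation per block. A quick sketch of the stride arithmetic, assuming the usual ResNet stem downsampling of 4 and the `strides=(1, 2, 1, 1)` D-8 default from the shared base model file (not shown in this hunk); illustration only:

```python
from functools import reduce
from operator import mul

def output_stride(stage_strides, stem_stride=4):
    """Effective output stride: stem downsampling times stage strides."""
    return stem_stride * reduce(mul, stage_strides, 1)

print(output_stride((1, 2, 2, 1)))  # 16: the D-16 config above
print(output_stride((1, 2, 1, 1)))  # 8: the assumed D-8 default, where
                                    #    dilation replaces the removed strides
```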
b/configs/deeplabv3/deeplabv3_r101-d8_480x480_40k_pascal_context_59.py new file mode 100644 index 0000000000000000000000000000000000000000..4874121fd01e4024bfde445f451b7368c6834511 --- /dev/null +++ b/configs/deeplabv3/deeplabv3_r101-d8_480x480_40k_pascal_context_59.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3_r50-d8_480x480_40k_pascal_context_59.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/deeplabv3/deeplabv3_r101-d8_480x480_80k_pascal_context.py b/configs/deeplabv3/deeplabv3_r101-d8_480x480_80k_pascal_context.py new file mode 100644 index 0000000000000000000000000000000000000000..001b7a69c15299fc1fe5b269a5accf92c5ece032 --- /dev/null +++ b/configs/deeplabv3/deeplabv3_r101-d8_480x480_80k_pascal_context.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3_r50-d8_480x480_80k_pascal_context.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/deeplabv3/deeplabv3_r101-d8_480x480_80k_pascal_context_59.py b/configs/deeplabv3/deeplabv3_r101-d8_480x480_80k_pascal_context_59.py new file mode 100644 index 0000000000000000000000000000000000000000..032dc8b6219421698c3a1bfb4bca5addfeea1ab3 --- /dev/null +++ b/configs/deeplabv3/deeplabv3_r101-d8_480x480_80k_pascal_context_59.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3_r50-d8_480x480_80k_pascal_context_59.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/deeplabv3/deeplabv3_r101-d8_512x1024_40k_cityscapes.py b/configs/deeplabv3/deeplabv3_r101-d8_512x1024_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..8c707c79d659bc544d242352bcb29686eb40b004 --- /dev/null +++ b/configs/deeplabv3/deeplabv3_r101-d8_512x1024_40k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3_r50-d8_512x1024_40k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/deeplabv3/deeplabv3_r101-d8_512x1024_80k_cityscapes.py b/configs/deeplabv3/deeplabv3_r101-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..6804a5781369d1031f179d421a3b5a160fd575d3 --- /dev/null +++ b/configs/deeplabv3/deeplabv3_r101-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3_r50-d8_512x1024_80k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/deeplabv3/deeplabv3_r101-d8_512x512_160k_ade20k.py b/configs/deeplabv3/deeplabv3_r101-d8_512x512_160k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..df6f36ef7c3b71ba7979aa7a1b226b3e3ebd9bb4 --- /dev/null +++ b/configs/deeplabv3/deeplabv3_r101-d8_512x512_160k_ade20k.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3_r50-d8_512x512_160k_ade20k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/deeplabv3/deeplabv3_r101-d8_512x512_20k_voc12aug.py b/configs/deeplabv3/deeplabv3_r101-d8_512x512_20k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..40f5f62373e59d1c6c01ca3f57777698461127c9 --- /dev/null +++ b/configs/deeplabv3/deeplabv3_r101-d8_512x512_20k_voc12aug.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3_r50-d8_512x512_20k_voc12aug.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/deeplabv3/deeplabv3_r101-d8_512x512_40k_voc12aug.py b/configs/deeplabv3/deeplabv3_r101-d8_512x512_40k_voc12aug.py new file mode 100644 index 
0000000000000000000000000000000000000000..fb2be22f8bc2e10cdfba4f58b2ad1ced913b4ea4 --- /dev/null +++ b/configs/deeplabv3/deeplabv3_r101-d8_512x512_40k_voc12aug.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3_r50-d8_512x512_40k_voc12aug.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/deeplabv3/deeplabv3_r101-d8_512x512_80k_ade20k.py b/configs/deeplabv3/deeplabv3_r101-d8_512x512_80k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..796ba3fb142394c4d93a29ba57548dca59d8d02b --- /dev/null +++ b/configs/deeplabv3/deeplabv3_r101-d8_512x512_80k_ade20k.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3_r50-d8_512x512_80k_ade20k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/deeplabv3/deeplabv3_r101-d8_769x769_40k_cityscapes.py b/configs/deeplabv3/deeplabv3_r101-d8_769x769_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..e6d58a67b3b4dddf3da42efca30fa599e623f183 --- /dev/null +++ b/configs/deeplabv3/deeplabv3_r101-d8_769x769_40k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3_r50-d8_769x769_40k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/deeplabv3/deeplabv3_r101-d8_769x769_80k_cityscapes.py b/configs/deeplabv3/deeplabv3_r101-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..13094a98ee9be3cf8c88370e1e111cb4dde03ec4 --- /dev/null +++ b/configs/deeplabv3/deeplabv3_r101-d8_769x769_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3_r50-d8_769x769_80k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/deeplabv3/deeplabv3_r101b-d8_512x1024_80k_cityscapes.py b/configs/deeplabv3/deeplabv3_r101b-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..5186bf614bc9ebffe47323ea61afbc9604be265b --- /dev/null +++ b/configs/deeplabv3/deeplabv3_r101b-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = './deeplabv3_r50-d8_512x1024_80k_cityscapes.py' +model = dict( + pretrained='torchvision://resnet101', + backbone=dict(type='ResNet', depth=101)) diff --git a/configs/deeplabv3/deeplabv3_r101b-d8_769x769_80k_cityscapes.py b/configs/deeplabv3/deeplabv3_r101b-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..d185db95adc61734f11f0dcd7b6c45aa652680b0 --- /dev/null +++ b/configs/deeplabv3/deeplabv3_r101b-d8_769x769_80k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = './deeplabv3_r50-d8_769x769_80k_cityscapes.py' +model = dict( + pretrained='torchvision://resnet101', + backbone=dict(type='ResNet', depth=101)) diff --git a/configs/deeplabv3/deeplabv3_r18-d8_512x1024_80k_cityscapes.py b/configs/deeplabv3/deeplabv3_r18-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..e084e95c70b0b7b0c9dcc3388d6b7d3d51d54b6d --- /dev/null +++ b/configs/deeplabv3/deeplabv3_r18-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = './deeplabv3_r50-d8_512x1024_80k_cityscapes.py' +model = dict( + pretrained='open-mmlab://resnet18_v1c', + backbone=dict(depth=18), + decode_head=dict( + in_channels=512, + channels=128, + ), + auxiliary_head=dict(in_channels=256, channels=64)) diff --git a/configs/deeplabv3/deeplabv3_r18-d8_769x769_80k_cityscapes.py b/configs/deeplabv3/deeplabv3_r18-d8_769x769_80k_cityscapes.py new file mode 100644 index 
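The R-18 variants above shrink both heads because ResNet-18 uses BasicBlocks with no channel expansion: its stage widths are (64, 128, 256, 512) versus (256, 512, 1024, 2048) for the bottleneck-based R-50/R-101, so `decode_head.in_channels` drops to 512 and the auxiliary head's to 256. A small sketch of where those numbers come from (an illustrative helper, not part of this patch):

```python
def stage_channels(depth: int) -> tuple:
    """ResNet stage output widths: BasicBlock nets (18/34) keep the base
    widths, Bottleneck nets (50/101/152) expand them 4x."""
    base = (64, 128, 256, 512)
    expansion = 1 if depth in (18, 34) else 4
    return tuple(c * expansion for c in base)

# decode_head reads the last stage, auxiliary_head the second-to-last:
print(stage_channels(18))   # (64, 128, 256, 512)    -> 512 / 256
print(stage_channels(101))  # (256, 512, 1024, 2048) -> 2048 / 1024
```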
0000000000000000000000000000000000000000..a990c076536ad9455a9203f5b6a60157f2f2f99f --- /dev/null +++ b/configs/deeplabv3/deeplabv3_r18-d8_769x769_80k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = './deeplabv3_r50-d8_769x769_80k_cityscapes.py' +model = dict( + pretrained='open-mmlab://resnet18_v1c', + backbone=dict(depth=18), + decode_head=dict( + in_channels=512, + channels=128, + ), + auxiliary_head=dict(in_channels=256, channels=64)) diff --git a/configs/deeplabv3/deeplabv3_r18b-d8_512x1024_80k_cityscapes.py b/configs/deeplabv3/deeplabv3_r18b-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..b25e725ed98324e6ea648567740dc67e0413b4f9 --- /dev/null +++ b/configs/deeplabv3/deeplabv3_r18b-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = './deeplabv3_r50-d8_512x1024_80k_cityscapes.py' +model = dict( + pretrained='torchvision://resnet18', + backbone=dict(type='ResNet', depth=18), + decode_head=dict( + in_channels=512, + channels=128, + ), + auxiliary_head=dict(in_channels=256, channels=64)) diff --git a/configs/deeplabv3/deeplabv3_r18b-d8_769x769_80k_cityscapes.py b/configs/deeplabv3/deeplabv3_r18b-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..fd920f0ca7c690d3d1c44f5f7be1cbea18fa14d4 --- /dev/null +++ b/configs/deeplabv3/deeplabv3_r18b-d8_769x769_80k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = './deeplabv3_r50-d8_769x769_80k_cityscapes.py' +model = dict( + pretrained='torchvision://resnet18', + backbone=dict(type='ResNet', depth=18), + decode_head=dict( + in_channels=512, + channels=128, + ), + auxiliary_head=dict(in_channels=256, channels=64)) diff --git a/configs/deeplabv3/deeplabv3_r50-d8_480x480_40k_pascal_context.py b/configs/deeplabv3/deeplabv3_r50-d8_480x480_40k_pascal_context.py new file mode 100644 index 0000000000000000000000000000000000000000..9d493ef527bb161be98d0e4ea433104b3bb9ff48 --- /dev/null +++ b/configs/deeplabv3/deeplabv3_r50-d8_480x480_40k_pascal_context.py @@ -0,0 +1,10 @@ +_base_ = [ + '../_base_/models/deeplabv3_r50-d8.py', + '../_base_/datasets/pascal_context.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(num_classes=60), + auxiliary_head=dict(num_classes=60), + test_cfg=dict(mode='slide', crop_size=(480, 480), stride=(320, 320))) +optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001) diff --git a/configs/deeplabv3/deeplabv3_r50-d8_480x480_40k_pascal_context_59.py b/configs/deeplabv3/deeplabv3_r50-d8_480x480_40k_pascal_context_59.py new file mode 100644 index 0000000000000000000000000000000000000000..038993c6a434d843ddcd1f754bec191ae9da983e --- /dev/null +++ b/configs/deeplabv3/deeplabv3_r50-d8_480x480_40k_pascal_context_59.py @@ -0,0 +1,10 @@ +_base_ = [ + '../_base_/models/deeplabv3_r50-d8.py', + '../_base_/datasets/pascal_context_59.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(num_classes=59), + auxiliary_head=dict(num_classes=59), + test_cfg=dict(mode='slide', crop_size=(480, 480), stride=(320, 320))) +optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001) diff --git a/configs/deeplabv3/deeplabv3_r50-d8_480x480_80k_pascal_context.py b/configs/deeplabv3/deeplabv3_r50-d8_480x480_80k_pascal_context.py new file mode 100644 index 0000000000000000000000000000000000000000..71a0fda48aa2538e4d913e73e94a71564377ea50 --- /dev/null +++ b/configs/deeplabv3/deeplabv3_r50-d8_480x480_80k_pascal_context.py @@ -0,0 
+1,10 @@ +_base_ = [ + '../_base_/models/deeplabv3_r50-d8.py', + '../_base_/datasets/pascal_context.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(num_classes=60), + auxiliary_head=dict(num_classes=60), + test_cfg=dict(mode='slide', crop_size=(480, 480), stride=(320, 320))) +optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001) diff --git a/configs/deeplabv3/deeplabv3_r50-d8_480x480_80k_pascal_context_59.py b/configs/deeplabv3/deeplabv3_r50-d8_480x480_80k_pascal_context_59.py new file mode 100644 index 0000000000000000000000000000000000000000..bcdc0b459d23e4392e66c5ea615c6c3ad3147ace --- /dev/null +++ b/configs/deeplabv3/deeplabv3_r50-d8_480x480_80k_pascal_context_59.py @@ -0,0 +1,10 @@ +_base_ = [ + '../_base_/models/deeplabv3_r50-d8.py', + '../_base_/datasets/pascal_context_59.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(num_classes=59), + auxiliary_head=dict(num_classes=59), + test_cfg=dict(mode='slide', crop_size=(480, 480), stride=(320, 320))) +optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001) diff --git a/configs/deeplabv3/deeplabv3_r50-d8_512x1024_40k_cityscapes.py b/configs/deeplabv3/deeplabv3_r50-d8_512x1024_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..8e7420d24a20b662286266cac58cab4721dc8df3 --- /dev/null +++ b/configs/deeplabv3/deeplabv3_r50-d8_512x1024_40k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/deeplabv3_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] diff --git a/configs/deeplabv3/deeplabv3_r50-d8_512x1024_80k_cityscapes.py b/configs/deeplabv3/deeplabv3_r50-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..132787db98d3fc9df5ed62e31738c82da8c279bf --- /dev/null +++ b/configs/deeplabv3/deeplabv3_r50-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/deeplabv3_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] diff --git a/configs/deeplabv3/deeplabv3_r50-d8_512x512_160k_ade20k.py b/configs/deeplabv3/deeplabv3_r50-d8_512x512_160k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..b4a9d4e1b9123b3c965cd430237ce9fcc7018a11 --- /dev/null +++ b/configs/deeplabv3/deeplabv3_r50-d8_512x512_160k_ade20k.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/deeplabv3_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +model = dict( + decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) diff --git a/configs/deeplabv3/deeplabv3_r50-d8_512x512_20k_voc12aug.py b/configs/deeplabv3/deeplabv3_r50-d8_512x512_20k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..f62da1a8090da389a77d77a9887926af2a7ded49 --- /dev/null +++ b/configs/deeplabv3/deeplabv3_r50-d8_512x512_20k_voc12aug.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/deeplabv3_r50-d8.py', + '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_20k.py' +] +model = dict( + decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21)) diff --git a/configs/deeplabv3/deeplabv3_r50-d8_512x512_40k_voc12aug.py b/configs/deeplabv3/deeplabv3_r50-d8_512x512_40k_voc12aug.py new file mode 
100644 index 0000000000000000000000000000000000000000..492bd3dfdce331070cb9645dbe55142e9b662da1 --- /dev/null +++ b/configs/deeplabv3/deeplabv3_r50-d8_512x512_40k_voc12aug.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/deeplabv3_r50-d8.py', + '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21)) diff --git a/configs/deeplabv3/deeplabv3_r50-d8_512x512_80k_ade20k.py b/configs/deeplabv3/deeplabv3_r50-d8_512x512_80k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..78f4d0d9de3d6b8dd2b097531317956d8e3b19f1 --- /dev/null +++ b/configs/deeplabv3/deeplabv3_r50-d8_512x512_80k_ade20k.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/deeplabv3_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) diff --git a/configs/deeplabv3/deeplabv3_r50-d8_769x769_40k_cityscapes.py b/configs/deeplabv3/deeplabv3_r50-d8_769x769_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..e35d1988f0bb7ad47a73ef1a64b73d9b40e0ba40 --- /dev/null +++ b/configs/deeplabv3/deeplabv3_r50-d8_769x769_40k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/deeplabv3_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/configs/deeplabv3/deeplabv3_r50-d8_769x769_80k_cityscapes.py b/configs/deeplabv3/deeplabv3_r50-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..dd7c16580d0620bc854f2c6eb7c881bdcd23020a --- /dev/null +++ b/configs/deeplabv3/deeplabv3_r50-d8_769x769_80k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/deeplabv3_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/configs/deeplabv3/deeplabv3_r50b-d8_512x1024_80k_cityscapes.py b/configs/deeplabv3/deeplabv3_r50b-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..e742d9a5ec2b6addf829cb802de27ea1afd53301 --- /dev/null +++ b/configs/deeplabv3/deeplabv3_r50b-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3_r50-d8_512x1024_80k_cityscapes.py' +model = dict(pretrained='torchvision://resnet50', backbone=dict(type='ResNet')) diff --git a/configs/deeplabv3/deeplabv3_r50b-d8_769x769_80k_cityscapes.py b/configs/deeplabv3/deeplabv3_r50b-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..332d9cfb79fb698c7867f0f80053c1fd29bf2c1d --- /dev/null +++ b/configs/deeplabv3/deeplabv3_r50b-d8_769x769_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3_r50-d8_769x769_80k_cityscapes.py' +model = dict(pretrained='torchvision://resnet50', backbone=dict(type='ResNet')) diff --git a/configs/deeplabv3plus/README.md b/configs/deeplabv3plus/README.md new file mode 100644 index 
0000000000000000000000000000000000000000..be46e329b6b602f2f6fe77eb1af161b072c92534
--- /dev/null
+++ b/configs/deeplabv3plus/README.md
@@ -0,0 +1,75 @@
+# Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation
+
+## Introduction
+
+
+
+```latex
+@inproceedings{deeplabv3plus2018,
+ title={Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation},
+ author={Liang-Chieh Chen and Yukun Zhu and George Papandreou and Florian Schroff and Hartwig Adam},
+ booktitle={ECCV},
+ year={2018}
+}
+```
+
+## Results and models
+
+Note:
+`D-8`/`D-16` here correspond to the output stride 8/16 setting for the DeepLab series.
+`MG-124` stands for multi-grid dilation in the last stage of ResNet.
+
+### Cityscapes
+
+| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download |
+| ---------- | --------------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ------ | -------- |
+| DeepLabV3+ | R-50-D8 | 512x1024 | 40000 | 7.5 | 3.94 | 79.61 | 81.01 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x1024_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x1024_40k_cityscapes/deeplabv3plus_r50-d8_512x1024_40k_cityscapes_20200605_094610-d222ffcd.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x1024_40k_cityscapes/deeplabv3plus_r50-d8_512x1024_40k_cityscapes_20200605_094610.log.json) |
+| DeepLabV3+ | R-101-D8 | 512x1024 | 40000 | 11 | 2.60 | 80.21 | 81.82 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3plus/deeplabv3plus_r101-d8_512x1024_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x1024_40k_cityscapes/deeplabv3plus_r101-d8_512x1024_40k_cityscapes_20200605_094614-3769eecf.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x1024_40k_cityscapes/deeplabv3plus_r101-d8_512x1024_40k_cityscapes_20200605_094614.log.json) |
+| DeepLabV3+ | R-50-D8 | 769x769 | 40000 | 8.5 | 1.72 | 78.97 | 80.46 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3plus/deeplabv3plus_r50-d8_769x769_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_769x769_40k_cityscapes/deeplabv3plus_r50-d8_769x769_40k_cityscapes_20200606_114143-1dcb0e3c.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_769x769_40k_cityscapes/deeplabv3plus_r50-d8_769x769_40k_cityscapes_20200606_114143.log.json) |
+| DeepLabV3+ | R-101-D8 | 769x769 | 40000 | 12.5 | 1.15 | 79.46 | 80.50 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3plus/deeplabv3plus_r101-d8_769x769_40k_cityscapes.py) |
[model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_769x769_40k_cityscapes/deeplabv3plus_r101-d8_769x769_40k_cityscapes_20200606_114304-ff414b9e.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_769x769_40k_cityscapes/deeplabv3plus_r101-d8_769x769_40k_cityscapes_20200606_114304.log.json) | +| DeepLabV3+ | R-18-D8 | 512x1024 | 80000 | 2.2 | 14.27 | 76.89 | 78.76 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3plus/deeplabv3plus_r18-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18-d8_512x1024_80k_cityscapes/deeplabv3plus_r18-d8_512x1024_80k_cityscapes_20201226_080942-cff257fe.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18-d8_512x1024_80k_cityscapes/deeplabv3plus_r18-d8_512x1024_80k_cityscapes-20201226_080942.log.json) | +| DeepLabV3+ | R-50-D8 | 512x1024 | 80000 | - | - | 80.09 | 81.13 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x1024_80k_cityscapes/deeplabv3plus_r50-d8_512x1024_80k_cityscapes_20200606_114049-f9fb496d.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x1024_80k_cityscapes/deeplabv3plus_r50-d8_512x1024_80k_cityscapes_20200606_114049.log.json) | +| DeepLabV3+ | R-101-D8 | 512x1024 | 80000 | - | - | 80.97 | 82.03 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3plus/deeplabv3plus_r101-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x1024_80k_cityscapes/deeplabv3plus_r101-d8_512x1024_80k_cityscapes_20200606_114143-068fcfe9.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x1024_80k_cityscapes/deeplabv3plus_r101-d8_512x1024_80k_cityscapes_20200606_114143.log.json) | +| DeepLabV3+ | R-18-D8 | 769x769 | 80000 | 2.5 | 5.74 | 76.26 | 77.91 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3plus/deeplabv3plus_r18-d8_769x769_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18-d8_769x769_80k_cityscapes/deeplabv3plus_r18-d8_769x769_80k_cityscapes_20201226_083346-f326e06a.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18-d8_769x769_80k_cityscapes/deeplabv3plus_r18-d8_769x769_80k_cityscapes-20201226_083346.log.json) | +| DeepLabV3+ | R-50-D8 | 769x769 | 80000 | - | - | 79.83 | 81.48 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3plus/deeplabv3plus_r50-d8_769x769_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_769x769_80k_cityscapes/deeplabv3plus_r50-d8_769x769_80k_cityscapes_20200606_210233-0e9dfdc4.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_769x769_80k_cityscapes/deeplabv3plus_r50-d8_769x769_80k_cityscapes_20200606_210233.log.json) | +| DeepLabV3+ | R-101-D8 | 769x769 | 80000 | - | - | 80.98 | 82.18 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3plus/deeplabv3plus_r101-d8_769x769_80k_cityscapes.py) 
| [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_769x769_80k_cityscapes/deeplabv3plus_r101-d8_769x769_80k_cityscapes_20200607_000405-a7573d20.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_769x769_80k_cityscapes/deeplabv3plus_r101-d8_769x769_80k_cityscapes_20200607_000405.log.json) | +| DeepLabV3+ | R-101-D16-MG124 | 512x1024 | 40000 | 5.8 | 7.48 | 79.09 | 80.36 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3plus/deeplabv3plus_r101-d16-mg124_512x1024_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d16-mg124_512x1024_40k_cityscapes/deeplabv3plus_r101-d16-mg124_512x1024_40k_cityscapes_20200908_005644-cf9ce186.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d16-mg124_512x1024_40k_cityscapes/deeplabv3plus_r101-d16-mg124_512x1024_40k_cityscapes-20200908_005644.log.json) | +| DeepLabV3+ | R-101-D16-MG124 | 512x1024 | 80000 | 9.9 | - | 79.90 | 81.33 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3plus/deeplabv3plus_r101-d16-mg124_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d16-mg124_512x1024_80k_cityscapes/deeplabv3plus_r101-d16-mg124_512x1024_80k_cityscapes_20200908_005644-ee6158e0.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d16-mg124_512x1024_80k_cityscapes/deeplabv3plus_r101-d16-mg124_512x1024_80k_cityscapes-20200908_005644.log.json) | +| DeepLabV3+ | R-18b-D8 | 512x1024 | 80000 | 2.1 | 14.95 | 75.87 | 77.52 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3plus/deeplabv3plus_r18b-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18b-d8_512x1024_80k_cityscapes/deeplabv3plus_r18b-d8_512x1024_80k_cityscapes_20201226_090828-e451abd9.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18b-d8_512x1024_80k_cityscapes/deeplabv3plus_r18b-d8_512x1024_80k_cityscapes-20201226_090828.log.json) | +| DeepLabV3+ | R-50b-D8 | 512x1024 | 80000 | 7.4 | 3.94 | 80.28 | 81.44 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3plus/deeplabv3plus_r50b-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50b-d8_512x1024_80k_cityscapes/deeplabv3plus_r50b-d8_512x1024_80k_cityscapes_20201225_213645-a97e4e43.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50b-d8_512x1024_80k_cityscapes/deeplabv3plus_r50b-d8_512x1024_80k_cityscapes-20201225_213645.log.json) | +| DeepLabV3+ | R-101b-D8 | 512x1024 | 80000 | 10.9 | 2.60 | 80.16 | 81.41 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3plus/deeplabv3plus_r101b-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101b-d8_512x1024_80k_cityscapes/deeplabv3plus_r101b-d8_512x1024_80k_cityscapes_20201226_190843-9c3c93a4.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101b-d8_512x1024_80k_cityscapes/deeplabv3plus_r101b-d8_512x1024_80k_cityscapes-20201226_190843.log.json) | +| DeepLabV3+ | R-18b-D8 | 769x769 | 80000 | 2.4 | 5.96 | 76.36 
| 78.24 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3plus/deeplabv3plus_r18b-d8_769x769_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18b-d8_769x769_80k_cityscapes/deeplabv3plus_r18b-d8_769x769_80k_cityscapes_20201226_151312-2c868aff.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18b-d8_769x769_80k_cityscapes/deeplabv3plus_r18b-d8_769x769_80k_cityscapes-20201226_151312.log.json) | +| DeepLabV3+ | R-50b-D8 | 769x769 | 80000 | 8.4 | 1.72 | 79.41 | 80.56 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3plus/deeplabv3plus_r50b-d8_769x769_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50b-d8_769x769_80k_cityscapes/deeplabv3plus_r50b-d8_769x769_80k_cityscapes_20201225_224655-8b596d1c.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50b-d8_769x769_80k_cityscapes/deeplabv3plus_r50b-d8_769x769_80k_cityscapes-20201225_224655.log.json) | +| DeepLabV3+ | R-101b-D8 | 769x769 | 80000 | 12.3 | 1.10 | 79.88 | 81.46 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3plus/deeplabv3plus_r101b-d8_769x769_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101b-d8_769x769_80k_cityscapes/deeplabv3plus_r101b-d8_769x769_80k_cityscapes_20201226_205041-227cdf7c.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101b-d8_769x769_80k_cityscapes/deeplabv3plus_r101b-d8_769x769_80k_cityscapes-20201226_205041.log.json) | + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ---------- | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| DeepLabV3+ | R-50-D8 | 512x512 | 80000 | 10.6 | 21.01 | 42.72 | 43.75 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_80k_ade20k/deeplabv3plus_r50-d8_512x512_80k_ade20k_20200614_185028-bf1400d8.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_80k_ade20k/deeplabv3plus_r50-d8_512x512_80k_ade20k_20200614_185028.log.json) | +| DeepLabV3+ | R-101-D8 | 512x512 | 80000 | 14.1 | 14.16 | 44.60 | 46.06 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3plus/deeplabv3plus_r101-d8_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_80k_ade20k/deeplabv3plus_r101-d8_512x512_80k_ade20k_20200615_014139-d5730af7.pth) | 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_80k_ade20k/deeplabv3plus_r101-d8_512x512_80k_ade20k_20200615_014139.log.json) |
+| DeepLabV3+ | R-50-D8 | 512x512 | 160000 | - | - | 43.95 | 44.93 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_160k_ade20k/deeplabv3plus_r50-d8_512x512_160k_ade20k_20200615_124504-6135c7e0.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_160k_ade20k/deeplabv3plus_r50-d8_512x512_160k_ade20k_20200615_124504.log.json) |
+| DeepLabV3+ | R-101-D8 | 512x512 | 160000 | - | - | 45.47 | 46.35 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3plus/deeplabv3plus_r101-d8_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_160k_ade20k/deeplabv3plus_r101-d8_512x512_160k_ade20k_20200615_123232-38ed86bb.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_160k_ade20k/deeplabv3plus_r101-d8_512x512_160k_ade20k_20200615_123232.log.json) |
+
+### Pascal VOC 2012 + Aug
+
+| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download |
+| ---------- | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ------ | -------- |
+| DeepLabV3+ | R-50-D8 | 512x512 | 20000 | 7.6 | 21 | 75.93 | 77.50 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x512_20k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_20k_voc12aug/deeplabv3plus_r50-d8_512x512_20k_voc12aug_20200617_102323-aad58ef1.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_20k_voc12aug/deeplabv3plus_r50-d8_512x512_20k_voc12aug_20200617_102323.log.json) |
+| DeepLabV3+ | R-101-D8 | 512x512 | 20000 | 11 | 13.88 | 77.22 | 78.59 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3plus/deeplabv3plus_r101-d8_512x512_20k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_20k_voc12aug/deeplabv3plus_r101-d8_512x512_20k_voc12aug_20200617_102345-c7ff3d56.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_20k_voc12aug/deeplabv3plus_r101-d8_512x512_20k_voc12aug_20200617_102345.log.json) |
+| DeepLabV3+ | R-50-D8 | 512x512 | 40000 | - | - | 76.81 | 77.57 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x512_40k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_40k_voc12aug/deeplabv3plus_r50-d8_512x512_40k_voc12aug_20200613_161759-e1b43aa9.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_40k_voc12aug/deeplabv3plus_r50-d8_512x512_40k_voc12aug_20200613_161759.log.json) |
+| DeepLabV3+ | R-101-D8 | 512x512 | 40000 | - | - | 78.62 | 79.53 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3plus/deeplabv3plus_r101-d8_512x512_40k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_40k_voc12aug/deeplabv3plus_r101-d8_512x512_40k_voc12aug_20200613_205333-faf03387.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_40k_voc12aug/deeplabv3plus_r101-d8_512x512_40k_voc12aug_20200613_205333.log.json) |
+
+### Pascal Context
+
+| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download |
+| ---------- | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ------ | -------- |
+| DeepLabV3+ | R-101-D8 | 480x480 | 40000 | - | 9.09 | 47.30 | 48.47 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3plus/deeplabv3plus_r101-d8_480x480_40k_pascal_context.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_480x480_40k_pascal_context/deeplabv3plus_r101-d8_480x480_40k_pascal_context_20200911_165459-d3c8a29e.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_480x480_40k_pascal_context/deeplabv3plus_r101-d8_480x480_40k_pascal_context-20200911_165459.log.json) |
+| DeepLabV3+ | R-101-D8 | 480x480 | 80000 | - | - | 47.23 | 48.26 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3plus/deeplabv3plus_r101-d8_480x480_80k_pascal_context.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_480x480_80k_pascal_context/deeplabv3plus_r101-d8_480x480_80k_pascal_context_20200911_155322-145d3ee8.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_480x480_80k_pascal_context/deeplabv3plus_r101-d8_480x480_80k_pascal_context-20200911_155322.log.json) |
+
+### Pascal Context 59
+
+| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download |
+| ---------- | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ------ | -------- |
+| DeepLabV3+ | R-101-D8 | 480x480 | 40000 | - | - | 52.86 | 54.54 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3plus/deeplabv3plus_r101-d8_480x480_40k_pascal_context_59.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_480x480_40k_pascal_context_59/deeplabv3plus_r101-d8_480x480_40k_pascal_context_59_20210416_111233-ed937f15.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_480x480_40k_pascal_context_59/deeplabv3plus_r101-d8_480x480_40k_pascal_context_59-20210416_111233.log.json) |
+| DeepLabV3+ | R-101-D8 | 480x480 | 80000 | - | - | 53.20 | 54.67 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3plus/deeplabv3plus_r101-d8_480x480_80k_pascal_context_59.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_480x480_80k_pascal_context_59/deeplabv3plus_r101-d8_480x480_80k_pascal_context_59_20210416_111127-7ca0331d.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_480x480_80k_pascal_context_59/deeplabv3plus_r101-d8_480x480_80k_pascal_context_59-20210416_111127.log.json) |
diff --git a/configs/deeplabv3plus/deeplabv3plus_r101-d16-mg124_512x1024_40k_cityscapes.py b/configs/deeplabv3plus/deeplabv3plus_r101-d16-mg124_512x1024_40k_cityscapes.py
new file mode 100644
index 0000000000000000000000000000000000000000..bf39d2f12b719b1c91e38bef71f0f5232543b0dc
--- /dev/null
+++ b/configs/deeplabv3plus/deeplabv3plus_r101-d16-mg124_512x1024_40k_cityscapes.py
@@ -0,0 +1,11 @@
+_base_ = './deeplabv3plus_r50-d8_512x1024_40k_cityscapes.py'
+model = dict(
+    pretrained='open-mmlab://resnet101_v1c',
+    backbone=dict(
+        depth=101,
+        dilations=(1, 1, 1, 2),
+        strides=(1, 2, 2, 1),
+        multi_grid=(1, 2, 4)),
+    decode_head=dict(
+        dilations=(1, 6, 12, 18),
+        sampler=dict(type='OHEMPixelSampler', min_kept=100000)))
diff --git a/configs/deeplabv3plus/deeplabv3plus_r101-d16-mg124_512x1024_80k_cityscapes.py b/configs/deeplabv3plus/deeplabv3plus_r101-d16-mg124_512x1024_80k_cityscapes.py
new file mode 100644
index 0000000000000000000000000000000000000000..c53ec41baf9043029549b4893b2380372ea5ecd9
--- /dev/null
+++ b/configs/deeplabv3plus/deeplabv3plus_r101-d16-mg124_512x1024_80k_cityscapes.py
@@ -0,0 +1,11 @@
+_base_ = './deeplabv3plus_r50-d8_512x1024_80k_cityscapes.py'
+model = dict(
+    pretrained='open-mmlab://resnet101_v1c',
+    backbone=dict(
+        depth=101,
+        dilations=(1, 1, 1, 2),
+        strides=(1, 2, 2, 1),
+        multi_grid=(1, 2, 4)),
+    decode_head=dict(
+        dilations=(1, 6, 12, 18),
+        sampler=dict(type='OHEMPixelSampler', min_kept=100000)))
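The two `MG124` variants above are the only configs in this group that change the backbone geometry rather than merely its depth: they restore stride-2 downsampling in stage 3 (`strides=(1, 2, 2, 1)`), keep dilation only in stage 4, spread `multi_grid=(1, 2, 4)` across the blocks of that last stage, and override the ASPP `dilations` to `(1, 6, 12, 18)` to suit the coarser features. This is what moves the output stride from 8 to 16. A minimal sketch of the stride arithmetic follows; the `D-8` values are taken to be the usual `_base_` model settings, which are not part of this diff, so treat them as an assumption:

```python
# Sketch only: how the backbone `strides` in these configs determine the
# output stride (feature-map downsampling factor) of a ResNet-style backbone.
# The D-8 values are assumed from the _base_ model file (not shown in this diff).
D8 = dict(strides=(1, 2, 1, 1), dilations=(1, 1, 2, 4))         # output stride 8
D16_MG124 = dict(strides=(1, 2, 2, 1), dilations=(1, 1, 1, 2))  # output stride 16


def output_stride(strides, stem_stride=4):
    """The ResNet stem downsamples by 4; each stage with stride 2 halves
    the resolution again, while a dilated stage (stride 1) preserves it."""
    factor = stem_stride
    for s in strides:
        factor *= s
    return factor


assert output_stride(D8['strides']) == 8
assert output_stride(D16_MG124['strides']) == 16

# With multi_grid=(1, 2, 4), the three bottleneck blocks of stage 4 use
# dilations 2 * 1, 2 * 2 and 2 * 4 instead of a uniform 2.
```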
diff --git a/configs/deeplabv3plus/deeplabv3plus_r101-d8_480x480_40k_pascal_context.py b/configs/deeplabv3plus/deeplabv3plus_r101-d8_480x480_40k_pascal_context.py
new file mode 100644
index 0000000000000000000000000000000000000000..68e2b072e4b8d076e8c3e929dfdc73bcd24ce859
--- /dev/null
+++ b/configs/deeplabv3plus/deeplabv3plus_r101-d8_480x480_40k_pascal_context.py
@@ -0,0 +1,2 @@
+_base_ = './deeplabv3plus_r50-d8_480x480_40k_pascal_context.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/deeplabv3plus/deeplabv3plus_r101-d8_480x480_40k_pascal_context_59.py b/configs/deeplabv3plus/deeplabv3plus_r101-d8_480x480_40k_pascal_context_59.py
new file mode 100644
index 0000000000000000000000000000000000000000..36a510ff41788a5861b5a9504d8e3d08502072e4
--- /dev/null
+++ b/configs/deeplabv3plus/deeplabv3plus_r101-d8_480x480_40k_pascal_context_59.py
@@ -0,0 +1,2 @@
+_base_ = './deeplabv3plus_r50-d8_480x480_40k_pascal_context_59.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/deeplabv3plus/deeplabv3plus_r101-d8_480x480_80k_pascal_context.py b/configs/deeplabv3plus/deeplabv3plus_r101-d8_480x480_80k_pascal_context.py
new file mode 100644
index 0000000000000000000000000000000000000000..3a46c28608add5325ec1decf33624c3c00bff1d7
--- /dev/null
+++ b/configs/deeplabv3plus/deeplabv3plus_r101-d8_480x480_80k_pascal_context.py
@@ -0,0 +1,2 @@
+_base_ = './deeplabv3plus_r50-d8_480x480_80k_pascal_context.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/deeplabv3plus/deeplabv3plus_r101-d8_480x480_80k_pascal_context_59.py b/configs/deeplabv3plus/deeplabv3plus_r101-d8_480x480_80k_pascal_context_59.py
new file mode 100644
index 0000000000000000000000000000000000000000..a6a7688c7a5f6ff1209eb7c44abdd105e91a2b76
--- /dev/null
+++ b/configs/deeplabv3plus/deeplabv3plus_r101-d8_480x480_80k_pascal_context_59.py
@@ -0,0 +1,2 @@
+_base_ = './deeplabv3plus_r50-d8_480x480_80k_pascal_context_59.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/deeplabv3plus/deeplabv3plus_r101-d8_512x1024_40k_cityscapes.py b/configs/deeplabv3plus/deeplabv3plus_r101-d8_512x1024_40k_cityscapes.py
new file mode 100644
index 0000000000000000000000000000000000000000..d6ce85aea5a960e76f8154a5319c7c52e98c4c45
--- /dev/null
+++ b/configs/deeplabv3plus/deeplabv3plus_r101-d8_512x1024_40k_cityscapes.py
@@ -0,0 +1,2 @@
+_base_ = './deeplabv3plus_r50-d8_512x1024_40k_cityscapes.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/deeplabv3plus/deeplabv3plus_r101-d8_512x1024_80k_cityscapes.py b/configs/deeplabv3plus/deeplabv3plus_r101-d8_512x1024_80k_cityscapes.py
new file mode 100644
index 0000000000000000000000000000000000000000..0ebbd3c70ee5e33c6ef4ae76b6c6a6ce828d07b4
--- /dev/null
+++ b/configs/deeplabv3plus/deeplabv3plus_r101-d8_512x1024_80k_cityscapes.py
@@ -0,0 +1,2 @@
+_base_ = './deeplabv3plus_r50-d8_512x1024_80k_cityscapes.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/deeplabv3plus/deeplabv3plus_r101-d8_512x512_160k_ade20k.py b/configs/deeplabv3plus/deeplabv3plus_r101-d8_512x512_160k_ade20k.py
new file mode 100644
index 0000000000000000000000000000000000000000..a75c9d3019b13d01c0dd13dae53bce3d15791d52
--- /dev/null
+++ b/configs/deeplabv3plus/deeplabv3plus_r101-d8_512x512_160k_ade20k.py
@@ -0,0 +1,2 @@
+_base_ = './deeplabv3plus_r50-d8_512x512_160k_ade20k.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
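All of the two-line configs above repeat one idiom: a child config names its parent in `_base_` and overrides only the ResNet-101 pretrained weights and the backbone depth, while every other key is merged in recursively from the R-50 base. A small usage sketch, assuming mmcv is installed and the repository root is the working directory:

```python
from mmcv import Config

# Resolving the two-line R-101 variant follows `_base_` first, then applies
# the child's keys on top of the parent R-50 config.
cfg = Config.fromfile(
    'configs/deeplabv3plus/deeplabv3plus_r101-d8_512x512_160k_ade20k.py')

print(cfg.model.pretrained)               # 'open-mmlab://resnet101_v1c' (overridden)
print(cfg.model.backbone.depth)           # 101 (overridden)
print(cfg.model.decode_head.num_classes)  # 150, inherited from the ADE20K base
```

This is why the per-dataset variants stay so short: only the delta against the base ever gets written down.

diff --git a/configs/deeplabv3plus/deeplabv3plus_r101-d8_512x512_20k_voc12aug.py b/configs/deeplabv3plus/deeplabv3plus_r101-d8_512x512_20k_voc12aug.py
new file mode 100644
index 0000000000000000000000000000000000000000..ebb1a8eaee16de7443ab3e79e02a37340de511d7
---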
/dev/null +++ b/configs/deeplabv3plus/deeplabv3plus_r101-d8_512x512_20k_voc12aug.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3plus_r50-d8_512x512_20k_voc12aug.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/deeplabv3plus/deeplabv3plus_r101-d8_512x512_40k_voc12aug.py b/configs/deeplabv3plus/deeplabv3plus_r101-d8_512x512_40k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..3caa6cf8ae61d467628378d99a919c9db1253b91 --- /dev/null +++ b/configs/deeplabv3plus/deeplabv3plus_r101-d8_512x512_40k_voc12aug.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3plus_r50-d8_512x512_40k_voc12aug.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/deeplabv3plus/deeplabv3plus_r101-d8_512x512_80k_ade20k.py b/configs/deeplabv3plus/deeplabv3plus_r101-d8_512x512_80k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..53fd3a909585367ca59eb827c2fbbab4cdf234ea --- /dev/null +++ b/configs/deeplabv3plus/deeplabv3plus_r101-d8_512x512_80k_ade20k.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3plus_r50-d8_512x512_80k_ade20k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/deeplabv3plus/deeplabv3plus_r101-d8_769x769_40k_cityscapes.py b/configs/deeplabv3plus/deeplabv3plus_r101-d8_769x769_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..c3c92eb26f8fead94f5ad7ac7d7fb60d92c57114 --- /dev/null +++ b/configs/deeplabv3plus/deeplabv3plus_r101-d8_769x769_40k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3plus_r50-d8_769x769_40k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/deeplabv3plus/deeplabv3plus_r101-d8_769x769_80k_cityscapes.py b/configs/deeplabv3plus/deeplabv3plus_r101-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..5ea9cdb5b639e5284cd46e02ce1b67b4729950f7 --- /dev/null +++ b/configs/deeplabv3plus/deeplabv3plus_r101-d8_769x769_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3plus_r50-d8_769x769_80k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/deeplabv3plus/deeplabv3plus_r101b-d8_512x1024_80k_cityscapes.py b/configs/deeplabv3plus/deeplabv3plus_r101b-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..398d9759cafc1d01e78c138abd249808531a97b9 --- /dev/null +++ b/configs/deeplabv3plus/deeplabv3plus_r101b-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = './deeplabv3plus_r50-d8_512x1024_80k_cityscapes.py' +model = dict( + pretrained='torchvision://resnet101', + backbone=dict(type='ResNet', depth=101)) diff --git a/configs/deeplabv3plus/deeplabv3plus_r101b-d8_769x769_80k_cityscapes.py b/configs/deeplabv3plus/deeplabv3plus_r101b-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..136449083f7a9efbad6df94f1acd04170147aaba --- /dev/null +++ b/configs/deeplabv3plus/deeplabv3plus_r101b-d8_769x769_80k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = './deeplabv3plus_r50-d8_769x769_80k_cityscapes.py' +model = dict( + pretrained='torchvision://resnet101', + backbone=dict(type='ResNet', depth=101)) diff --git a/configs/deeplabv3plus/deeplabv3plus_r18-d8_512x1024_80k_cityscapes.py b/configs/deeplabv3plus/deeplabv3plus_r18-d8_512x1024_80k_cityscapes.py new file mode 100644 index 
0000000000000000000000000000000000000000..aff70c93e6142ddda3a874d9dfd57ec6c4cd89b3 --- /dev/null +++ b/configs/deeplabv3plus/deeplabv3plus_r18-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,11 @@ +_base_ = './deeplabv3plus_r50-d8_512x1024_80k_cityscapes.py' +model = dict( + pretrained='open-mmlab://resnet18_v1c', + backbone=dict(depth=18), + decode_head=dict( + c1_in_channels=64, + c1_channels=12, + in_channels=512, + channels=128, + ), + auxiliary_head=dict(in_channels=256, channels=64)) diff --git a/configs/deeplabv3plus/deeplabv3plus_r18-d8_769x769_80k_cityscapes.py b/configs/deeplabv3plus/deeplabv3plus_r18-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..0172d9a87d6dc1c75bf75a9c48363eb985d389a8 --- /dev/null +++ b/configs/deeplabv3plus/deeplabv3plus_r18-d8_769x769_80k_cityscapes.py @@ -0,0 +1,11 @@ +_base_ = './deeplabv3plus_r50-d8_769x769_80k_cityscapes.py' +model = dict( + pretrained='open-mmlab://resnet18_v1c', + backbone=dict(depth=18), + decode_head=dict( + c1_in_channels=64, + c1_channels=12, + in_channels=512, + channels=128, + ), + auxiliary_head=dict(in_channels=256, channels=64)) diff --git a/configs/deeplabv3plus/deeplabv3plus_r18b-d8_512x1024_80k_cityscapes.py b/configs/deeplabv3plus/deeplabv3plus_r18b-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..b90b292b03a80aa37b8ca236746cf7cddc4ac27e --- /dev/null +++ b/configs/deeplabv3plus/deeplabv3plus_r18b-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,11 @@ +_base_ = './deeplabv3plus_r50-d8_512x1024_80k_cityscapes.py' +model = dict( + pretrained='torchvision://resnet18', + backbone=dict(type='ResNet', depth=18), + decode_head=dict( + c1_in_channels=64, + c1_channels=12, + in_channels=512, + channels=128, + ), + auxiliary_head=dict(in_channels=256, channels=64)) diff --git a/configs/deeplabv3plus/deeplabv3plus_r18b-d8_769x769_80k_cityscapes.py b/configs/deeplabv3plus/deeplabv3plus_r18b-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..b49da3581d9697e726e114b1564fc58a55ef1099 --- /dev/null +++ b/configs/deeplabv3plus/deeplabv3plus_r18b-d8_769x769_80k_cityscapes.py @@ -0,0 +1,11 @@ +_base_ = './deeplabv3plus_r50-d8_769x769_80k_cityscapes.py' +model = dict( + pretrained='torchvision://resnet18', + backbone=dict(type='ResNet', depth=18), + decode_head=dict( + c1_in_channels=64, + c1_channels=12, + in_channels=512, + channels=128, + ), + auxiliary_head=dict(in_channels=256, channels=64)) diff --git a/configs/deeplabv3plus/deeplabv3plus_r50-d8_480x480_40k_pascal_context.py b/configs/deeplabv3plus/deeplabv3plus_r50-d8_480x480_40k_pascal_context.py new file mode 100644 index 0000000000000000000000000000000000000000..318845de1e2124a4dff3348749ec5a13d78d686f --- /dev/null +++ b/configs/deeplabv3plus/deeplabv3plus_r50-d8_480x480_40k_pascal_context.py @@ -0,0 +1,10 @@ +_base_ = [ + '../_base_/models/deeplabv3plus_r50-d8.py', + '../_base_/datasets/pascal_context.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(num_classes=60), + auxiliary_head=dict(num_classes=60), + test_cfg=dict(mode='slide', crop_size=(480, 480), stride=(320, 320))) +optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001) diff --git a/configs/deeplabv3plus/deeplabv3plus_r50-d8_480x480_40k_pascal_context_59.py b/configs/deeplabv3plus/deeplabv3plus_r50-d8_480x480_40k_pascal_context_59.py new file mode 100644 index 
0000000000000000000000000000000000000000..f9e831bcd1043ed9feba88bc28ab69d87287ca98 --- /dev/null +++ b/configs/deeplabv3plus/deeplabv3plus_r50-d8_480x480_40k_pascal_context_59.py @@ -0,0 +1,10 @@ +_base_ = [ + '../_base_/models/deeplabv3plus_r50-d8.py', + '../_base_/datasets/pascal_context_59.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(num_classes=59), + auxiliary_head=dict(num_classes=59), + test_cfg=dict(mode='slide', crop_size=(480, 480), stride=(320, 320))) +optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001) diff --git a/configs/deeplabv3plus/deeplabv3plus_r50-d8_480x480_80k_pascal_context.py b/configs/deeplabv3plus/deeplabv3plus_r50-d8_480x480_80k_pascal_context.py new file mode 100644 index 0000000000000000000000000000000000000000..1736c2397a9b2a4b4fb12eee8175e5ee98eaf805 --- /dev/null +++ b/configs/deeplabv3plus/deeplabv3plus_r50-d8_480x480_80k_pascal_context.py @@ -0,0 +1,10 @@ +_base_ = [ + '../_base_/models/deeplabv3plus_r50-d8.py', + '../_base_/datasets/pascal_context.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(num_classes=60), + auxiliary_head=dict(num_classes=60), + test_cfg=dict(mode='slide', crop_size=(480, 480), stride=(320, 320))) +optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001) diff --git a/configs/deeplabv3plus/deeplabv3plus_r50-d8_480x480_80k_pascal_context_59.py b/configs/deeplabv3plus/deeplabv3plus_r50-d8_480x480_80k_pascal_context_59.py new file mode 100644 index 0000000000000000000000000000000000000000..d2af575df7719ebbca7553647de8c531d1a10fee --- /dev/null +++ b/configs/deeplabv3plus/deeplabv3plus_r50-d8_480x480_80k_pascal_context_59.py @@ -0,0 +1,10 @@ +_base_ = [ + '../_base_/models/deeplabv3plus_r50-d8.py', + '../_base_/datasets/pascal_context_59.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(num_classes=59), + auxiliary_head=dict(num_classes=59), + test_cfg=dict(mode='slide', crop_size=(480, 480), stride=(320, 320))) +optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001) diff --git a/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x1024_40k_cityscapes.py b/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x1024_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..7243d0390f6394fdd528c881bb128b2c13d08037 --- /dev/null +++ b/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x1024_40k_cityscapes.py @@ -0,0 +1,5 @@ +_base_ = [ + '../_base_/models/deeplabv3plus_r50-d8.py', + '../_base_/datasets/cityscapes.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] diff --git a/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x1024_80k_cityscapes.py b/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..3304d3677f5357f1a3e343b39fcd97b238abdb5e --- /dev/null +++ b/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,5 @@ +_base_ = [ + '../_base_/models/deeplabv3plus_r50-d8.py', + '../_base_/datasets/cityscapes.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] diff --git a/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x512_160k_ade20k.py b/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x512_160k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..1491e3b8247c9d163d6016caf2fcd8043a053b7e --- 
/dev/null +++ b/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x512_160k_ade20k.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/deeplabv3plus_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +model = dict( + decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) diff --git a/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x512_20k_voc12aug.py b/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x512_20k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..1056ad4d1e2a4f956d12f6daf506620fab27dd17 --- /dev/null +++ b/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x512_20k_voc12aug.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/deeplabv3plus_r50-d8.py', + '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_20k.py' +] +model = dict( + decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21)) diff --git a/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x512_40k_voc12aug.py b/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x512_40k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..e36c83ba601884b81c06ee69445a94e76224c828 --- /dev/null +++ b/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x512_40k_voc12aug.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/deeplabv3plus_r50-d8.py', + '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21)) diff --git a/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x512_80k_ade20k.py b/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x512_80k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..352d870bc8eab11974640c4b2d9c80dc6fbbaaf2 --- /dev/null +++ b/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x512_80k_ade20k.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/deeplabv3plus_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) diff --git a/configs/deeplabv3plus/deeplabv3plus_r50-d8_769x769_40k_cityscapes.py b/configs/deeplabv3plus/deeplabv3plus_r50-d8_769x769_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..e4bda3eded693bfd44a8c86ced7ae6ee9963c583 --- /dev/null +++ b/configs/deeplabv3plus/deeplabv3plus_r50-d8_769x769_40k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/deeplabv3plus_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/configs/deeplabv3plus/deeplabv3plus_r50-d8_769x769_80k_cityscapes.py b/configs/deeplabv3plus/deeplabv3plus_r50-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..1420b97a4bd0dc0f5451623697666012a2de635c --- /dev/null +++ b/configs/deeplabv3plus/deeplabv3plus_r50-d8_769x769_80k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/deeplabv3plus_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(align_corners=True), + 
auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/configs/deeplabv3plus/deeplabv3plus_r50b-d8_512x1024_80k_cityscapes.py b/configs/deeplabv3plus/deeplabv3plus_r50b-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..dd8e1da9c7b1d86bc8a0c834bbede9d0fd40acf5 --- /dev/null +++ b/configs/deeplabv3plus/deeplabv3plus_r50b-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3plus_r50-d8_512x1024_80k_cityscapes.py' +model = dict(pretrained='torchvision://resnet50', backbone=dict(type='ResNet')) diff --git a/configs/deeplabv3plus/deeplabv3plus_r50b-d8_769x769_80k_cityscapes.py b/configs/deeplabv3plus/deeplabv3plus_r50b-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..c0ba019136c2e4f33b015be3d82505bee2066655 --- /dev/null +++ b/configs/deeplabv3plus/deeplabv3plus_r50b-d8_769x769_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3plus_r50-d8_769x769_80k_cityscapes.py' +model = dict(pretrained='torchvision://resnet50', backbone=dict(type='ResNet')) diff --git a/configs/dmnet/README.md b/configs/dmnet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..190373e87922db8f26789fd2b29a9ca953bb0d4f --- /dev/null +++ b/configs/dmnet/README.md @@ -0,0 +1,39 @@ +# Dynamic Multi-scale Filters for Semantic Segmentation + +## Introduction + + + +```latex +@InProceedings{He_2019_ICCV, +author = {He, Junjun and Deng, Zhongying and Qiao, Yu}, +title = {Dynamic Multi-Scale Filters for Semantic Segmentation}, +booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, +month = {October}, +year = {2019} +} +``` + +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| DMNet | R-50-D8 | 512x1024 | 40000 | 7.0 | 3.66 | 77.78 | 79.14 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/dmnet/dmnet_r50-d8_512x1024_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_512x1024_40k_cityscapes/dmnet_r50-d8_512x1024_40k_cityscapes_20201214_115717-5e88fa33.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_512x1024_40k_cityscapes/dmnet_r50-d8_512x1024_40k_cityscapes-20201214_115717.log.json) | +| DMNet | R-101-D8 | 512x1024 | 40000 | 10.6 | 2.54 | 78.37 | 79.72 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/dmnet/dmnet_r101-d8_512x1024_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_512x1024_40k_cityscapes/dmnet_r101-d8_512x1024_40k_cityscapes_20201214_115716-abc9d111.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_512x1024_40k_cityscapes/dmnet_r101-d8_512x1024_40k_cityscapes-20201214_115716.log.json) | +| 
DMNet | R-50-D8 | 769x769 | 40000 | 7.9 | 1.57 | 78.49 | 80.27 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/dmnet/dmnet_r50-d8_769x769_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_769x769_40k_cityscapes/dmnet_r50-d8_769x769_40k_cityscapes_20201214_115717-2a2628d7.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_769x769_40k_cityscapes/dmnet_r50-d8_769x769_40k_cityscapes-20201214_115717.log.json) | +| DMNet | R-101-D8 | 769x769 | 40000 | 12.0 | 1.01 | 77.62 | 78.94 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/dmnet/dmnet_r101-d8_769x769_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_769x769_40k_cityscapes/dmnet_r101-d8_769x769_40k_cityscapes_20201214_115718-b650de90.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_769x769_40k_cityscapes/dmnet_r101-d8_769x769_40k_cityscapes-20201214_115718.log.json) | +| DMNet | R-50-D8 | 512x1024 | 80000 | - | - | 79.07 | 80.22 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/dmnet/dmnet_r50-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_512x1024_80k_cityscapes/dmnet_r50-d8_512x1024_80k_cityscapes_20201214_115716-987f51e3.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_512x1024_80k_cityscapes/dmnet_r50-d8_512x1024_80k_cityscapes-20201214_115716.log.json) | +| DMNet | R-101-D8 | 512x1024 | 80000 | - | - | 79.64 | 80.67 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/dmnet/dmnet_r101-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_512x1024_80k_cityscapes/dmnet_r101-d8_512x1024_80k_cityscapes_20201214_115705-b1ff208a.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_512x1024_80k_cityscapes/dmnet_r101-d8_512x1024_80k_cityscapes-20201214_115705.log.json) | +| DMNet | R-50-D8 | 769x769 | 80000 | - | - | 79.22 | 80.55 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/dmnet/dmnet_r50-d8_769x769_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_769x769_80k_cityscapes/dmnet_r50-d8_769x769_80k_cityscapes_20201214_115718-7ea9fa12.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_769x769_80k_cityscapes/dmnet_r50-d8_769x769_80k_cityscapes-20201214_115718.log.json) | +| DMNet | R-101-D8 | 769x769 | 80000 | - | - | 79.19 | 80.65 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/dmnet/dmnet_r101-d8_769x769_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_769x769_80k_cityscapes/dmnet_r101-d8_769x769_80k_cityscapes_20201214_115716-a7fbc2ab.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_769x769_80k_cityscapes/dmnet_r101-d8_769x769_80k_cityscapes-20201214_115716.log.json) | + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | --------------------------------------------------------------------------------------------------------------------- | 
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| DMNet | R-50-D8 | 512x512 | 80000 | 9.4 | 20.95 | 42.37 | 43.62 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/dmnet/dmnet_r50-d8_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_512x512_80k_ade20k/dmnet_r50-d8_512x512_80k_ade20k_20201214_115705-a8626293.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_512x512_80k_ade20k/dmnet_r50-d8_512x512_80k_ade20k-20201214_115705.log.json) | +| DMNet | R-101-D8 | 512x512 | 80000 | 13.0 | 13.88 | 45.34 | 46.13 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/dmnet/dmnet_r101-d8_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_512x512_80k_ade20k/dmnet_r101-d8_512x512_80k_ade20k_20201214_115704-c656c3fb.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_512x512_80k_ade20k/dmnet_r101-d8_512x512_80k_ade20k-20201214_115704.log.json) | +| DMNet | R-50-D8 | 512x512 | 160000 | - | - | 43.15 | 44.17 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/dmnet/dmnet_r50-d8_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_512x512_160k_ade20k/dmnet_r50-d8_512x512_160k_ade20k_20201214_115706-25fb92c2.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_512x512_160k_ade20k/dmnet_r50-d8_512x512_160k_ade20k-20201214_115706.log.json) | +| DMNet | R-101-D8 | 512x512 | 160000 | - | - | 45.42 | 46.76 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/dmnet/dmnet_r101-d8_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_512x512_160k_ade20k/dmnet_r101-d8_512x512_160k_ade20k_20201214_115705-73f9a8d7.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_512x512_160k_ade20k/dmnet_r101-d8_512x512_160k_ade20k-20201214_115705.log.json) | diff --git a/configs/dmnet/dmnet_r101-d8_512x1024_40k_cityscapes.py b/configs/dmnet/dmnet_r101-d8_512x1024_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..fd6897691d3f8f200783fae7bfe231735f25a11b --- /dev/null +++ b/configs/dmnet/dmnet_r101-d8_512x1024_40k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './dmnet_r50-d8_512x1024_40k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/dmnet/dmnet_r101-d8_512x1024_80k_cityscapes.py b/configs/dmnet/dmnet_r101-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..116cbdcede32bf24ad95f04291e98754011172c9 --- /dev/null +++ b/configs/dmnet/dmnet_r101-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './dmnet_r50-d8_512x1024_80k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/dmnet/dmnet_r101-d8_512x512_160k_ade20k.py b/configs/dmnet/dmnet_r101-d8_512x512_160k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..d78d46c040f75d16225307d4b4151b87e6e3db29 --- /dev/null +++ 
b/configs/dmnet/dmnet_r101-d8_512x512_160k_ade20k.py @@ -0,0 +1,2 @@ +_base_ = './dmnet_r50-d8_512x512_160k_ade20k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/dmnet/dmnet_r101-d8_512x512_80k_ade20k.py b/configs/dmnet/dmnet_r101-d8_512x512_80k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..9713b731a47df9c5e23d26a08ad17d03a0d5e9fe --- /dev/null +++ b/configs/dmnet/dmnet_r101-d8_512x512_80k_ade20k.py @@ -0,0 +1,2 @@ +_base_ = './dmnet_r50-d8_512x512_80k_ade20k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/dmnet/dmnet_r101-d8_769x769_40k_cityscapes.py b/configs/dmnet/dmnet_r101-d8_769x769_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..6b222e730073dd42df618db5660ee9d4117f3956 --- /dev/null +++ b/configs/dmnet/dmnet_r101-d8_769x769_40k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './dmnet_r50-d8_769x769_40k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/dmnet/dmnet_r101-d8_769x769_80k_cityscapes.py b/configs/dmnet/dmnet_r101-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..f36d490e9c9b31de7eedf735d2712e55f35db998 --- /dev/null +++ b/configs/dmnet/dmnet_r101-d8_769x769_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './dmnet_r50-d8_769x769_80k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/dmnet/dmnet_r50-d8_512x1024_40k_cityscapes.py b/configs/dmnet/dmnet_r50-d8_512x1024_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..1f9a917fa4223bd2428f2b2d10eac446f7ecc71a --- /dev/null +++ b/configs/dmnet/dmnet_r50-d8_512x1024_40k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/dmnet_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] diff --git a/configs/dmnet/dmnet_r50-d8_512x1024_80k_cityscapes.py b/configs/dmnet/dmnet_r50-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..1b38f90dc4318f23d32971e7afbf90a327768f2d --- /dev/null +++ b/configs/dmnet/dmnet_r50-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/dmnet_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] diff --git a/configs/dmnet/dmnet_r50-d8_512x512_160k_ade20k.py b/configs/dmnet/dmnet_r50-d8_512x512_160k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..a8fbd9beb11f3d1308ce2cd12da2a177c2d39478 --- /dev/null +++ b/configs/dmnet/dmnet_r50-d8_512x512_160k_ade20k.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/dmnet_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +model = dict( + decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) diff --git a/configs/dmnet/dmnet_r50-d8_512x512_80k_ade20k.py b/configs/dmnet/dmnet_r50-d8_512x512_80k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..74f6d6a85a06e96580a3c8d5843f660c85bca5ad --- /dev/null +++ b/configs/dmnet/dmnet_r50-d8_512x512_80k_ade20k.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/dmnet_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +model 
= dict( + decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) diff --git a/configs/dmnet/dmnet_r50-d8_769x769_40k_cityscapes.py b/configs/dmnet/dmnet_r50-d8_769x769_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..19841547a42315164de547a4121cfd64739cf24b --- /dev/null +++ b/configs/dmnet/dmnet_r50-d8_769x769_40k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/dmnet_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/configs/dmnet/dmnet_r50-d8_769x769_80k_cityscapes.py b/configs/dmnet/dmnet_r50-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..31d95f96eb10025c2ad054cde4c81f47db21f0f2 --- /dev/null +++ b/configs/dmnet/dmnet_r50-d8_769x769_80k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/dmnet_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/configs/dnlnet/README.md b/configs/dnlnet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..73714122b9a89bdef5bf95e62dc804b81d6e1c10 --- /dev/null +++ b/configs/dnlnet/README.md @@ -0,0 +1,42 @@ +# Disentangled Non-Local Neural Networks + +## Introduction + + + +This example reproduces ["Disentangled Non-Local Neural Networks"](https://arxiv.org/abs/2006.06668) for semantic segmentation; it is still a work in progress.
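Since the DNL configs added here follow the standard MMSegmentation layout, a released checkpoint can be tried directly with the usual `mmseg.apis` helpers. Below is a minimal, illustrative sketch (not part of the upstream README): the config path and checkpoint URL are copied from the Cityscapes table that follows, and `demo.jpg` stands in for any test image.

```python
# Minimal single-image inference sketch for a DNL model, assuming the
# mmseg 0.x API and a CUDA device (use device='cpu' if no GPU is available).
from mmseg.apis import inference_segmentor, init_segmentor

config_file = 'configs/dnlnet/dnl_r50-d8_512x1024_40k_cityscapes.py'
checkpoint_file = (
    'https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/'
    'dnl_r50-d8_512x1024_40k_cityscapes/'
    'dnl_r50-d8_512x1024_40k_cityscapes_20200904_233629-53d4ea93.pth')

# init_segmentor builds the model from the config and loads the checkpoint;
# mmcv resolves both local paths and download URLs.
model = init_segmentor(config_file, checkpoint_file, device='cuda:0')

# inference_segmentor returns one segmentation map per input image.
result = inference_segmentor(model, 'demo.jpg')
```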
+ +## Citation + +```latex +@misc{yin2020disentangled, + title={Disentangled Non-Local Neural Networks}, + author={Minghao Yin and Zhuliang Yao and Yue Cao and Xiu Li and Zheng Zhang and Stephen Lin and Han Hu}, + year={2020}, + booktitle={ECCV} +} +``` + +## Results and models (in progress) + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------: | -------------- | ----: | ------------- | ------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| dnl | R-50-D8 | 512x1024 | 40000 | 7.3 | 2.56 | 78.61 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/dnlnet/dnl_r50-d8_512x1024_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_512x1024_40k_cityscapes/dnl_r50-d8_512x1024_40k_cityscapes_20200904_233629-53d4ea93.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_512x1024_40k_cityscapes/dnl_r50-d8_512x1024_40k_cityscapes-20200904_233629.log.json) | +| dnl | R-101-D8 | 512x1024 | 40000 | 10.9 | 1.96 | 78.31 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/dnlnet/dnl_r101-d8_512x1024_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_512x1024_40k_cityscapes/dnl_r101-d8_512x1024_40k_cityscapes_20200904_233629-9928ffef.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_512x1024_40k_cityscapes/dnl_r101-d8_512x1024_40k_cityscapes-20200904_233629.log.json) | +| dnl | R-50-D8 | 769x769 | 40000 | 9.2 | 1.50 | 78.44 | 80.27 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/dnlnet/dnl_r50-d8_769x769_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_769x769_40k_cityscapes/dnl_r50-d8_769x769_40k_cityscapes_20200820_232206-0f283785.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_769x769_40k_cityscapes/dnl_r50-d8_769x769_40k_cityscapes-20200820_232206.log.json) | +| dnl | R-101-D8 | 769x769 | 40000 | 12.6 | 1.02 | 76.39 | 77.77 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/dnlnet/dnl_r101-d8_769x769_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_769x769_40k_cityscapes/dnl_r101-d8_769x769_40k_cityscapes_20200820_171256-76c596df.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_769x769_40k_cityscapes/dnl_r101-d8_769x769_40k_cityscapes-20200820_171256.log.json) | +| dnl | R-50-D8 | 512x1024 | 80000 | - | - | 79.33 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/dnlnet/dnl_r50-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_512x1024_80k_cityscapes/dnl_r50-d8_512x1024_80k_cityscapes_20200904_233629-58b2f778.pth) | 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_512x1024_80k_cityscapes/dnl_r50-d8_512x1024_80k_cityscapes-20200904_233629.log.json) | +| dnl | R-101-D8 | 512x1024 | 80000 | - | - | 80.41 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/dnlnet/dnl_r101-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_512x1024_80k_cityscapes/dnl_r101-d8_512x1024_80k_cityscapes_20200904_233629-758e2dd4.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_512x1024_80k_cityscapes/dnl_r101-d8_512x1024_80k_cityscapes-20200904_233629.log.json) | +| dnl | R-50-D8 | 769x769 | 80000 | - | - | 79.36 | 80.70 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/dnlnet/dnl_r50-d8_769x769_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_769x769_80k_cityscapes/dnl_r50-d8_769x769_80k_cityscapes_20200820_011925-366bc4c7.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_769x769_80k_cityscapes/dnl_r50-d8_769x769_80k_cityscapes-20200820_011925.log.json) | +| dnl | R-101-D8 | 769x769 | 80000 | - | - | 79.41 | 80.68 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/dnlnet/dnl_r101-d8_769x769_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_769x769_80k_cityscapes/dnl_r101-d8_769x769_80k_cityscapes_20200821_051111-95ff84ab.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_769x769_80k_cityscapes/dnl_r101-d8_769x769_80k_cityscapes-20200821_051111.log.json) | + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------: | -------------- | ----: | ------------- | -------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| DNL | R-50-D8 | 512x512 | 80000 | 8.8 | 20.66 | 41.76 | 42.99 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/dnlnet/dnl_r50-d8_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_512x512_80k_ade20k/dnl_r50-d8_512x512_80k_ade20k_20200826_183354-1cf6e0c1.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_512x512_80k_ade20k/dnl_r50-d8_512x512_80k_ade20k-20200826_183354.log.json) | +| DNL | R-101-D8 | 512x512 | 80000 | 12.8 | 12.54 | 43.76 | 44.91 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/dnlnet/dnl_r101-d8_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_512x512_80k_ade20k/dnl_r101-d8_512x512_80k_ade20k_20200826_183354-d820d6ea.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_512x512_80k_ade20k/dnl_r101-d8_512x512_80k_ade20k-20200826_183354.log.json) | +| DNL | R-50-D8 | 512x512 | 160000 | - | - | 41.87 | 43.01 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/dnlnet/dnl_r50-d8_512x512_160k_ade20k.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_512x512_160k_ade20k/dnl_r50-d8_512x512_160k_ade20k_20200826_183350-37837798.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_512x512_160k_ade20k/dnl_r50-d8_512x512_160k_ade20k-20200826_183350.log.json) | +| DNL | R-101-D8 | 512x512 | 160000 | - | - | 44.25 | 45.78 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/dnlnet/dnl_r101-d8_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_512x512_160k_ade20k/dnl_r101-d8_512x512_160k_ade20k_20200826_183350-ed522c61.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_512x512_160k_ade20k/dnl_r101-d8_512x512_160k_ade20k-20200826_183350.log.json) | diff --git a/configs/dnlnet/dnl_r101-d8_512x1024_40k_cityscapes.py b/configs/dnlnet/dnl_r101-d8_512x1024_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..1a36e3c80a13f91e37e4d90b7ae47c7e0d204144 --- /dev/null +++ b/configs/dnlnet/dnl_r101-d8_512x1024_40k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './dnl_r50-d8_512x1024_40k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/dnlnet/dnl_r101-d8_512x1024_80k_cityscapes.py b/configs/dnlnet/dnl_r101-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..0f2e1b6da7e63841f4429b1caed5fbe9d537c4f8 --- /dev/null +++ b/configs/dnlnet/dnl_r101-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './dnl_r50-d8_512x1024_80k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/dnlnet/dnl_r101-d8_512x512_160k_ade20k.py b/configs/dnlnet/dnl_r101-d8_512x512_160k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..aca44e478b67d5a226681c099e64fe67d93cf39b --- /dev/null +++ b/configs/dnlnet/dnl_r101-d8_512x512_160k_ade20k.py @@ -0,0 +1,2 @@ +_base_ = './dnl_r50-d8_512x512_160k_ade20k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/dnlnet/dnl_r101-d8_512x512_80k_ade20k.py b/configs/dnlnet/dnl_r101-d8_512x512_80k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..ebd27a1d1c6bf0e983fafed2e5659701dadb8f24 --- /dev/null +++ b/configs/dnlnet/dnl_r101-d8_512x512_80k_ade20k.py @@ -0,0 +1,2 @@ +_base_ = './dnl_r50-d8_512x512_80k_ade20k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/dnlnet/dnl_r101-d8_769x769_40k_cityscapes.py b/configs/dnlnet/dnl_r101-d8_769x769_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..575e9d01343a4563e0d3ba89b361ea8e358d2dee --- /dev/null +++ b/configs/dnlnet/dnl_r101-d8_769x769_40k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './dnl_r50-d8_769x769_40k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/dnlnet/dnl_r101-d8_769x769_80k_cityscapes.py b/configs/dnlnet/dnl_r101-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..4f1b9e19411eb963d16fd2a8174529e69ecd5a1a --- /dev/null +++ b/configs/dnlnet/dnl_r101-d8_769x769_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './dnl_r50-d8_769x769_80k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git 
a/configs/dnlnet/dnl_r50-d8_512x1024_40k_cityscapes.py b/configs/dnlnet/dnl_r50-d8_512x1024_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..f7aa7444d4c8022563db642478beec4dc5ab0dab --- /dev/null +++ b/configs/dnlnet/dnl_r50-d8_512x1024_40k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/dnl_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] diff --git a/configs/dnlnet/dnl_r50-d8_512x1024_80k_cityscapes.py b/configs/dnlnet/dnl_r50-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..fdff93f543af6bac93949e68532daea45e437167 --- /dev/null +++ b/configs/dnlnet/dnl_r50-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/dnl_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] diff --git a/configs/dnlnet/dnl_r50-d8_512x512_160k_ade20k.py b/configs/dnlnet/dnl_r50-d8_512x512_160k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..5305689d09b944f6e37aa85567ce3f29fc6974a7 --- /dev/null +++ b/configs/dnlnet/dnl_r50-d8_512x512_160k_ade20k.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/dnl_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +model = dict( + decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) diff --git a/configs/dnlnet/dnl_r50-d8_512x512_80k_ade20k.py b/configs/dnlnet/dnl_r50-d8_512x512_80k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..09604c39729abfc9015eb971069b987c8d8a82cb --- /dev/null +++ b/configs/dnlnet/dnl_r50-d8_512x512_80k_ade20k.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/dnl_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) diff --git a/configs/dnlnet/dnl_r50-d8_769x769_40k_cityscapes.py b/configs/dnlnet/dnl_r50-d8_769x769_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..0666199b63e604b09fe8187c378589c25d0d311b --- /dev/null +++ b/configs/dnlnet/dnl_r50-d8_769x769_40k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/dnl_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/configs/dnlnet/dnl_r50-d8_769x769_80k_cityscapes.py b/configs/dnlnet/dnl_r50-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..f7b07c4f47629c07faa013b9d1eae3462d898c6f --- /dev/null +++ b/configs/dnlnet/dnl_r50-d8_769x769_80k_cityscapes.py @@ -0,0 +1,12 @@ +_base_ = [ + '../_base_/models/dnl_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) +optimizer = dict( + paramwise_cfg=dict( + custom_keys=dict(theta=dict(wd_mult=0.), phi=dict(wd_mult=0.)))) diff --git a/configs/emanet/README.md b/configs/emanet/README.md new file mode 
100644 index 0000000000000000000000000000000000000000..ec2d726bc351ca3e5c6ec56b9a4572824f232df6 --- /dev/null +++ b/configs/emanet/README.md @@ -0,0 +1,26 @@ +# Expectation-Maximization Attention Networks for Semantic Segmentation + +## Introduction + + + +```latex +@inproceedings{li2019expectation, + title={Expectation-maximization attention networks for semantic segmentation}, + author={Li, Xia and Zhong, Zhisheng and Wu, Jianlong and Yang, Yibo and Lin, Zhouchen and Liu, Hong}, + booktitle={Proceedings of the IEEE International Conference on Computer Vision}, + pages={9167--9176}, + year={2019} +} +``` + +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------: | -------------- | ----: | ------------- | --------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| EMANet | R-50-D8 | 512x1024 | 80000 | 5.4 | 4.58 | 77.59 | 79.44 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/emanet/emanet_r50-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/emanet/emanet_r50-d8_512x1024_80k_cityscapes/emanet_r50-d8_512x1024_80k_cityscapes_20200901_100301-c43fcef1.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/emanet/emanet_r50-d8_512x1024_80k_cityscapes/emanet_r50-d8_512x1024_80k_cityscapes-20200901_100301.log.json) | +| EMANet | R-101-D8 | 512x1024 | 80000 | 6.2 | 2.87 | 79.10 | 81.21 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/emanet/emanet_r101-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/emanet/emanet_r101-d8_512x1024_80k_cityscapes/emanet_r101-d8_512x1024_80k_cityscapes_20200901_100301-2d970745.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/emanet/emanet_r101-d8_512x1024_80k_cityscapes/emanet_r101-d8_512x1024_80k_cityscapes-20200901_100301.log.json) | +| EMANet | R-50-D8 | 769x769 | 80000 | 8.9 | 1.97 | 79.33 | 80.49 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/emanet/emanet_r50-d8_769x769_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/emanet/emanet_r50-d8_769x769_80k_cityscapes/emanet_r50-d8_769x769_80k_cityscapes_20200901_100301-16f8de52.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/emanet/emanet_r50-d8_769x769_80k_cityscapes/emanet_r50-d8_769x769_80k_cityscapes-20200901_100301.log.json) | +| EMANet | R-101-D8 | 769x769 | 80000 | 10.1 | 1.22 | 79.62 | 81.00 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/emanet/emanet_r101-d8_769x769_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/emanet/emanet_r101-d8_769x769_80k_cityscapes/emanet_r101-d8_769x769_80k_cityscapes_20200901_100301-47a324ce.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/emanet/emanet_r101-d8_769x769_80k_cityscapes/emanet_r101-d8_769x769_80k_cityscapes-20200901_100301.log.json) | diff --git 
a/configs/emanet/emanet_r101-d8_512x1024_80k_cityscapes.py b/configs/emanet/emanet_r101-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..58f28b43f55f54c7a604960735963e6b7c13b6f1 --- /dev/null +++ b/configs/emanet/emanet_r101-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './emanet_r50-d8_512x1024_80k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/emanet/emanet_r101-d8_769x769_80k_cityscapes.py b/configs/emanet/emanet_r101-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..c5dbf20b0fcc7bc1dd077bd8b7077772251d4c1a --- /dev/null +++ b/configs/emanet/emanet_r101-d8_769x769_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './emanet_r50-d8_769x769_80k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/emanet/emanet_r50-d8_512x1024_80k_cityscapes.py b/configs/emanet/emanet_r50-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..73b7788bf924be2e1588596a88f0155ddc37358e --- /dev/null +++ b/configs/emanet/emanet_r50-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/emanet_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] diff --git a/configs/emanet/emanet_r50-d8_769x769_80k_cityscapes.py b/configs/emanet/emanet_r50-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..699aa212c3518901b2f84db3f062c16b023c7538 --- /dev/null +++ b/configs/emanet/emanet_r50-d8_769x769_80k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/emanet_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/configs/encnet/README.md b/configs/encnet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4246caa0deeb20ac51bd7fd79bc47c2bcd42b4a1 --- /dev/null +++ b/configs/encnet/README.md @@ -0,0 +1,39 @@ +# Context Encoding for Semantic Segmentation + +## Introduction + + + +```latex +@InProceedings{Zhang_2018_CVPR, +author = {Zhang, Hang and Dana, Kristin and Shi, Jianping and Zhang, Zhongyue and Wang, Xiaogang and Tyagi, Ambrish and Agrawal, Amit}, +title = {Context Encoding for Semantic Segmentation}, +booktitle = {The IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, +month = {June}, +year = {2018} +} +``` + +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | --------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| encnet | R-50-D8 | 512x1024 | 40000 | 8.6 | 4.58 | 75.67 | 77.08 | 
[config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/encnet/encnet_r50-d8_512x1024_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_512x1024_40k_cityscapes/encnet_r50-d8_512x1024_40k_cityscapes_20200621_220958-68638a47.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_512x1024_40k_cityscapes/encnet_r50-d8_512x1024_40k_cityscapes-20200621_220958.log.json) | +| encnet | R-101-D8 | 512x1024 | 40000 | 12.1 | 2.66 | 75.81 | 77.21 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/encnet/encnet_r101-d8_512x1024_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_512x1024_40k_cityscapes/encnet_r101-d8_512x1024_40k_cityscapes_20200621_220933-35e0a3e8.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_512x1024_40k_cityscapes/encnet_r101-d8_512x1024_40k_cityscapes-20200621_220933.log.json) | +| encnet | R-50-D8 | 769x769 | 40000 | 9.8 | 1.82 | 76.24 | 77.85 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/encnet/encnet_r50-d8_769x769_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_769x769_40k_cityscapes/encnet_r50-d8_769x769_40k_cityscapes_20200621_220958-3bcd2884.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_769x769_40k_cityscapes/encnet_r50-d8_769x769_40k_cityscapes-20200621_220958.log.json) | +| encnet | R-101-D8 | 769x769 | 40000 | 13.7 | 1.26 | 74.25 | 76.25 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/encnet/encnet_r101-d8_769x769_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_769x769_40k_cityscapes/encnet_r101-d8_769x769_40k_cityscapes_20200621_220933-2fafed55.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_769x769_40k_cityscapes/encnet_r101-d8_769x769_40k_cityscapes-20200621_220933.log.json) | +| encnet | R-50-D8 | 512x1024 | 80000 | - | - | 77.94 | 79.13 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/encnet/encnet_r50-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_512x1024_80k_cityscapes/encnet_r50-d8_512x1024_80k_cityscapes_20200622_003554-fc5c5624.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_512x1024_80k_cityscapes/encnet_r50-d8_512x1024_80k_cityscapes-20200622_003554.log.json) | +| encnet | R-101-D8 | 512x1024 | 80000 | - | - | 78.55 | 79.47 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/encnet/encnet_r101-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_512x1024_80k_cityscapes/encnet_r101-d8_512x1024_80k_cityscapes_20200622_003555-1de64bec.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_512x1024_80k_cityscapes/encnet_r101-d8_512x1024_80k_cityscapes-20200622_003555.log.json) | +| encnet | R-50-D8 | 769x769 | 80000 | - | - | 77.44 | 78.72 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/encnet/encnet_r50-d8_769x769_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_769x769_80k_cityscapes/encnet_r50-d8_769x769_80k_cityscapes_20200622_003554-55096dcb.pth) | 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_769x769_80k_cityscapes/encnet_r50-d8_769x769_80k_cityscapes-20200622_003554.log.json) | +| encnet | R-101-D8 | 769x769 | 80000 | - | - | 76.10 | 76.97 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/encnet/encnet_r101-d8_769x769_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_769x769_80k_cityscapes/encnet_r101-d8_769x769_80k_cityscapes_20200622_003555-470ef79d.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_769x769_80k_cityscapes/encnet_r101-d8_769x769_80k_cityscapes-20200622_003555.log.json) | + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ----------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| encnet | R-50-D8 | 512x512 | 80000 | 10.1 | 22.81 | 39.53 | 41.17 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/encnet/encnet_r50-d8_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_512x512_80k_ade20k/encnet_r50-d8_512x512_80k_ade20k_20200622_042412-44b46b04.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_512x512_80k_ade20k/encnet_r50-d8_512x512_80k_ade20k-20200622_042412.log.json) | +| encnet | R-101-D8 | 512x512 | 80000 | 13.6 | 14.87 | 42.11 | 43.61 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/encnet/encnet_r101-d8_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_512x512_80k_ade20k/encnet_r101-d8_512x512_80k_ade20k_20200622_101128-dd35e237.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_512x512_80k_ade20k/encnet_r101-d8_512x512_80k_ade20k-20200622_101128.log.json) | +| encnet | R-50-D8 | 512x512 | 160000 | - | - | 40.10 | 41.71 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/encnet/encnet_r50-d8_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_512x512_160k_ade20k/encnet_r50-d8_512x512_160k_ade20k_20200622_101059-b2db95e0.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_512x512_160k_ade20k/encnet_r50-d8_512x512_160k_ade20k-20200622_101059.log.json) | +| encnet | R-101-D8 | 512x512 | 160000 | - | - | 42.61 | 44.01 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/encnet/encnet_r101-d8_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_512x512_160k_ade20k/encnet_r101-d8_512x512_160k_ade20k_20200622_073348-7989641f.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_512x512_160k_ade20k/encnet_r101-d8_512x512_160k_ade20k-20200622_073348.log.json) | diff --git a/configs/encnet/encnet_r101-d8_512x1024_40k_cityscapes.py 
b/configs/encnet/encnet_r101-d8_512x1024_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..f34373d9ebab5ef6f4c01e3eab8a97c288495be0 --- /dev/null +++ b/configs/encnet/encnet_r101-d8_512x1024_40k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './encnet_r50-d8_512x1024_40k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/encnet/encnet_r101-d8_512x1024_80k_cityscapes.py b/configs/encnet/encnet_r101-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..0b0207b3144460d25229e3ac4c4d0d9fc1d34292 --- /dev/null +++ b/configs/encnet/encnet_r101-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './encnet_r50-d8_512x1024_80k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/encnet/encnet_r101-d8_512x512_160k_ade20k.py b/configs/encnet/encnet_r101-d8_512x512_160k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..8fec6ba255f33d48a66a831de4571346a7a2bd2e --- /dev/null +++ b/configs/encnet/encnet_r101-d8_512x512_160k_ade20k.py @@ -0,0 +1,2 @@ +_base_ = './encnet_r50-d8_512x512_160k_ade20k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/encnet/encnet_r101-d8_512x512_20k_voc12aug.py b/configs/encnet/encnet_r101-d8_512x512_20k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..c264af998b5ef6a9e521db204205fb998cce68a9 --- /dev/null +++ b/configs/encnet/encnet_r101-d8_512x512_20k_voc12aug.py @@ -0,0 +1,2 @@ +_base_ = './encnet_r50-d8_512x512_20k_voc12aug.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/encnet/encnet_r101-d8_512x512_40k_voc12aug.py b/configs/encnet/encnet_r101-d8_512x512_40k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..8a6968ea583758191fa8e94497c7186e653c7afb --- /dev/null +++ b/configs/encnet/encnet_r101-d8_512x512_40k_voc12aug.py @@ -0,0 +1,2 @@ +_base_ = './encnet_r50-d8_512x512_40k_voc12aug.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/encnet/encnet_r101-d8_512x512_80k_ade20k.py b/configs/encnet/encnet_r101-d8_512x512_80k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..94151004ea88394373cf8f135b065d5056b11179 --- /dev/null +++ b/configs/encnet/encnet_r101-d8_512x512_80k_ade20k.py @@ -0,0 +1,2 @@ +_base_ = './encnet_r50-d8_512x512_80k_ade20k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/encnet/encnet_r101-d8_769x769_40k_cityscapes.py b/configs/encnet/encnet_r101-d8_769x769_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..d6ade67b76ce04e1ede3ff99aab4863705cff446 --- /dev/null +++ b/configs/encnet/encnet_r101-d8_769x769_40k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './encnet_r50-d8_769x769_40k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/encnet/encnet_r101-d8_769x769_80k_cityscapes.py b/configs/encnet/encnet_r101-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..55648c08b2c4eb78d7d5ae65482e5e5b291c058a --- /dev/null +++ b/configs/encnet/encnet_r101-d8_769x769_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './encnet_r50-d8_769x769_80k_cityscapes.py' +model = 
dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/encnet/encnet_r50-d8_512x1024_40k_cityscapes.py b/configs/encnet/encnet_r50-d8_512x1024_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..4ea6ed0e84f3aa7d2c7acd8dd5c459a8cd3ce45c --- /dev/null +++ b/configs/encnet/encnet_r50-d8_512x1024_40k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/encnet_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] diff --git a/configs/encnet/encnet_r50-d8_512x1024_80k_cityscapes.py b/configs/encnet/encnet_r50-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..d2feeef7e982550481365f8187cb1a50f0fafcc9 --- /dev/null +++ b/configs/encnet/encnet_r50-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/encnet_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] diff --git a/configs/encnet/encnet_r50-d8_512x512_160k_ade20k.py b/configs/encnet/encnet_r50-d8_512x512_160k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..2a5dc203cc793860aae7743d16c4fb9a564ad1d8 --- /dev/null +++ b/configs/encnet/encnet_r50-d8_512x512_160k_ade20k.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/encnet_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +model = dict( + decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) diff --git a/configs/encnet/encnet_r50-d8_512x512_20k_voc12aug.py b/configs/encnet/encnet_r50-d8_512x512_20k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..9cb7952cede58165d2ed0f35d2208ad1ffb65232 --- /dev/null +++ b/configs/encnet/encnet_r50-d8_512x512_20k_voc12aug.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/encnet_r50-d8.py', + '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_20k.py' +] +model = dict( + decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21)) diff --git a/configs/encnet/encnet_r50-d8_512x512_40k_voc12aug.py b/configs/encnet/encnet_r50-d8_512x512_40k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..81f3cbfbf516e833821c49deecd8f167170021f0 --- /dev/null +++ b/configs/encnet/encnet_r50-d8_512x512_40k_voc12aug.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/encnet_r50-d8.py', + '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21)) diff --git a/configs/encnet/encnet_r50-d8_512x512_80k_ade20k.py b/configs/encnet/encnet_r50-d8_512x512_80k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..835375cb0447378fc76431158eb0b8fc011d36bc --- /dev/null +++ b/configs/encnet/encnet_r50-d8_512x512_80k_ade20k.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/encnet_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) diff --git a/configs/encnet/encnet_r50-d8_769x769_40k_cityscapes.py b/configs/encnet/encnet_r50-d8_769x769_40k_cityscapes.py new file mode 100644 index 
0000000000000000000000000000000000000000..d311e33f56ba431a882b0e7079001b0e9932a011 --- /dev/null +++ b/configs/encnet/encnet_r50-d8_769x769_40k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/encnet_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/configs/encnet/encnet_r50-d8_769x769_80k_cityscapes.py b/configs/encnet/encnet_r50-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..7b535f3c80818ce6b692b66f18ceee8e7b181fdc --- /dev/null +++ b/configs/encnet/encnet_r50-d8_769x769_80k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/encnet_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/configs/encnet/encnet_r50s-d8_512x512_80k_ade20k.py b/configs/encnet/encnet_r50s-d8_512x512_80k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..600b701a7194ead496cc924bee897b6096e1c7ca --- /dev/null +++ b/configs/encnet/encnet_r50s-d8_512x512_80k_ade20k.py @@ -0,0 +1,8 @@ +_base_ = [ + '../_base_/models/encnet_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +model = dict( + backbone=dict(stem_channels=128), + decode_head=dict(num_classes=150), + auxiliary_head=dict(num_classes=150)) diff --git a/configs/fastscnn/README.md b/configs/fastscnn/README.md new file mode 100644 index 0000000000000000000000000000000000000000..9cea8d0fd0dd76f5322c3d53263d3d5faef539fa --- /dev/null +++ b/configs/fastscnn/README.md @@ -0,0 +1,22 @@ +# Fast-SCNN for Semantic Segmentation + +## Introduction + + + +```latex +@article{poudel2019fast, + title={Fast-scnn: Fast semantic segmentation network}, + author={Poudel, Rudra PK and Liwicki, Stephan and Cipolla, Roberto}, + journal={arXiv preprint arXiv:1902.04502}, + year={2019} +} +``` + +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| --------- | --------- | --------- | ------: | -------- | -------------- | ----: | ------------- | --------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| Fast-SCNN | Fast-SCNN | 512x1024 | 80000 | 8.4 | 63.61 | 69.06 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fast_scnn.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fast_scnn/fast_scnn_4x8_80k_lr0.12_cityscapes-f5096c79.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/fast_scnn/fast_scnn_4x8_80k_lr0.12_cityscapes-20200807_165744.log.json) | diff --git a/configs/fastscnn/fast_scnn_4x8_80k_lr0.12_cityscapes.py b/configs/fastscnn/fast_scnn_4x8_80k_lr0.12_cityscapes.py new file mode 100644 index 
0000000000000000000000000000000000000000..3d9c9999370c8b1c28af3063a3aded0d88c91caf --- /dev/null +++ b/configs/fastscnn/fast_scnn_4x8_80k_lr0.12_cityscapes.py @@ -0,0 +1,10 @@ +_base_ = [ + '../_base_/models/fast_scnn.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] + +# Re-config the data sampler. +data = dict(samples_per_gpu=2, workers_per_gpu=4) + +# Re-config the optimizer. +optimizer = dict(type='SGD', lr=0.12, momentum=0.9, weight_decay=4e-5) diff --git a/configs/fcn/README.md b/configs/fcn/README.md new file mode 100644 index 0000000000000000000000000000000000000000..270781b48b99fe13628f3b3f81fb7457a9052bc9 --- /dev/null +++ b/configs/fcn/README.md @@ -0,0 +1,85 @@ +# Fully Convolutional Networks for Semantic Segmentation + +## Introduction + + + +```latex +@article{shelhamer2017fully, + title={Fully convolutional networks for semantic segmentation}, + author={Shelhamer, Evan and Long, Jonathan and Darrell, Trevor}, + journal={IEEE transactions on pattern analysis and machine intelligence}, + volume={39}, + number={4}, + pages={640--651}, + year={2017}, + publisher={IEEE Trans Pattern Anal Mach Intell} +} +``` + +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | ---------- | --------- | ------: | -------- | -------------- | ----: | ------------: | -------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| FCN | R-50-D8 | 512x1024 | 40000 | 5.7 | 4.17 | 72.25 | 73.36 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fcn/fcn_r50-d8_512x1024_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x1024_40k_cityscapes/fcn_r50-d8_512x1024_40k_cityscapes_20200604_192608-efe53f0d.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x1024_40k_cityscapes/fcn_r50-d8_512x1024_40k_cityscapes_20200604_192608.log.json) | +| FCN | R-101-D8 | 512x1024 | 40000 | 9.2 | 2.66 | 75.45 | 76.58 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fcn/fcn_r101-d8_512x1024_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x1024_40k_cityscapes/fcn_r101-d8_512x1024_40k_cityscapes_20200604_181852-a883d3a1.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x1024_40k_cityscapes/fcn_r101-d8_512x1024_40k_cityscapes_20200604_181852.log.json) | +| FCN | R-50-D8 | 769x769 | 40000 | 6.5 | 1.80 | 71.47 | 72.54 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fcn/fcn_r50-d8_769x769_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_769x769_40k_cityscapes/fcn_r50-d8_769x769_40k_cityscapes_20200606_113104-977b5d02.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_769x769_40k_cityscapes/fcn_r50-d8_769x769_40k_cityscapes_20200606_113104.log.json) | +| FCN | R-101-D8 | 769x769 | 40000 | 10.4 | 1.19 | 73.93 | 75.14 | 
[config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fcn/fcn_r101-d8_769x769_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_769x769_40k_cityscapes/fcn_r101-d8_769x769_40k_cityscapes_20200606_113208-7d4ab69c.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_769x769_40k_cityscapes/fcn_r101-d8_769x769_40k_cityscapes_20200606_113208.log.json) | +| FCN | R-18-D8 | 512x1024 | 80000 | 1.7 | 14.65 | 71.11 | 72.91 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fcn/fcn_r18-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r18-d8_512x1024_80k_cityscapes/fcn_r18-d8_512x1024_80k_cityscapes_20201225_021327-6c50f8b4.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r18-d8_512x1024_80k_cityscapes/fcn_r18-d8_512x1024_80k_cityscapes-20201225_021327.log.json) | +| FCN | R-50-D8 | 512x1024 | 80000 | - | | 73.61 | 74.24 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fcn/fcn_r50-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x1024_80k_cityscapes/fcn_r50-d8_512x1024_80k_cityscapes_20200606_113019-03aa804d.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x1024_80k_cityscapes/fcn_r50-d8_512x1024_80k_cityscapes_20200606_113019.log.json) | +| FCN | R-101-D8 | 512x1024 | 80000 | - | - | 75.13 | 75.94 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fcn/fcn_r101-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x1024_80k_cityscapes/fcn_r101-d8_512x1024_80k_cityscapes_20200606_113038-3fb937eb.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x1024_80k_cityscapes/fcn_r101-d8_512x1024_80k_cityscapes_20200606_113038.log.json) | +| FCN | R-18-D8 | 769x769 | 80000 | 1.9 | 6.40 | 70.80 | 73.16 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fcn/fcn_r18-d8_769x769_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r18-d8_769x769_80k_cityscapes/fcn_r18-d8_769x769_80k_cityscapes_20201225_021451-9739d1b8.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r18-d8_769x769_80k_cityscapes/fcn_r18-d8_769x769_80k_cityscapes-20201225_021451.log.json) | +| FCN | R-50-D8 | 769x769 | 80000 | - | - | 72.64 | 73.32 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fcn/fcn_r50-d8_769x769_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_769x769_80k_cityscapes/fcn_r50-d8_769x769_80k_cityscapes_20200606_195749-f5caeabc.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_769x769_80k_cityscapes/fcn_r50-d8_769x769_80k_cityscapes_20200606_195749.log.json) | +| FCN | R-101-D8 | 769x769 | 80000 | - | - | 75.52 | 76.61 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fcn/fcn_r101-d8_769x769_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_769x769_80k_cityscapes/fcn_r101-d8_769x769_80k_cityscapes_20200606_214354-45cbac68.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_769x769_80k_cityscapes/fcn_r101-d8_769x769_80k_cityscapes_20200606_214354.log.json) | +| FCN | R-18b-D8 | 512x1024 | 80000 | 1.6 | 16.74 | 70.24 | 72.77 | 
[config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fcn/fcn_r18b-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r18b-d8_512x1024_80k_cityscapes/fcn_r18b-d8_512x1024_80k_cityscapes_20201225_230143-92c0f445.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r18b-d8_512x1024_80k_cityscapes/fcn_r18b-d8_512x1024_80k_cityscapes-20201225_230143.log.json) | +| FCN | R-50b-D8 | 512x1024 | 80000 | 5.6 | 4.20 | 75.65 | 77.59 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fcn/fcn_r50b-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50b-d8_512x1024_80k_cityscapes/fcn_r50b-d8_512x1024_80k_cityscapes_20201225_094221-82957416.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50b-d8_512x1024_80k_cityscapes/fcn_r50b-d8_512x1024_80k_cityscapes-20201225_094221.log.json) | +| FCN | R-101b-D8 | 512x1024 | 80000 | 9.1 | 2.73 | 77.37 | 78.77 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fcn/fcn_r101b-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101b-d8_512x1024_80k_cityscapes/fcn_r101b-d8_512x1024_80k_cityscapes_20201226_160213-4543858f.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101b-d8_512x1024_80k_cityscapes/fcn_r101b-d8_512x1024_80k_cityscapes-20201226_160213.log.json) | +| FCN | R-18b-D8 | 769x769 | 80000 | 1.7 | 6.70 | 69.66 | 72.07 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fcn/fcn_r18b-d8_769x769_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r18b-d8_769x769_80k_cityscapes/fcn_r18b-d8_769x769_80k_cityscapes_20201226_004430-32d504e5.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r18b-d8_769x769_80k_cityscapes/fcn_r18b-d8_769x769_80k_cityscapes-20201226_004430.log.json) | +| FCN | R-50b-D8 | 769x769 | 80000 | 6.3 | 1.82 | 73.83 | 76.60 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fcn/fcn_r50b-d8_769x769_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50b-d8_769x769_80k_cityscapes/fcn_r50b-d8_769x769_80k_cityscapes_20201225_094223-94552d38.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50b-d8_769x769_80k_cityscapes/fcn_r50b-d8_769x769_80k_cityscapes-20201225_094223.log.json) | +| FCN | R-101b-D8 | 769x769 | 80000 | 10.3 | 1.15 | 77.02 | 78.67 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fcn/fcn_r101b-d8_769x769_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101b-d8_769x769_80k_cityscapes/fcn_r101b-d8_769x769_80k_cityscapes_20201226_170012-82be37e2.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101b-d8_769x769_80k_cityscapes/fcn_r101b-d8_769x769_80k_cityscapes-20201226_170012.log.json) | +| FCN-D6 | R-50-D16 | 512x1024 | 40000 | 3.4 | 10.22 | 77.06 | 78.85 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fcn/fcn_d6_r50-d16_512x1024_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r50-d16_512x1024_40k_cityscapes/fcn_d6_r50-d16_512x1024_40k_cityscapes-98d5d1bc.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r50-d16_512x1024_40k_cityscapes/fcn_d6_r50-d16_512x1024_40k_cityscapes-20210305_130133.log.json) | +| FCN-D6 | 
R-50-D16 | 512x1024 | 80000 | - | 10.35 | 77.27 | 78.88 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fcn/fcn_d6_r50-d16_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r50-d16_512x1024_80k_cityscapes/fcn_d6_r50-d16_512x1024_40k_cityscapes-98d5d1bc.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r50-d16_512x1024_80k_cityscapes/fcn_d6_r50-d16_512x1024_80k_cityscapes-20210306_115604.log.json) | +| FCN-D6 | R-50-D16 | 769x769 | 40000 | 3.7 | 4.17 | 76.82 | 78.22 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fcn/fcn_d6_r50-d16_769x769_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r50-d16_769x769_40k_cityscapes/fcn_d6_r50-d16_769x769_40k_cityscapes-1aab18ed.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r50-d16_769x769_40k_cityscapes/fcn_d6_r50-d16_769x769_40k_cityscapes-20210305_185744.log.json) | +| FCN-D6 | R-50-D16 | 769x769 | 80000 | - | 4.15 | 77.04 | 78.40 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fcn/fcn_d6_r50-d16_769x769_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r50-d16_769x769_80k_cityscapes/fcn_d6_r50-d16_769x769_80k_cityscapes-109d88eb.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r50-d16_769x769_80k_cityscapes/fcn_d6_r50-d16_769x769_80k_cityscapes-20210305_200413.log.json) | +| FCN-D6 | R-101-D16 | 512x1024 | 40000 | 4.5 | 8.04 | 77.36 | 79.18 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fcn/fcn_d6_r101-d16_512x1024_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r101-d16_512x1024_40k_cityscapes/fcn_d6_r101-d16_512x1024_40k_cityscapes-9cf2b450.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r101-d16_512x1024_40k_cityscapes/fcn_d6_r101-d16_512x1024_40k_cityscapes-20210305_130337.log.json) | +| FCN-D6 | R-101-D16 | 512x1024 | 80000 | - | 8.26 | 78.46 | 80.42 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fcn/fcn_d6_r101-d16_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r101-d16_512x1024_80k_cityscapes/fcn_d6_r101-d16_512x1024_80k_cityscapes-cb336445.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r101-d16_512x1024_80k_cityscapes/fcn_d6_r101-d16_512x1024_80k_cityscapes-20210308_102747.log.json) | +| FCN-D6 | R-101-D16 | 769x769 | 40000 | 5.0 | 3.12 | 77.28 | 78.95 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fcn/fcn_d6_r101-d16_769x769_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r101-d16_769x769_40k_cityscapes/fcn_d6_r101-d16_769x769_40k_cityscapes-60b114e9.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r101-d16_769x769_40k_cityscapes/fcn_d6_r101-d16_769x769_40k_cityscapes-20210308_102453.log.json) | +| FCN-D6 | R-101-D16 | 769x769 | 80000 | - | 3.21 | 78.06 | 79.58 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fcn/fcn_d6_r101-d16_769x769_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r101-d16_769x769_80k_cityscapes/fcn_d6_r101-d16_769x769_80k_cityscapes-e33adc4f.pth) | 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r101-d16_769x769_80k_cityscapes/fcn_d6_r101-d16_769x769_80k_cityscapes-20210306_120016.log.json) | +| FCN-D6 | R-50b-D16 | 512x1024 | 80000 | 3.2 | 10.16 | 76.99 | 79.03 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fcn/fcn_d6_r50b_d16_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r50b_d16_512x1024_80k_cityscapes/fcn_d6_r50b_d16_512x1024_80k_cityscapes-6a0b62e9.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r50b_d16_512x1024_80k_cityscapes/fcn_d6_r50b_d16_512x1024_80k_cityscapes-20210311_125550.log.json) | +| FCN-D6 | R-50b-D16 | 769x769 | 80000 | 3.6 | 4.17 | 76.86 | 78.52 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fcn/fcn_d6_r50b_d16_769x769_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r50b_d16_769x769_80k_cityscapes/fcn_d6_r50b_d16_769x769_80k_cityscapes-d665f231.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r50b_d16_769x769_80k_cityscapes/fcn_d6_r50b_d16_769x769_80k_cityscapes-20210311_131012.log.json) | +| FCN-D6 | R-101b-D16 | 512x1024 | 80000 | 4.3 | 8.46 | 77.72 | 79.53 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fcn/fcn_d6_r101b_d16_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r101b_d16_512x1024_80k_cityscapes/fcn_d6_r101b_d16_512x1024_80k_cityscapes-3f2eb5b4.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r101b_d16_512x1024_80k_cityscapes/fcn_d6_r101b_d16_512x1024_80k_cityscapes-20210311_144305.log.json) | +| FCN-D6 | R-101b-D16 | 769x769 | 80000 | 4.8 | 3.32 | 77.34 | 78.91 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fcn/fcn_d6_r101b_d16_769x769_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r101b_d16_769x769_80k_cityscapes/fcn_d6_r101b_d16_769x769_80k_cityscapes-c4d8bfbc.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r101b_d16_769x769_80k_cityscapes/fcn_d6_r101b_d16_769x769_80k_cityscapes-20210311_154527.log.json) | + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ----------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| FCN | R-50-D8 | 512x512 | 80000 | 8.5 | 23.49 | 35.94 | 37.94 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fcn/fcn_r50-d8_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x512_80k_ade20k/fcn_r50-d8_512x512_80k_ade20k_20200614_144016-f8ac5082.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x512_80k_ade20k/fcn_r50-d8_512x512_80k_ade20k_20200614_144016.log.json) | +| FCN | R-101-D8 | 512x512 | 80000 | 12 | 14.78 | 39.61 | 40.83 | 
[config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fcn/fcn_r101-d8_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x512_80k_ade20k/fcn_r101-d8_512x512_80k_ade20k_20200615_014143-bc1809f7.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x512_80k_ade20k/fcn_r101-d8_512x512_80k_ade20k_20200615_014143.log.json) | +| FCN | R-50-D8 | 512x512 | 160000 | - | - | 36.10 | 38.08 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fcn/fcn_r50-d8_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x512_160k_ade20k/fcn_r50-d8_512x512_160k_ade20k_20200615_100713-4edbc3b4.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x512_160k_ade20k/fcn_r50-d8_512x512_160k_ade20k_20200615_100713.log.json) | +| FCN | R-101-D8 | 512x512 | 160000 | - | - | 39.91 | 41.40 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fcn/fcn_r101-d8_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x512_160k_ade20k/fcn_r101-d8_512x512_160k_ade20k_20200615_105816-fd192bd5.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x512_160k_ade20k/fcn_r101-d8_512x512_160k_ade20k_20200615_105816.log.json) | + +### Pascal VOC 2012 + Aug + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| FCN | R-50-D8 | 512x512 | 20000 | 5.7 | 23.28 | 67.08 | 69.94 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fcn/fcn_r50-d8_512x512_20k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x512_20k_voc12aug/fcn_r50-d8_512x512_20k_voc12aug_20200617_010715-52dc5306.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x512_20k_voc12aug/fcn_r50-d8_512x512_20k_voc12aug_20200617_010715.log.json) | +| FCN | R-101-D8 | 512x512 | 20000 | 9.2 | 14.81 | 71.16 | 73.57 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fcn/fcn_r101-d8_512x512_20k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x512_20k_voc12aug/fcn_r101-d8_512x512_20k_voc12aug_20200617_010842-0bb4e798.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x512_20k_voc12aug/fcn_r101-d8_512x512_20k_voc12aug_20200617_010842.log.json) | +| FCN | R-50-D8 | 512x512 | 40000 | - | - | 66.97 | 69.04 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fcn/fcn_r50-d8_512x512_40k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x512_40k_voc12aug/fcn_r50-d8_512x512_40k_voc12aug_20200613_161222-5e2dbf40.pth) | 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x512_40k_voc12aug/fcn_r50-d8_512x512_40k_voc12aug_20200613_161222.log.json) | +| FCN | R-101-D8 | 512x512 | 40000 | - | - | 69.91 | 72.38 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fcn/fcn_r101-d8_512x512_40k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x512_40k_voc12aug/fcn_r101-d8_512x512_40k_voc12aug_20200613_161240-4c8bcefd.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x512_40k_voc12aug/fcn_r101-d8_512x512_40k_voc12aug_20200613_161240.log.json) | + +### Pascal Context + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| FCN | R-101-D8 | 480x480 | 40000 | - | 9.93 | 44.43 | 45.63 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fcn/fcn_r101-d8_480x480_40k_pascal_context.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_480x480_40k_pascal_context/fcn_r101-d8_480x480_40k_pascal_context-20210421_154757-b5e97937.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_480x480_40k_pascal_context/fcn_r101-d8_480x480_40k_pascal_context-20210421_154757.log.json) | +| FCN | R-101-D8 | 480x480 | 80000 | - | - | 44.13 | 45.26 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fcn/fcn_r101-d8_480x480_80k_pascal_context.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_480x480_80k_pascal_context/fcn_r101-d8_480x480_80k_pascal_context-20210421_163310-4711813f.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_480x480_80k_pascal_context/fcn_r101-d8_480x480_80k_pascal_context-20210421_163310.log.json) | + +### Pascal Context 59 + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| FCN | R-101-D8 | 480x480 | 40000 | - | - | 48.42 | 50.4 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fcn/fcn_r101-d8_480x480_40k_pascal_context_59.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_480x480_40k_pascal_context_59/fcn_r101-d8_480x480_40k_pascal_context_59_20210415_230724-8cf83682.pth) | 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_480x480_40k_pascal_context_59/fcn_r101-d8_480x480_40k_pascal_context_59-20210415_230724.log.json) |
+| FCN | R-101-D8 | 480x480 | 80000 | - | - | 49.35 | 51.38 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fcn/fcn_r101-d8_480x480_80k_pascal_context_59.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_480x480_80k_pascal_context_59/fcn_r101-d8_480x480_80k_pascal_context_59_20210416_110804-9a6f2c94.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_480x480_80k_pascal_context_59/fcn_r101-d8_480x480_80k_pascal_context_59-20210416_110804.log.json) |
diff --git a/configs/fcn/fcn_d6_r101-d16_512x1024_40k_cityscapes.py b/configs/fcn/fcn_d6_r101-d16_512x1024_40k_cityscapes.py
new file mode 100644
index 0000000000000000000000000000000000000000..aec4254c8f4ae835cdfbe785bb0c375173d1e232
--- /dev/null
+++ b/configs/fcn/fcn_d6_r101-d16_512x1024_40k_cityscapes.py
@@ -0,0 +1,2 @@
+_base_ = './fcn_d6_r50-d16_512x1024_40k_cityscapes.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/fcn/fcn_d6_r101-d16_512x1024_80k_cityscapes.py b/configs/fcn/fcn_d6_r101-d16_512x1024_80k_cityscapes.py
new file mode 100644
index 0000000000000000000000000000000000000000..d0bafc52abdb3d9bda87411e8199e86fc9d5a8b8
--- /dev/null
+++ b/configs/fcn/fcn_d6_r101-d16_512x1024_80k_cityscapes.py
@@ -0,0 +1,2 @@
+_base_ = './fcn_d6_r50-d16_512x1024_80k_cityscapes.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/fcn/fcn_d6_r101-d16_769x769_40k_cityscapes.py b/configs/fcn/fcn_d6_r101-d16_769x769_40k_cityscapes.py
new file mode 100644
index 0000000000000000000000000000000000000000..29a9f98a93fedbf9644599203b48aa30a7ad8a28
--- /dev/null
+++ b/configs/fcn/fcn_d6_r101-d16_769x769_40k_cityscapes.py
@@ -0,0 +1,2 @@
+_base_ = './fcn_d6_r50-d16_769x769_40k_cityscapes.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/fcn/fcn_d6_r101-d16_769x769_80k_cityscapes.py b/configs/fcn/fcn_d6_r101-d16_769x769_80k_cityscapes.py
new file mode 100644
index 0000000000000000000000000000000000000000..1f21c6578bb8f820448f773fb6651b02e64b6123
--- /dev/null
+++ b/configs/fcn/fcn_d6_r101-d16_769x769_80k_cityscapes.py
@@ -0,0 +1,2 @@
+_base_ = './fcn_d6_r50-d16_769x769_80k_cityscapes.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/fcn/fcn_d6_r101b-d16_512x1024_80k_cityscapes.py b/configs/fcn/fcn_d6_r101b-d16_512x1024_80k_cityscapes.py
new file mode 100644
index 0000000000000000000000000000000000000000..af3f765b76e7269d22c8f362e1d41f03d1efaf93
--- /dev/null
+++ b/configs/fcn/fcn_d6_r101b-d16_512x1024_80k_cityscapes.py
@@ -0,0 +1,4 @@
+_base_ = './fcn_d6_r50b-d16_512x1024_80k_cityscapes.py'
+model = dict(
+    pretrained='torchvision://resnet101',
+    backbone=dict(type='ResNet', depth=101))
diff --git a/configs/fcn/fcn_d6_r101b-d16_769x769_80k_cityscapes.py b/configs/fcn/fcn_d6_r101b-d16_769x769_80k_cityscapes.py
new file mode 100644
index 0000000000000000000000000000000000000000..e3d4d884fd0c92b35dd428a55ce22255cecac497
--- /dev/null
+++ b/configs/fcn/fcn_d6_r101b-d16_769x769_80k_cityscapes.py
@@ -0,0 +1,4 @@
+_base_ = './fcn_d6_r50b-d16_769x769_80k_cityscapes.py'
+model = dict(
+    pretrained='torchvision://resnet101',
+    backbone=dict(type='ResNet', depth=101))
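A note on the FCN-D6 entries above: each two-line R-101 config only swaps the backbone, while the dilation-6 head and the stride/dilation overrides that turn the usual D8 backbone into D16 live in the shared `fcn_d6_r50-d16_*` base configs that follow, pulled in through mmcv's `_base_` inheritance. A minimal sketch of how the merged config can be inspected, assuming a local mmsegmentation checkout; the printed values follow from the base configs in this diff:

```python
# Resolve a derived FCN-D6 config through its _base_ chain with mmcv.
from mmcv import Config

cfg = Config.fromfile('configs/fcn/fcn_d6_r101-d16_512x1024_80k_cityscapes.py')

# The R-101 file itself only sets `pretrained` and `depth`; everything else
# is inherited from fcn_d6_r50-d16_512x1024_80k_cityscapes.py and its bases.
print(cfg.model.backbone.depth)        # 101
print(cfg.model.backbone.strides)      # (1, 2, 2, 1) -> output stride 16 ("D16")
print(cfg.model.backbone.dilations)    # (1, 1, 1, 2)
print(cfg.model.decode_head.dilation)  # 6 -> the "D6" FCN head
```

diff --git 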
a/configs/fcn/fcn_d6_r50-d16_512x1024_40k_cityscapes.py b/configs/fcn/fcn_d6_r50-d16_512x1024_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..f30646ede7b036e6c82c335729b19f92293efb35 --- /dev/null +++ b/configs/fcn/fcn_d6_r50-d16_512x1024_40k_cityscapes.py @@ -0,0 +1,8 @@ +_base_ = [ + '../_base_/models/fcn_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] +model = dict( + backbone=dict(dilations=(1, 1, 1, 2), strides=(1, 2, 2, 1)), + decode_head=dict(dilation=6), + auxiliary_head=dict(dilation=6)) diff --git a/configs/fcn/fcn_d6_r50-d16_512x1024_80k_cityscapes.py b/configs/fcn/fcn_d6_r50-d16_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..e4b623aca9ce1138baa259cbdd02920a47765f8d --- /dev/null +++ b/configs/fcn/fcn_d6_r50-d16_512x1024_80k_cityscapes.py @@ -0,0 +1,8 @@ +_base_ = [ + '../_base_/models/fcn_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +model = dict( + backbone=dict(dilations=(1, 1, 1, 2), strides=(1, 2, 2, 1)), + decode_head=dict(dilation=6), + auxiliary_head=dict(dilation=6)) diff --git a/configs/fcn/fcn_d6_r50-d16_769x769_40k_cityscapes.py b/configs/fcn/fcn_d6_r50-d16_769x769_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..01d8f27c8cc62e681df770e111ff9f866e9d112f --- /dev/null +++ b/configs/fcn/fcn_d6_r50-d16_769x769_40k_cityscapes.py @@ -0,0 +1,10 @@ +_base_ = [ + '../_base_/models/fcn_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict( + backbone=dict(dilations=(1, 1, 1, 2), strides=(1, 2, 2, 1)), + decode_head=dict(align_corners=True, dilation=6), + auxiliary_head=dict(align_corners=True, dilation=6), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/configs/fcn/fcn_d6_r50-d16_769x769_80k_cityscapes.py b/configs/fcn/fcn_d6_r50-d16_769x769_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..c5ef3b880eac7dd089aace8ce2a87e1bd837beed --- /dev/null +++ b/configs/fcn/fcn_d6_r50-d16_769x769_80k_cityscapes.py @@ -0,0 +1,10 @@ +_base_ = [ + '../_base_/models/fcn_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +model = dict( + backbone=dict(dilations=(1, 1, 1, 2), strides=(1, 2, 2, 1)), + decode_head=dict(align_corners=True, dilation=6), + auxiliary_head=dict(align_corners=True, dilation=6), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/configs/fcn/fcn_d6_r50b-d16_512x1024_80k_cityscapes.py b/configs/fcn/fcn_d6_r50b-d16_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..0749ff14a3e7d207e82572e0516b2555ccacc7d9 --- /dev/null +++ b/configs/fcn/fcn_d6_r50b-d16_512x1024_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './fcn_d6_r50-d16_512x1024_80k_cityscapes.py' +model = dict(pretrained='torchvision://resnet50', backbone=dict(type='ResNet')) diff --git a/configs/fcn/fcn_d6_r50b-d16_769x769_80k_cityscapes.py b/configs/fcn/fcn_d6_r50b-d16_769x769_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..fba8948a031d46918e9c28cf94fff8e384228e82 --- /dev/null +++ b/configs/fcn/fcn_d6_r50b-d16_769x769_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = 
'./fcn_d6_r50-d16_769x769_80k_cityscapes.py' +model = dict(pretrained='torchvision://resnet50', backbone=dict(type='ResNet')) diff --git a/configs/fcn/fcn_r101-d8_480x480_40k_pascal_context.py b/configs/fcn/fcn_r101-d8_480x480_40k_pascal_context.py new file mode 100644 index 0000000000000000000000000000000000000000..f3a15b41054318d508e98685632921f262029de0 --- /dev/null +++ b/configs/fcn/fcn_r101-d8_480x480_40k_pascal_context.py @@ -0,0 +1,2 @@ +_base_ = './fcn_r50-d8_480x480_40k_pascal_context.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/fcn/fcn_r101-d8_480x480_40k_pascal_context_59.py b/configs/fcn/fcn_r101-d8_480x480_40k_pascal_context_59.py new file mode 100644 index 0000000000000000000000000000000000000000..908f4bff0062e06ce1607c55827aac9fe5b1c354 --- /dev/null +++ b/configs/fcn/fcn_r101-d8_480x480_40k_pascal_context_59.py @@ -0,0 +1,2 @@ +_base_ = './fcn_r50-d8_480x480_40k_pascal_context_59.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/fcn/fcn_r101-d8_480x480_80k_pascal_context.py b/configs/fcn/fcn_r101-d8_480x480_80k_pascal_context.py new file mode 100644 index 0000000000000000000000000000000000000000..bdccfd99ba0c25646f02850483c2cdf679fdbf3d --- /dev/null +++ b/configs/fcn/fcn_r101-d8_480x480_80k_pascal_context.py @@ -0,0 +1,2 @@ +_base_ = './fcn_r50-d8_480x480_80k_pascal_context.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/fcn/fcn_r101-d8_480x480_80k_pascal_context_59.py b/configs/fcn/fcn_r101-d8_480x480_80k_pascal_context_59.py new file mode 100644 index 0000000000000000000000000000000000000000..09cb612e42056133ff4b061e8cd934f0f650d717 --- /dev/null +++ b/configs/fcn/fcn_r101-d8_480x480_80k_pascal_context_59.py @@ -0,0 +1,2 @@ +_base_ = './fcn_r50-d8_480x480_80k_pascal_context_59.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/fcn/fcn_r101-d8_512x1024_40k_cityscapes.py b/configs/fcn/fcn_r101-d8_512x1024_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..7918dd10d05cd98dbc02f02ef1b93e3134f52357 --- /dev/null +++ b/configs/fcn/fcn_r101-d8_512x1024_40k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './fcn_r50-d8_512x1024_40k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/fcn/fcn_r101-d8_512x1024_80k_cityscapes.py b/configs/fcn/fcn_r101-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..528110dc73c15008869a9ad9851ef487f0c952c7 --- /dev/null +++ b/configs/fcn/fcn_r101-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './fcn_r50-d8_512x1024_80k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/fcn/fcn_r101-d8_512x512_160k_ade20k.py b/configs/fcn/fcn_r101-d8_512x512_160k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..1bf6780f2c821052692ddcb904bd10e6256c1e71 --- /dev/null +++ b/configs/fcn/fcn_r101-d8_512x512_160k_ade20k.py @@ -0,0 +1,2 @@ +_base_ = './fcn_r50-d8_512x512_160k_ade20k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/fcn/fcn_r101-d8_512x512_20k_voc12aug.py b/configs/fcn/fcn_r101-d8_512x512_20k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..09a5fe5468f0155f8fd0bf2cd1574a33624d8492 --- /dev/null +++ 
b/configs/fcn/fcn_r101-d8_512x512_20k_voc12aug.py @@ -0,0 +1,2 @@ +_base_ = './fcn_r50-d8_512x512_20k_voc12aug.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/fcn/fcn_r101-d8_512x512_40k_voc12aug.py b/configs/fcn/fcn_r101-d8_512x512_40k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..eafefaa67565513c277c5eb42e3661a88133cb27 --- /dev/null +++ b/configs/fcn/fcn_r101-d8_512x512_40k_voc12aug.py @@ -0,0 +1,2 @@ +_base_ = './fcn_r50-d8_512x512_40k_voc12aug.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/fcn/fcn_r101-d8_512x512_80k_ade20k.py b/configs/fcn/fcn_r101-d8_512x512_80k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..6d0294530f4c817b352cb020d111e3248690ae1f --- /dev/null +++ b/configs/fcn/fcn_r101-d8_512x512_80k_ade20k.py @@ -0,0 +1,2 @@ +_base_ = './fcn_r50-d8_512x512_80k_ade20k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/fcn/fcn_r101-d8_769x769_40k_cityscapes.py b/configs/fcn/fcn_r101-d8_769x769_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..6b4cc571294fa45b4442c2bfeb9fda13a14fc5c2 --- /dev/null +++ b/configs/fcn/fcn_r101-d8_769x769_40k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './fcn_r50-d8_769x769_40k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/fcn/fcn_r101-d8_769x769_80k_cityscapes.py b/configs/fcn/fcn_r101-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..3503c76935e294c881130b309999d32f13df8839 --- /dev/null +++ b/configs/fcn/fcn_r101-d8_769x769_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './fcn_r50-d8_769x769_80k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/fcn/fcn_r101b-d8_512x1024_80k_cityscapes.py b/configs/fcn/fcn_r101b-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..1b9bf60fc13364ca1b7b3842664950f653426e67 --- /dev/null +++ b/configs/fcn/fcn_r101b-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = './fcn_r50-d8_512x1024_80k_cityscapes.py' +model = dict( + pretrained='torchvision://resnet101', + backbone=dict(type='ResNet', depth=101)) diff --git a/configs/fcn/fcn_r101b-d8_769x769_80k_cityscapes.py b/configs/fcn/fcn_r101b-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..f36eb02e68707d502cbe315ff8f6f25b232dee92 --- /dev/null +++ b/configs/fcn/fcn_r101b-d8_769x769_80k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = './fcn_r50-d8_769x769_80k_cityscapes.py' +model = dict( + pretrained='torchvision://resnet101', + backbone=dict(type='ResNet', depth=101)) diff --git a/configs/fcn/fcn_r18-d8_512x1024_80k_cityscapes.py b/configs/fcn/fcn_r18-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..5a1d29e480cb46a763cb17d2105b3f040153d417 --- /dev/null +++ b/configs/fcn/fcn_r18-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = './fcn_r50-d8_512x1024_80k_cityscapes.py' +model = dict( + pretrained='open-mmlab://resnet18_v1c', + backbone=dict(depth=18), + decode_head=dict( + in_channels=512, + channels=128, + ), + auxiliary_head=dict(in_channels=256, channels=64)) diff --git a/configs/fcn/fcn_r18-d8_769x769_80k_cityscapes.py b/configs/fcn/fcn_r18-d8_769x769_80k_cityscapes.py new 
file mode 100644 index 0000000000000000000000000000000000000000..6644a58dea86fd38e208abbedffe4f836e677078 --- /dev/null +++ b/configs/fcn/fcn_r18-d8_769x769_80k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = './fcn_r50-d8_769x769_80k_cityscapes.py' +model = dict( + pretrained='open-mmlab://resnet18_v1c', + backbone=dict(depth=18), + decode_head=dict( + in_channels=512, + channels=128, + ), + auxiliary_head=dict(in_channels=256, channels=64)) diff --git a/configs/fcn/fcn_r18b-d8_512x1024_80k_cityscapes.py b/configs/fcn/fcn_r18b-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..92accfc703fc398d2845d7dc2f1d5336f24738e8 --- /dev/null +++ b/configs/fcn/fcn_r18b-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = './fcn_r50-d8_512x1024_80k_cityscapes.py' +model = dict( + pretrained='torchvision://resnet18', + backbone=dict(type='ResNet', depth=18), + decode_head=dict( + in_channels=512, + channels=128, + ), + auxiliary_head=dict(in_channels=256, channels=64)) diff --git a/configs/fcn/fcn_r18b-d8_769x769_80k_cityscapes.py b/configs/fcn/fcn_r18b-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..5dd34dd2134c745275c66adc5488b4b9f68d6809 --- /dev/null +++ b/configs/fcn/fcn_r18b-d8_769x769_80k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = './fcn_r50-d8_769x769_80k_cityscapes.py' +model = dict( + pretrained='torchvision://resnet18', + backbone=dict(type='ResNet', depth=18), + decode_head=dict( + in_channels=512, + channels=128, + ), + auxiliary_head=dict(in_channels=256, channels=64)) diff --git a/configs/fcn/fcn_r50-d8_480x480_40k_pascal_context.py b/configs/fcn/fcn_r50-d8_480x480_40k_pascal_context.py new file mode 100644 index 0000000000000000000000000000000000000000..7c57a6f8ff0a7dbb18666c1b9c882da10e586aa3 --- /dev/null +++ b/configs/fcn/fcn_r50-d8_480x480_40k_pascal_context.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/fcn_r50-d8.py', '../_base_/datasets/pascal_context.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(num_classes=60), + auxiliary_head=dict(num_classes=60), + test_cfg=dict(mode='slide', crop_size=(480, 480), stride=(320, 320))) +optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001) diff --git a/configs/fcn/fcn_r50-d8_480x480_40k_pascal_context_59.py b/configs/fcn/fcn_r50-d8_480x480_40k_pascal_context_59.py new file mode 100644 index 0000000000000000000000000000000000000000..4a8180038be33fba9c3229ee3c017f2f0628544f --- /dev/null +++ b/configs/fcn/fcn_r50-d8_480x480_40k_pascal_context_59.py @@ -0,0 +1,10 @@ +_base_ = [ + '../_base_/models/fcn_r50-d8.py', + '../_base_/datasets/pascal_context_59.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(num_classes=59), + auxiliary_head=dict(num_classes=59), + test_cfg=dict(mode='slide', crop_size=(480, 480), stride=(320, 320))) +optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001) diff --git a/configs/fcn/fcn_r50-d8_480x480_80k_pascal_context.py b/configs/fcn/fcn_r50-d8_480x480_80k_pascal_context.py new file mode 100644 index 0000000000000000000000000000000000000000..df6d25b6a91f84587337e02d831f94182870ae5b --- /dev/null +++ b/configs/fcn/fcn_r50-d8_480x480_80k_pascal_context.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/fcn_r50-d8.py', '../_base_/datasets/pascal_context.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +model = dict( + 
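# Pascal Context is scored on 59 object classes plus background, hence
+    # num_classes=60 here (the pascal_context_59 configs drop background).
+    # At test time a 480x480 window is slid with stride 320 (160 px overlap).
+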
decode_head=dict(num_classes=60), + auxiliary_head=dict(num_classes=60), + test_cfg=dict(mode='slide', crop_size=(480, 480), stride=(320, 320))) +optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001) diff --git a/configs/fcn/fcn_r50-d8_480x480_80k_pascal_context_59.py b/configs/fcn/fcn_r50-d8_480x480_80k_pascal_context_59.py new file mode 100644 index 0000000000000000000000000000000000000000..02507ccb7e2f5f25014c451dcf9ba51c3a61dadc --- /dev/null +++ b/configs/fcn/fcn_r50-d8_480x480_80k_pascal_context_59.py @@ -0,0 +1,10 @@ +_base_ = [ + '../_base_/models/fcn_r50-d8.py', + '../_base_/datasets/pascal_context_59.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(num_classes=59), + auxiliary_head=dict(num_classes=59), + test_cfg=dict(mode='slide', crop_size=(480, 480), stride=(320, 320))) +optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001) diff --git a/configs/fcn/fcn_r50-d8_512x1024_40k_cityscapes.py b/configs/fcn/fcn_r50-d8_512x1024_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..401c6ea7330d45d8f7604a1da63fc6e15faea424 --- /dev/null +++ b/configs/fcn/fcn_r50-d8_512x1024_40k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/fcn_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] diff --git a/configs/fcn/fcn_r50-d8_512x1024_80k_cityscapes.py b/configs/fcn/fcn_r50-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..990a085eda2f2dc47f1a1289bfbf2726ad8c9c4f --- /dev/null +++ b/configs/fcn/fcn_r50-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/fcn_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] diff --git a/configs/fcn/fcn_r50-d8_512x512_160k_ade20k.py b/configs/fcn/fcn_r50-d8_512x512_160k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..9ca7fd23cedc0567a015bd5f8641a509ead6110a --- /dev/null +++ b/configs/fcn/fcn_r50-d8_512x512_160k_ade20k.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/fcn_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +model = dict( + decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) diff --git a/configs/fcn/fcn_r50-d8_512x512_20k_voc12aug.py b/configs/fcn/fcn_r50-d8_512x512_20k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..17206a5171dcc357c589a1711afa52d87faeece0 --- /dev/null +++ b/configs/fcn/fcn_r50-d8_512x512_20k_voc12aug.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/fcn_r50-d8.py', '../_base_/datasets/pascal_voc12_aug.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_20k.py' +] +model = dict( + decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21)) diff --git a/configs/fcn/fcn_r50-d8_512x512_40k_voc12aug.py b/configs/fcn/fcn_r50-d8_512x512_40k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..8cec429c3e27ad2543b7e38fa206e6606fda4d5a --- /dev/null +++ b/configs/fcn/fcn_r50-d8_512x512_40k_voc12aug.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/fcn_r50-d8.py', '../_base_/datasets/pascal_voc12_aug.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21)) diff 
--git a/configs/fcn/fcn_r50-d8_512x512_80k_ade20k.py b/configs/fcn/fcn_r50-d8_512x512_80k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..ef194cb594eb76316324066e23e48184d8cede27 --- /dev/null +++ b/configs/fcn/fcn_r50-d8_512x512_80k_ade20k.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/fcn_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) diff --git a/configs/fcn/fcn_r50-d8_769x769_40k_cityscapes.py b/configs/fcn/fcn_r50-d8_769x769_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..fca98c1d9ace73a61ae395914e5960832216bf67 --- /dev/null +++ b/configs/fcn/fcn_r50-d8_769x769_40k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/fcn_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/configs/fcn/fcn_r50-d8_769x769_80k_cityscapes.py b/configs/fcn/fcn_r50-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..7d75cd9f49343355b14c7d60bb0df0936ffe0278 --- /dev/null +++ b/configs/fcn/fcn_r50-d8_769x769_80k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/fcn_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/configs/fcn/fcn_r50b-d8_512x1024_80k_cityscapes.py b/configs/fcn/fcn_r50b-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..28ef13f8d17e977f710ba9a863f182b1f80dc8cf --- /dev/null +++ b/configs/fcn/fcn_r50b-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './fcn_r50-d8_512x1024_80k_cityscapes.py' +model = dict(pretrained='torchvision://resnet50', backbone=dict(type='ResNet')) diff --git a/configs/fcn/fcn_r50b-d8_769x769_80k_cityscapes.py b/configs/fcn/fcn_r50b-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..106f7b6a1ece974c9f732ee813724bd8bda3bef3 --- /dev/null +++ b/configs/fcn/fcn_r50b-d8_769x769_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './fcn_r50-d8_769x769_80k_cityscapes.py' +model = dict(pretrained='torchvision://resnet50', backbone=dict(type='ResNet')) diff --git a/configs/fp16/README.md b/configs/fp16/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4b64cd96f22ed4d2a14e93d7b28e1cd4ede6fa97 --- /dev/null +++ b/configs/fp16/README.md @@ -0,0 +1,25 @@ +# Mixed Precision Training + +## Introduction + + + +```latex +@article{micikevicius2017mixed, + title={Mixed precision training}, + author={Micikevicius, Paulius and Narang, Sharan and Alben, Jonah and Diamos, Gregory and Elsen, Erich and Garcia, David and Ginsburg, Boris and Houston, Michael and Kuchaiev, Oleksii and Venkatesh, Ganesh and others}, + journal={arXiv preprint arXiv:1710.03740}, + year={2017} +} +``` + +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ---------- | 
-------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| FCN | R-101-D8 | 512x1024 | 80000 | 5.37 | 8.64 | 76.80 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fp16/fcn_r101-d8_512x1024_80k_fp16_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fp16/fcn_r101-d8_512x1024_80k_fp16_cityscapes/fcn_r101-d8_512x1024_80k_fp16_cityscapes-50245227.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/fp16/fcn_r101-d8_512x1024_80k_fp16_cityscapes/fcn_r101-d8_512x1024_80k_fp16_cityscapes_20200717_230921.log.json) | +| PSPNet | R-101-D8 | 512x1024 | 80000 | 5.34 | 8.77 | 79.46 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fp16/pspnet_r101-d8_512x1024_80k_fp16_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fp16/pspnet_r101-d8_512x1024_80k_fp16_cityscapes/pspnet_r101-d8_512x1024_80k_fp16_cityscapes-ade37931.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/fp16/pspnet_r101-d8_512x1024_80k_fp16_cityscapes/pspnet_r101-d8_512x1024_80k_fp16_cityscapes_20200717_230919.log.json) | +| DeepLabV3 | R-101-D8 | 512x1024 | 80000 | 5.75 | 3.86 | 80.48 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fp16/deeplabv3_r101-d8_512x1024_80k_fp16_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fp16/deeplabv3_r101-d8_512x1024_80k_fp16_cityscapes/deeplabv3_r101-d8_512x1024_80k_fp16_cityscapes-bc86dc84.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/fp16/deeplabv3_r101-d8_512x1024_80k_fp16_cityscapes/deeplabv3_r101-d8_512x1024_80k_fp16_cityscapes_20200717_230920.log.json) | +| DeepLabV3+ | R-101-D8 | 512x1024 | 80000 | 6.35 | 7.87 | 80.46 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fp16/deeplabv3plus_r101-d8_512x1024_80k_fp16_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fp16/deeplabv3plus_r101-d8_512x1024_80k_fp16_cityscapes/deeplabv3plus_r101-d8_512x1024_80k_fp16_cityscapes-cc58bc8d.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/fp16/deeplabv3plus_r101-d8_512x1024_80k_fp16_cityscapes/deeplabv3plus_r101-d8_512x1024_80k_fp16_cityscapes_20200717_230920.log.json) | diff --git a/configs/fp16/deeplabv3_r101-d8_512x1024_80k_fp16_cityscapes.py b/configs/fp16/deeplabv3_r101-d8_512x1024_80k_fp16_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..1d7e1bef6f8a8b89339bc6343d183ef2935cb416 --- /dev/null +++ b/configs/fp16/deeplabv3_r101-d8_512x1024_80k_fp16_cityscapes.py @@ -0,0 +1,5 @@ +_base_ = '../deeplabv3/deeplabv3_r101-d8_512x1024_80k_cityscapes.py' +# fp16 settings +optimizer_config = dict(type='Fp16OptimizerHook', loss_scale=512.) 
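+# loss_scale=512. is a static loss scale (cf. the mixed-precision paper above):
+# the loss is multiplied by 512 before backward() so that small FP16 gradients
+# do not underflow, and gradients are unscaled again before the optimizer step.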
+# fp16 placeholder +fp16 = dict() diff --git a/configs/fp16/deeplabv3plus_r101-d8_512x1024_80k_fp16_cityscapes.py b/configs/fp16/deeplabv3plus_r101-d8_512x1024_80k_fp16_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..eaf569d4d76af2e498c039899c01f9960b1158d9 --- /dev/null +++ b/configs/fp16/deeplabv3plus_r101-d8_512x1024_80k_fp16_cityscapes.py @@ -0,0 +1,5 @@ +_base_ = '../deeplabv3plus/deeplabv3plus_r101-d8_512x1024_80k_cityscapes.py' +# fp16 settings +optimizer_config = dict(type='Fp16OptimizerHook', loss_scale=512.) +# fp16 placeholder +fp16 = dict() diff --git a/configs/fp16/fcn_r101-d8_512x1024_80k_fp16_cityscapes.py b/configs/fp16/fcn_r101-d8_512x1024_80k_fp16_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..8e85e56bd66aa61c3d43547acb7c2d6d91f14133 --- /dev/null +++ b/configs/fp16/fcn_r101-d8_512x1024_80k_fp16_cityscapes.py @@ -0,0 +1,5 @@ +_base_ = '../fcn/fcn_r101-d8_512x1024_80k_cityscapes.py' +# fp16 settings +optimizer_config = dict(type='Fp16OptimizerHook', loss_scale=512.) +# fp16 placeholder +fp16 = dict() diff --git a/configs/fp16/pspnet_r101-d8_512x1024_80k_fp16_cityscapes.py b/configs/fp16/pspnet_r101-d8_512x1024_80k_fp16_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..cb2c27e44f33170130a233abf0524d5e346656db --- /dev/null +++ b/configs/fp16/pspnet_r101-d8_512x1024_80k_fp16_cityscapes.py @@ -0,0 +1,5 @@ +_base_ = '../pspnet/pspnet_r101-d8_512x1024_80k_cityscapes.py' +# fp16 settings +optimizer_config = dict(type='Fp16OptimizerHook', loss_scale=512.) +# fp16 placeholder +fp16 = dict() diff --git a/configs/gcnet/README.md b/configs/gcnet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..72f10d14b34a1df68dfeb0fdb056d527ed698c26 --- /dev/null +++ b/configs/gcnet/README.md @@ -0,0 +1,48 @@ +# GCNet: Non-local Networks Meet Squeeze-Excitation Networks and Beyond + +## Introduction + + + +```latex +@inproceedings{cao2019gcnet, + title={Gcnet: Non-local networks meet squeeze-excitation networks and beyond}, + author={Cao, Yue and Xu, Jiarui and Lin, Stephen and Wei, Fangyun and Hu, Han}, + booktitle={Proceedings of the IEEE International Conference on Computer Vision Workshops}, + pages={0--0}, + year={2019} +} +``` + +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| GCNet | R-50-D8 | 512x1024 | 40000 | 5.8 | 3.93 | 77.69 | 78.56 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/gcnet/gcnet_r50-d8_512x1024_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x1024_40k_cityscapes/gcnet_r50-d8_512x1024_40k_cityscapes_20200618_074436-4b0fd17b.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x1024_40k_cityscapes/gcnet_r50-d8_512x1024_40k_cityscapes_20200618_074436.log.json) | +| 
GCNet | R-101-D8 | 512x1024 | 40000 | 9.2 | 2.61 | 78.28 | 79.34 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/gcnet/gcnet_r101-d8_512x1024_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x1024_40k_cityscapes/gcnet_r101-d8_512x1024_40k_cityscapes_20200618_074436-5e62567f.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x1024_40k_cityscapes/gcnet_r101-d8_512x1024_40k_cityscapes_20200618_074436.log.json) | +| GCNet | R-50-D8 | 769x769 | 40000 | 6.5 | 1.67 | 78.12 | 80.09 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/gcnet/gcnet_r50-d8_769x769_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_769x769_40k_cityscapes/gcnet_r50-d8_769x769_40k_cityscapes_20200618_182814-a26f4471.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_769x769_40k_cityscapes/gcnet_r50-d8_769x769_40k_cityscapes_20200618_182814.log.json) | +| GCNet | R-101-D8 | 769x769 | 40000 | 10.5 | 1.13 | 78.95 | 80.71 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/gcnet/gcnet_r101-d8_769x769_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_769x769_40k_cityscapes/gcnet_r101-d8_769x769_40k_cityscapes_20200619_092550-ca4f0a84.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_769x769_40k_cityscapes/gcnet_r101-d8_769x769_40k_cityscapes_20200619_092550.log.json) | +| GCNet | R-50-D8 | 512x1024 | 80000 | - | - | 78.48 | 80.01 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/gcnet/gcnet_r50-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x1024_80k_cityscapes/gcnet_r50-d8_512x1024_80k_cityscapes_20200618_074450-ef8f069b.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x1024_80k_cityscapes/gcnet_r50-d8_512x1024_80k_cityscapes_20200618_074450.log.json) | +| GCNet | R-101-D8 | 512x1024 | 80000 | - | - | 79.03 | 79.84 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/gcnet/gcnet_r101-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x1024_80k_cityscapes/gcnet_r101-d8_512x1024_80k_cityscapes_20200618_074450-778ebf69.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x1024_80k_cityscapes/gcnet_r101-d8_512x1024_80k_cityscapes_20200618_074450.log.json) | +| GCNet | R-50-D8 | 769x769 | 80000 | - | - | 78.68 | 80.66 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/gcnet/gcnet_r50-d8_769x769_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_769x769_80k_cityscapes/gcnet_r50-d8_769x769_80k_cityscapes_20200619_092516-4839565b.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_769x769_80k_cityscapes/gcnet_r50-d8_769x769_80k_cityscapes_20200619_092516.log.json) | +| GCNet | R-101-D8 | 769x769 | 80000 | - | - | 79.18 | 80.71 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/gcnet/gcnet_r101-d8_769x769_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_769x769_80k_cityscapes/gcnet_r101-d8_769x769_80k_cityscapes_20200619_092628-8e043423.pth) | 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_769x769_80k_cityscapes/gcnet_r101-d8_769x769_80k_cityscapes_20200619_092628.log.json) | + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | --------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| GCNet | R-50-D8 | 512x512 | 80000 | 8.5 | 23.38 | 41.47 | 42.85 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/gcnet/gcnet_r50-d8_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x512_80k_ade20k/gcnet_r50-d8_512x512_80k_ade20k_20200614_185146-91a6da41.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x512_80k_ade20k/gcnet_r50-d8_512x512_80k_ade20k_20200614_185146.log.json) | +| GCNet | R-101-D8 | 512x512 | 80000 | 12 | 15.20 | 42.82 | 44.54 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/gcnet/gcnet_r101-d8_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x512_80k_ade20k/gcnet_r101-d8_512x512_80k_ade20k_20200615_020811-c3fcb6dd.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x512_80k_ade20k/gcnet_r101-d8_512x512_80k_ade20k_20200615_020811.log.json) | +| GCNet | R-50-D8 | 512x512 | 160000 | - | - | 42.37 | 43.52 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/gcnet/gcnet_r50-d8_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x512_160k_ade20k/gcnet_r50-d8_512x512_160k_ade20k_20200615_224122-d95f3e1f.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x512_160k_ade20k/gcnet_r50-d8_512x512_160k_ade20k_20200615_224122.log.json) | +| GCNet | R-101-D8 | 512x512 | 160000 | - | - | 43.69 | 45.21 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/gcnet/gcnet_r101-d8_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x512_160k_ade20k/gcnet_r101-d8_512x512_160k_ade20k_20200615_225406-615528d7.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x512_160k_ade20k/gcnet_r101-d8_512x512_160k_ade20k_20200615_225406.log.json) | + +### Pascal VOC 2012 + Aug + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ---------------------------------------------------------------------------------------------------------------------- | 
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| GCNet | R-50-D8 | 512x512 | 20000 | 5.8 | 23.35 | 76.42 | 77.51 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/gcnet/gcnet_r50-d8_512x512_20k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x512_20k_voc12aug/gcnet_r50-d8_512x512_20k_voc12aug_20200617_165701-3cbfdab1.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x512_20k_voc12aug/gcnet_r50-d8_512x512_20k_voc12aug_20200617_165701.log.json) | +| GCNet | R-101-D8 | 512x512 | 20000 | 9.2 | 14.80 | 77.41 | 78.56 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/gcnet/gcnet_r101-d8_512x512_20k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x512_20k_voc12aug/gcnet_r101-d8_512x512_20k_voc12aug_20200617_165713-6c720aa9.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x512_20k_voc12aug/gcnet_r101-d8_512x512_20k_voc12aug_20200617_165713.log.json) | +| GCNet | R-50-D8 | 512x512 | 40000 | - | - | 76.24 | 77.63 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/gcnet/gcnet_r50-d8_512x512_40k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x512_40k_voc12aug/gcnet_r50-d8_512x512_40k_voc12aug_20200613_195105-9797336d.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x512_40k_voc12aug/gcnet_r50-d8_512x512_40k_voc12aug_20200613_195105.log.json) | +| GCNet | R-101-D8 | 512x512 | 40000 | - | - | 77.84 | 78.59 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/gcnet/gcnet_r101-d8_512x512_40k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x512_40k_voc12aug/gcnet_r101-d8_512x512_40k_voc12aug_20200613_185806-1e38208d.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x512_40k_voc12aug/gcnet_r101-d8_512x512_40k_voc12aug_20200613_185806.log.json) | diff --git a/configs/gcnet/gcnet_r101-d8_512x1024_40k_cityscapes.py b/configs/gcnet/gcnet_r101-d8_512x1024_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..27bd9422dad49bc5a06f577ee45cd834bdbe3912 --- /dev/null +++ b/configs/gcnet/gcnet_r101-d8_512x1024_40k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './gcnet_r50-d8_512x1024_40k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/gcnet/gcnet_r101-d8_512x1024_80k_cityscapes.py b/configs/gcnet/gcnet_r101-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..7f0f83fe39da31fe9a5b497e0481e1c79a33e764 --- /dev/null +++ b/configs/gcnet/gcnet_r101-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './gcnet_r50-d8_512x1024_80k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/gcnet/gcnet_r101-d8_512x512_160k_ade20k.py b/configs/gcnet/gcnet_r101-d8_512x512_160k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..9888120f65b045df1c7d4d05fb010373abf82ccf --- /dev/null +++ 
b/configs/gcnet/gcnet_r101-d8_512x512_160k_ade20k.py @@ -0,0 +1,2 @@ +_base_ = './gcnet_r50-d8_512x512_160k_ade20k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/gcnet/gcnet_r101-d8_512x512_20k_voc12aug.py b/configs/gcnet/gcnet_r101-d8_512x512_20k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..1b70ca8e46a0409379f5ae9809ce03de203426ad --- /dev/null +++ b/configs/gcnet/gcnet_r101-d8_512x512_20k_voc12aug.py @@ -0,0 +1,2 @@ +_base_ = './gcnet_r50-d8_512x512_20k_voc12aug.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/gcnet/gcnet_r101-d8_512x512_40k_voc12aug.py b/configs/gcnet/gcnet_r101-d8_512x512_40k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..b17c7a12b547ee4e1cd60d667c575eab06eb071c --- /dev/null +++ b/configs/gcnet/gcnet_r101-d8_512x512_40k_voc12aug.py @@ -0,0 +1,2 @@ +_base_ = './gcnet_r50-d8_512x512_40k_voc12aug.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/gcnet/gcnet_r101-d8_512x512_80k_ade20k.py b/configs/gcnet/gcnet_r101-d8_512x512_80k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..a2183fc2db1ff188b0ad5418e55f71005da926cc --- /dev/null +++ b/configs/gcnet/gcnet_r101-d8_512x512_80k_ade20k.py @@ -0,0 +1,2 @@ +_base_ = './gcnet_r50-d8_512x512_80k_ade20k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/gcnet/gcnet_r101-d8_769x769_40k_cityscapes.py b/configs/gcnet/gcnet_r101-d8_769x769_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..08a6031f20234b1cc1d792ea5d4891613503a185 --- /dev/null +++ b/configs/gcnet/gcnet_r101-d8_769x769_40k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './gcnet_r50-d8_769x769_40k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/gcnet/gcnet_r101-d8_769x769_80k_cityscapes.py b/configs/gcnet/gcnet_r101-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..5efb61339cdbdde585f7814e9650be2e2df654ac --- /dev/null +++ b/configs/gcnet/gcnet_r101-d8_769x769_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './gcnet_r50-d8_769x769_80k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/gcnet/gcnet_r50-d8_512x1024_40k_cityscapes.py b/configs/gcnet/gcnet_r50-d8_512x1024_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..610467c07204140bf604f8dda2aa57978c565ed3 --- /dev/null +++ b/configs/gcnet/gcnet_r50-d8_512x1024_40k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/gcnet_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] diff --git a/configs/gcnet/gcnet_r50-d8_512x1024_80k_cityscapes.py b/configs/gcnet/gcnet_r50-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..155e28f42194112703bb21473e5e3dd0fca40d49 --- /dev/null +++ b/configs/gcnet/gcnet_r50-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/gcnet_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] diff --git a/configs/gcnet/gcnet_r50-d8_512x512_160k_ade20k.py b/configs/gcnet/gcnet_r50-d8_512x512_160k_ade20k.py new file mode 
100644 index 0000000000000000000000000000000000000000..1549a4d5bf10cd3fd6e3bd57bf7a48e7e5e1ede8 --- /dev/null +++ b/configs/gcnet/gcnet_r50-d8_512x512_160k_ade20k.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/gcnet_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +model = dict( + decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) diff --git a/configs/gcnet/gcnet_r50-d8_512x512_20k_voc12aug.py b/configs/gcnet/gcnet_r50-d8_512x512_20k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..a496204bdb061d975c40cb7ef2aaada40e020a13 --- /dev/null +++ b/configs/gcnet/gcnet_r50-d8_512x512_20k_voc12aug.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/gcnet_r50-d8.py', + '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_20k.py' +] +model = dict( + decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21)) diff --git a/configs/gcnet/gcnet_r50-d8_512x512_40k_voc12aug.py b/configs/gcnet/gcnet_r50-d8_512x512_40k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..d85cf6550fea5da7cf1fa078eb4fa30e017166b4 --- /dev/null +++ b/configs/gcnet/gcnet_r50-d8_512x512_40k_voc12aug.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/gcnet_r50-d8.py', + '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21)) diff --git a/configs/gcnet/gcnet_r50-d8_512x512_80k_ade20k.py b/configs/gcnet/gcnet_r50-d8_512x512_80k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..89d5e1ae0f3ef44626f3b5534c504cbce7389a32 --- /dev/null +++ b/configs/gcnet/gcnet_r50-d8_512x512_80k_ade20k.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/gcnet_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) diff --git a/configs/gcnet/gcnet_r50-d8_769x769_40k_cityscapes.py b/configs/gcnet/gcnet_r50-d8_769x769_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..332495d3d7f7d7c7c0e0aca4e379cd54e2ed07de --- /dev/null +++ b/configs/gcnet/gcnet_r50-d8_769x769_40k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/gcnet_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/configs/gcnet/gcnet_r50-d8_769x769_80k_cityscapes.py b/configs/gcnet/gcnet_r50-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..d6d9cb1c64bcf8c3e952b6f8adc11bec0403d106 --- /dev/null +++ b/configs/gcnet/gcnet_r50-d8_769x769_80k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/gcnet_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/configs/hrnet/README.md b/configs/hrnet/README.md new file mode 100644 index 
0000000000000000000000000000000000000000..ca51545f638aa7a7fb57c1edbc667377416d92e9 --- /dev/null +++ b/configs/hrnet/README.md @@ -0,0 +1,66 @@ +# Deep High-Resolution Representation Learning for Human Pose Estimation + +## Introduction + + + +```latex +@inproceedings{SunXLW19, + title={Deep High-Resolution Representation Learning for Human Pose Estimation}, + author={Ke Sun and Bin Xiao and Dong Liu and Jingdong Wang}, + booktitle={CVPR}, + year={2019} +} +``` + +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | ------------------ | --------- | ------: | -------- | -------------- | ----: | ------------: | ---------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| FCN | HRNetV2p-W18-Small | 512x1024 | 40000 | 1.7 | 23.74 | 73.86 | 75.91 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/hrnet/fcn_hr18s_512x1024_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x1024_40k_cityscapes/fcn_hr18s_512x1024_40k_cityscapes_20200601_014216-93db27d0.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x1024_40k_cityscapes/fcn_hr18s_512x1024_40k_cityscapes_20200601_014216.log.json) | +| FCN | HRNetV2p-W18 | 512x1024 | 40000 | 2.9 | 12.97 | 77.19 | 78.92 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/hrnet/fcn_hr18_512x1024_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x1024_40k_cityscapes/fcn_hr18_512x1024_40k_cityscapes_20200601_014216-f196fb4e.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x1024_40k_cityscapes/fcn_hr18_512x1024_40k_cityscapes_20200601_014216.log.json) | +| FCN | HRNetV2p-W48 | 512x1024 | 40000 | 6.2 | 6.42 | 78.48 | 79.69 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/hrnet/fcn_hr48_512x1024_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x1024_40k_cityscapes/fcn_hr48_512x1024_40k_cityscapes_20200601_014240-a989b146.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x1024_40k_cityscapes/fcn_hr48_512x1024_40k_cityscapes_20200601_014240.log.json) | +| FCN | HRNetV2p-W18-Small | 512x1024 | 80000 | - | - | 75.31 | 77.48 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/hrnet/fcn_hr18s_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x1024_80k_cityscapes/fcn_hr18s_512x1024_80k_cityscapes_20200601_202700-1462b75d.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x1024_80k_cityscapes/fcn_hr18s_512x1024_80k_cityscapes_20200601_202700.log.json) | +| FCN | HRNetV2p-W18 | 512x1024 | 80000 | - | - | 78.65 | 80.35 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/hrnet/fcn_hr18_512x1024_80k_cityscapes.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x1024_80k_cityscapes/fcn_hr18_512x1024_80k_cityscapes_20200601_223255-4e7b345e.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x1024_80k_cityscapes/fcn_hr18_512x1024_80k_cityscapes_20200601_223255.log.json) | +| FCN | HRNetV2p-W48 | 512x1024 | 80000 | - | - | 79.93 | 80.72 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/hrnet/fcn_hr48_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x1024_80k_cityscapes/fcn_hr48_512x1024_80k_cityscapes_20200601_202606-58ea95d6.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x1024_80k_cityscapes/fcn_hr48_512x1024_80k_cityscapes_20200601_202606.log.json) | +| FCN | HRNetV2p-W18-Small | 512x1024 | 160000 | - | - | 76.31 | 78.31 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/hrnet/fcn_hr18s_512x1024_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x1024_160k_cityscapes/fcn_hr18s_512x1024_160k_cityscapes_20200602_190901-4a0797ea.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x1024_160k_cityscapes/fcn_hr18s_512x1024_160k_cityscapes_20200602_190901.log.json) | +| FCN | HRNetV2p-W18 | 512x1024 | 160000 | - | - | 78.80 | 80.74 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/hrnet/fcn_hr18_512x1024_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x1024_160k_cityscapes/fcn_hr18_512x1024_160k_cityscapes_20200602_190822-221e4a4f.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x1024_160k_cityscapes/fcn_hr18_512x1024_160k_cityscapes_20200602_190822.log.json) | +| FCN | HRNetV2p-W48 | 512x1024 | 160000 | - | - | 80.65 | 81.92 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/hrnet/fcn_hr48_512x1024_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x1024_160k_cityscapes/fcn_hr48_512x1024_160k_cityscapes_20200602_190946-59b7973e.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x1024_160k_cityscapes/fcn_hr48_512x1024_160k_cityscapes_20200602_190946.log.json) | + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | ------------------ | --------- | ------: | -------- | -------------- | ----: | ------------: | ----------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| FCN | HRNetV2p-W18-Small | 512x512 | 80000 | 3.8 | 38.66 | 31.38 | 32.45 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/hrnet/fcn_hr18s_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_80k_ade20k/fcn_hr18s_512x512_80k_ade20k_20200614_144345-77fc814a.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_80k_ade20k/fcn_hr18s_512x512_80k_ade20k_20200614_144345.log.json) | +| FCN | 
HRNetV2p-W18 | 512x512 | 80000 | 4.9 | 22.57 | 35.51 | 36.80 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/hrnet/fcn_hr18_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_80k_ade20k/fcn_hr18_512x512_80k_ade20k_20200614_185145-66f20cb7.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_80k_ade20k/fcn_hr18_512x512_80k_ade20k_20200614_185145.log.json) | +| FCN | HRNetV2p-W48 | 512x512 | 80000 | 8.2 | 21.23 | 41.90 | 43.27 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/hrnet/fcn_hr48_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_80k_ade20k/fcn_hr48_512x512_80k_ade20k_20200614_193946-7ba5258d.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_80k_ade20k/fcn_hr48_512x512_80k_ade20k_20200614_193946.log.json) | +| FCN | HRNetV2p-W18-Small | 512x512 | 160000 | - | - | 33.00 | 34.55 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/hrnet/fcn_hr18s_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_160k_ade20k/fcn_hr18s_512x512_160k_ade20k_20200614_214413-870f65ac.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_160k_ade20k/fcn_hr18s_512x512_160k_ade20k_20200614_214413.log.json) | +| FCN | HRNetV2p-W18 | 512x512 | 160000 | - | - | 36.79 | 38.58 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/hrnet/fcn_hr18_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_160k_ade20k/fcn_hr18_512x512_160k_ade20k_20200614_214426-ca961836.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_160k_ade20k/fcn_hr18_512x512_160k_ade20k_20200614_214426.log.json) | +| FCN | HRNetV2p-W48 | 512x512 | 160000 | - | - | 42.02 | 43.86 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/hrnet/fcn_hr48_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_160k_ade20k/fcn_hr48_512x512_160k_ade20k_20200614_214407-a52fc02c.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_160k_ade20k/fcn_hr48_512x512_160k_ade20k_20200614_214407.log.json) | + +### Pascal VOC 2012 + Aug + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | ------------------ | --------- | ------: | -------- | -------------- | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------ | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| FCN | HRNetV2p-W18-Small | 512x512 | 20000 | 1.8 | 43.36 | 65.20 | 68.55 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/hrnet/fcn_hr18s_512x512_20k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_20k_voc12aug/fcn_hr18s_512x512_20k_voc12aug_20200617_224503-56e36088.pth) | 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_20k_voc12aug/fcn_hr18s_512x512_20k_voc12aug_20200617_224503.log.json) | +| FCN | HRNetV2p-W18 | 512x512 | 20000 | 2.9 | 23.48 | 72.30 | 74.71 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/hrnet/fcn_hr18_512x512_20k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_20k_voc12aug/fcn_hr18_512x512_20k_voc12aug_20200617_224503-488d45f7.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_20k_voc12aug/fcn_hr18_512x512_20k_voc12aug_20200617_224503.log.json) | +| FCN | HRNetV2p-W48 | 512x512 | 20000 | 6.2 | 22.05 | 75.87 | 78.58 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/hrnet/fcn_hr48_512x512_20k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_20k_voc12aug/fcn_hr48_512x512_20k_voc12aug_20200617_224419-89de05cd.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_20k_voc12aug/fcn_hr48_512x512_20k_voc12aug_20200617_224419.log.json) | +| FCN | HRNetV2p-W18-Small | 512x512 | 40000 | - | - | 66.61 | 70.00 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/hrnet/fcn_hr18s_512x512_40k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_40k_voc12aug/fcn_hr18s_512x512_40k_voc12aug_20200614_000648-4f8d6e7f.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_40k_voc12aug/fcn_hr18s_512x512_40k_voc12aug_20200614_000648.log.json) | +| FCN | HRNetV2p-W18 | 512x512 | 40000 | - | - | 72.90 | 75.59 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/hrnet/fcn_hr18_512x512_40k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_40k_voc12aug/fcn_hr18_512x512_40k_voc12aug_20200613_224401-1b4b76cd.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_40k_voc12aug/fcn_hr18_512x512_40k_voc12aug_20200613_224401.log.json) | +| FCN | HRNetV2p-W48 | 512x512 | 40000 | - | - | 76.24 | 78.49 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/hrnet/fcn_hr48_512x512_40k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_40k_voc12aug/fcn_hr48_512x512_40k_voc12aug_20200613_222111-1b0f18bc.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_40k_voc12aug/fcn_hr48_512x512_40k_voc12aug_20200613_222111.log.json) | + +### Pascal Context + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | ------------ | --------- | ------: | -------- | -------------- | ----: | ------------: | ----------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| FCN | HRNetV2p-W48 | 480x480 | 40000 | 6.1 | 8.86 | 45.14 | 47.42 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/hrnet/fcn_hr48_480x480_40k_pascal_context.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_480x480_40k_pascal_context/fcn_hr48_480x480_40k_pascal_context_20200911_164852-667d00b0.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_480x480_40k_pascal_context/fcn_hr48_480x480_40k_pascal_context-20200911_164852.log.json) | +| FCN | HRNetV2p-W48 | 480x480 | 80000 | - | - | 45.84 | 47.84 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/hrnet/fcn_hr48_480x480_80k_pascal_context.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_480x480_80k_pascal_context/fcn_hr48_480x480_80k_pascal_context_20200911_155322-847a6711.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_480x480_80k_pascal_context/fcn_hr48_480x480_80k_pascal_context-20200911_155322.log.json) | + +### Pascal Context 59 + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | ------------ | --------- | ------: | -------- | -------------- | ----: | ------------: | ----------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| FCN | HRNetV2p-W48 | 480x480 | 40000 | - | - | 50.33 | 52.83 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/hrnet/fcn_hr48_480x480_40k_pascal_context_59.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_480x480_40k_pascal_context_59/fcn_hr48_480x480_40k_pascal_context_59_20210410_122738-b808b8b2.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_480x480_40k_pascal_context_59/fcn_hr48_480x480_40k_pascal_context_59-20210410_122738.log.json) | +| FCN | HRNetV2p-W48 | 480x480 | 80000 | - | - | 51.12 | 53.56 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/hrnet/fcn_hr48_480x480_80k_pascal_context_59.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_480x480_80k_pascal_context_59/fcn_hr48_480x480_80k_pascal_context_59_20210411_003240-3ae7081e.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_480x480_80k_pascal_context_59/fcn_hr48_480x480_80k_pascal_context_59-20210411_003240.log.json) | diff --git a/configs/hrnet/fcn_hr18_480x480_40k_pascal_context.py b/configs/hrnet/fcn_hr18_480x480_40k_pascal_context.py new file mode 100644 index 0000000000000000000000000000000000000000..5ff05aa595399d77ee51552c243e489f395a820e --- /dev/null +++ b/configs/hrnet/fcn_hr18_480x480_40k_pascal_context.py @@ -0,0 +1,8 @@ +_base_ = [ + '../_base_/models/fcn_hr18.py', '../_base_/datasets/pascal_context.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(num_classes=60), + test_cfg=dict(mode='slide', crop_size=(480, 480), stride=(320, 320))) +optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001) diff --git a/configs/hrnet/fcn_hr18_480x480_40k_pascal_context_59.py b/configs/hrnet/fcn_hr18_480x480_40k_pascal_context_59.py new file mode 100644 index 0000000000000000000000000000000000000000..d2eecf01637b1ef605fdd5c20833cc2e06accbc0 --- /dev/null 
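The Pascal Context config just above switches testing to `mode='slide'` with a 480x480 crop and a 320-pixel stride (and drops the SGD learning rate to 0.004), so neighbouring windows overlap by 160 pixels and their logits are averaged where they meet. An illustrative back-of-the-envelope for the window count; this mirrors, but is not, mmseg's actual `slide_inference`:

```python
# Illustrative only: number of crops evaluated per image for slide testing.
import math

def num_windows(size: int, crop: int = 480, stride: int = 320) -> int:
    # Ceil-div grid: the last window is shifted back so it still fits the image.
    return max(math.ceil((size - crop) / stride) + 1, 1)

print(num_windows(480), num_windows(520), num_windows(1024))  # 1 2 3
```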
+++ b/configs/hrnet/fcn_hr18_480x480_40k_pascal_context_59.py @@ -0,0 +1,8 @@ +_base_ = [ + '../_base_/models/fcn_hr18.py', '../_base_/datasets/pascal_context_59.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(num_classes=59), + test_cfg=dict(mode='slide', crop_size=(480, 480), stride=(320, 320))) +optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001) diff --git a/configs/hrnet/fcn_hr18_480x480_80k_pascal_context.py b/configs/hrnet/fcn_hr18_480x480_80k_pascal_context.py new file mode 100644 index 0000000000000000000000000000000000000000..cf315a4f0e6f397768572c590a634cc1b9d298a9 --- /dev/null +++ b/configs/hrnet/fcn_hr18_480x480_80k_pascal_context.py @@ -0,0 +1,8 @@ +_base_ = [ + '../_base_/models/fcn_hr18.py', '../_base_/datasets/pascal_context.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(num_classes=60), + test_cfg=dict(mode='slide', crop_size=(480, 480), stride=(320, 320))) +optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001) diff --git a/configs/hrnet/fcn_hr18_480x480_80k_pascal_context_59.py b/configs/hrnet/fcn_hr18_480x480_80k_pascal_context_59.py new file mode 100644 index 0000000000000000000000000000000000000000..9cbf4100d1f91161a4e8549d6b74799fc27ea35e --- /dev/null +++ b/configs/hrnet/fcn_hr18_480x480_80k_pascal_context_59.py @@ -0,0 +1,8 @@ +_base_ = [ + '../_base_/models/fcn_hr18.py', '../_base_/datasets/pascal_context_59.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(num_classes=59), + test_cfg=dict(mode='slide', crop_size=(480, 480), stride=(320, 320))) +optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001) diff --git a/configs/hrnet/fcn_hr18_512x1024_160k_cityscapes.py b/configs/hrnet/fcn_hr18_512x1024_160k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..9f04e935c39b08de66629f913b30675ffff2a8fe --- /dev/null +++ b/configs/hrnet/fcn_hr18_512x1024_160k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/fcn_hr18.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] diff --git a/configs/hrnet/fcn_hr18_512x1024_40k_cityscapes.py b/configs/hrnet/fcn_hr18_512x1024_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..99760c36d8399204ca8e35f32690bcd369676852 --- /dev/null +++ b/configs/hrnet/fcn_hr18_512x1024_40k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/fcn_hr18.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] diff --git a/configs/hrnet/fcn_hr18_512x1024_80k_cityscapes.py b/configs/hrnet/fcn_hr18_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..a653dda19255214a1a412b645abddd3fc5c0d853 --- /dev/null +++ b/configs/hrnet/fcn_hr18_512x1024_80k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/fcn_hr18.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] diff --git a/configs/hrnet/fcn_hr18_512x512_160k_ade20k.py b/configs/hrnet/fcn_hr18_512x512_160k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..45ed99b6813324a58575f9bb74ce0534626e10c4 --- /dev/null +++ b/configs/hrnet/fcn_hr18_512x512_160k_ade20k.py @@ -0,0 +1,5 @@ +_base_ = [ + 
'../_base_/models/fcn_hr18.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +model = dict(decode_head=dict(num_classes=150)) diff --git a/configs/hrnet/fcn_hr18_512x512_20k_voc12aug.py b/configs/hrnet/fcn_hr18_512x512_20k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..f06448b168af4d2dcc5a1f96e4430a7948b7e170 --- /dev/null +++ b/configs/hrnet/fcn_hr18_512x512_20k_voc12aug.py @@ -0,0 +1,5 @@ +_base_ = [ + '../_base_/models/fcn_hr18.py', '../_base_/datasets/pascal_voc12_aug.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_20k.py' +] +model = dict(decode_head=dict(num_classes=21)) diff --git a/configs/hrnet/fcn_hr18_512x512_40k_voc12aug.py b/configs/hrnet/fcn_hr18_512x512_40k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..d74e95943afca04ba4073e411e0b713985384129 --- /dev/null +++ b/configs/hrnet/fcn_hr18_512x512_40k_voc12aug.py @@ -0,0 +1,5 @@ +_base_ = [ + '../_base_/models/fcn_hr18.py', '../_base_/datasets/pascal_voc12_aug.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] +model = dict(decode_head=dict(num_classes=21)) diff --git a/configs/hrnet/fcn_hr18_512x512_80k_ade20k.py b/configs/hrnet/fcn_hr18_512x512_80k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..52bc9f5e91f2fdf9ce8f9e3a873902dd8db56522 --- /dev/null +++ b/configs/hrnet/fcn_hr18_512x512_80k_ade20k.py @@ -0,0 +1,5 @@ +_base_ = [ + '../_base_/models/fcn_hr18.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +model = dict(decode_head=dict(num_classes=150)) diff --git a/configs/hrnet/fcn_hr18s_480x480_40k_pascal_context.py b/configs/hrnet/fcn_hr18s_480x480_40k_pascal_context.py new file mode 100644 index 0000000000000000000000000000000000000000..d09931048f762cd2ac224d62c2fe2ed8e0e148c8 --- /dev/null +++ b/configs/hrnet/fcn_hr18s_480x480_40k_pascal_context.py @@ -0,0 +1,9 @@ +_base_ = './fcn_hr18_480x480_40k_pascal_context.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w18_small', + backbone=dict( + extra=dict( + stage1=dict(num_blocks=(2, )), + stage2=dict(num_blocks=(2, 2)), + stage3=dict(num_modules=3, num_blocks=(2, 2, 2)), + stage4=dict(num_modules=2, num_blocks=(2, 2, 2, 2))))) diff --git a/configs/hrnet/fcn_hr18s_480x480_40k_pascal_context_59.py b/configs/hrnet/fcn_hr18s_480x480_40k_pascal_context_59.py new file mode 100644 index 0000000000000000000000000000000000000000..0412c64f31d85997af9715949672ca55b07aaed7 --- /dev/null +++ b/configs/hrnet/fcn_hr18s_480x480_40k_pascal_context_59.py @@ -0,0 +1,9 @@ +_base_ = './fcn_hr18_480x480_40k_pascal_context_59.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w18_small', + backbone=dict( + extra=dict( + stage1=dict(num_blocks=(2, )), + stage2=dict(num_blocks=(2, 2)), + stage3=dict(num_modules=3, num_blocks=(2, 2, 2)), + stage4=dict(num_modules=2, num_blocks=(2, 2, 2, 2))))) diff --git a/configs/hrnet/fcn_hr18s_480x480_80k_pascal_context.py b/configs/hrnet/fcn_hr18s_480x480_80k_pascal_context.py new file mode 100644 index 0000000000000000000000000000000000000000..584b7135fd95464f3d2c965440a0b92161cde09a --- /dev/null +++ b/configs/hrnet/fcn_hr18s_480x480_80k_pascal_context.py @@ -0,0 +1,9 @@ +_base_ = './fcn_hr18_480x480_80k_pascal_context.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w18_small', + backbone=dict( + extra=dict( + stage1=dict(num_blocks=(2, )), + 
stage2=dict(num_blocks=(2, 2)), + stage3=dict(num_modules=3, num_blocks=(2, 2, 2)), + stage4=dict(num_modules=2, num_blocks=(2, 2, 2, 2))))) diff --git a/configs/hrnet/fcn_hr18s_480x480_80k_pascal_context_59.py b/configs/hrnet/fcn_hr18s_480x480_80k_pascal_context_59.py new file mode 100644 index 0000000000000000000000000000000000000000..babd88db4eb5d96828adf8db2467b4f6fd8b7cf5 --- /dev/null +++ b/configs/hrnet/fcn_hr18s_480x480_80k_pascal_context_59.py @@ -0,0 +1,9 @@ +_base_ = './fcn_hr18_480x480_80k_pascal_context_59.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w18_small', + backbone=dict( + extra=dict( + stage1=dict(num_blocks=(2, )), + stage2=dict(num_blocks=(2, 2)), + stage3=dict(num_modules=3, num_blocks=(2, 2, 2)), + stage4=dict(num_modules=2, num_blocks=(2, 2, 2, 2))))) diff --git a/configs/hrnet/fcn_hr18s_512x1024_160k_cityscapes.py b/configs/hrnet/fcn_hr18s_512x1024_160k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..ddbe3801f99dc21120548af85c55c7cdcfadaea2 --- /dev/null +++ b/configs/hrnet/fcn_hr18s_512x1024_160k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = './fcn_hr18_512x1024_160k_cityscapes.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w18_small', + backbone=dict( + extra=dict( + stage1=dict(num_blocks=(2, )), + stage2=dict(num_blocks=(2, 2)), + stage3=dict(num_modules=3, num_blocks=(2, 2, 2)), + stage4=dict(num_modules=2, num_blocks=(2, 2, 2, 2))))) diff --git a/configs/hrnet/fcn_hr18s_512x1024_40k_cityscapes.py b/configs/hrnet/fcn_hr18s_512x1024_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..4e31d26e093b6cb2d59b24bb3060c92bd7dccdea --- /dev/null +++ b/configs/hrnet/fcn_hr18s_512x1024_40k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = './fcn_hr18_512x1024_40k_cityscapes.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w18_small', + backbone=dict( + extra=dict( + stage1=dict(num_blocks=(2, )), + stage2=dict(num_blocks=(2, 2)), + stage3=dict(num_modules=3, num_blocks=(2, 2, 2)), + stage4=dict(num_modules=2, num_blocks=(2, 2, 2, 2))))) diff --git a/configs/hrnet/fcn_hr18s_512x1024_80k_cityscapes.py b/configs/hrnet/fcn_hr18s_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..ee2831d99d859c419b158b5f828d8a84063564ea --- /dev/null +++ b/configs/hrnet/fcn_hr18s_512x1024_80k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = './fcn_hr18_512x1024_80k_cityscapes.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w18_small', + backbone=dict( + extra=dict( + stage1=dict(num_blocks=(2, )), + stage2=dict(num_blocks=(2, 2)), + stage3=dict(num_modules=3, num_blocks=(2, 2, 2)), + stage4=dict(num_modules=2, num_blocks=(2, 2, 2, 2))))) diff --git a/configs/hrnet/fcn_hr18s_512x512_160k_ade20k.py b/configs/hrnet/fcn_hr18s_512x512_160k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..22a3ce0b38f36efc96595fe1c3ef428fc1575eb0 --- /dev/null +++ b/configs/hrnet/fcn_hr18s_512x512_160k_ade20k.py @@ -0,0 +1,9 @@ +_base_ = './fcn_hr18_512x512_160k_ade20k.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w18_small', + backbone=dict( + extra=dict( + stage1=dict(num_blocks=(2, )), + stage2=dict(num_blocks=(2, 2)), + stage3=dict(num_modules=3, num_blocks=(2, 2, 2)), + stage4=dict(num_modules=2, num_blocks=(2, 2, 2, 2))))) diff --git a/configs/hrnet/fcn_hr18s_512x512_20k_voc12aug.py b/configs/hrnet/fcn_hr18s_512x512_20k_voc12aug.py new file mode 100644 index 
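All of the `fcn_hr18s_*` files above follow one pattern: inherit the corresponding W18 config and shrink only the block and module counts, relying on mmcv's recursive dict merge, which replaces the listed keys and keeps everything else. The `fcn_hr48_*` files that follow do the converse, widening `num_channels` per stage and resizing the decode head to match; its input is the concatenation of all four branch outputs, hence `channels=sum([48, 96, 192, 384])`, i.e. 720. A hedged sketch of the merge behaviour, assuming mmcv and the stock `fcn_hr18.py` base with the usual W18 widths:

```python
# Sketch (assumes mmcv and the standard fcn_hr18.py base): nested overrides
# merge key by key, so the small variant changes depth, not width.
from mmcv import Config

cfg = Config.fromfile('configs/hrnet/fcn_hr18s_512x1024_40k_cityscapes.py')
print(cfg.model.backbone.extra.stage4.num_blocks)    # (2, 2, 2, 2), overridden
print(cfg.model.backbone.extra.stage4.num_channels)  # (18, 36, 72, 144), kept
```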
0000000000000000000000000000000000000000..d0de5df75242e58ba572277d6fc5cf93675a097e --- /dev/null +++ b/configs/hrnet/fcn_hr18s_512x512_20k_voc12aug.py @@ -0,0 +1,9 @@ +_base_ = './fcn_hr18_512x512_20k_voc12aug.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w18_small', + backbone=dict( + extra=dict( + stage1=dict(num_blocks=(2, )), + stage2=dict(num_blocks=(2, 2)), + stage3=dict(num_modules=3, num_blocks=(2, 2, 2)), + stage4=dict(num_modules=2, num_blocks=(2, 2, 2, 2))))) diff --git a/configs/hrnet/fcn_hr18s_512x512_40k_voc12aug.py b/configs/hrnet/fcn_hr18s_512x512_40k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..409db3c628edf63cd40e002f436884ce1fb75970 --- /dev/null +++ b/configs/hrnet/fcn_hr18s_512x512_40k_voc12aug.py @@ -0,0 +1,9 @@ +_base_ = './fcn_hr18_512x512_40k_voc12aug.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w18_small', + backbone=dict( + extra=dict( + stage1=dict(num_blocks=(2, )), + stage2=dict(num_blocks=(2, 2)), + stage3=dict(num_modules=3, num_blocks=(2, 2, 2)), + stage4=dict(num_modules=2, num_blocks=(2, 2, 2, 2))))) diff --git a/configs/hrnet/fcn_hr18s_512x512_80k_ade20k.py b/configs/hrnet/fcn_hr18s_512x512_80k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..a8400979b1e94dd42343de656ffbc5fbb7a07944 --- /dev/null +++ b/configs/hrnet/fcn_hr18s_512x512_80k_ade20k.py @@ -0,0 +1,9 @@ +_base_ = './fcn_hr18_512x512_80k_ade20k.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w18_small', + backbone=dict( + extra=dict( + stage1=dict(num_blocks=(2, )), + stage2=dict(num_blocks=(2, 2)), + stage3=dict(num_modules=3, num_blocks=(2, 2, 2)), + stage4=dict(num_modules=2, num_blocks=(2, 2, 2, 2))))) diff --git a/configs/hrnet/fcn_hr48_480x480_40k_pascal_context.py b/configs/hrnet/fcn_hr48_480x480_40k_pascal_context.py new file mode 100644 index 0000000000000000000000000000000000000000..0e2d96cb6ce7249852cb1d9b36a2f24bdce00199 --- /dev/null +++ b/configs/hrnet/fcn_hr48_480x480_40k_pascal_context.py @@ -0,0 +1,10 @@ +_base_ = './fcn_hr18_480x480_40k_pascal_context.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w48', + backbone=dict( + extra=dict( + stage2=dict(num_channels=(48, 96)), + stage3=dict(num_channels=(48, 96, 192)), + stage4=dict(num_channels=(48, 96, 192, 384)))), + decode_head=dict( + in_channels=[48, 96, 192, 384], channels=sum([48, 96, 192, 384]))) diff --git a/configs/hrnet/fcn_hr48_480x480_40k_pascal_context_59.py b/configs/hrnet/fcn_hr48_480x480_40k_pascal_context_59.py new file mode 100644 index 0000000000000000000000000000000000000000..655b4604677b3d4c5eb155e8b2f1cdacbd4194d5 --- /dev/null +++ b/configs/hrnet/fcn_hr48_480x480_40k_pascal_context_59.py @@ -0,0 +1,10 @@ +_base_ = './fcn_hr18_480x480_40k_pascal_context_59.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w48', + backbone=dict( + extra=dict( + stage2=dict(num_channels=(48, 96)), + stage3=dict(num_channels=(48, 96, 192)), + stage4=dict(num_channels=(48, 96, 192, 384)))), + decode_head=dict( + in_channels=[48, 96, 192, 384], channels=sum([48, 96, 192, 384]))) diff --git a/configs/hrnet/fcn_hr48_480x480_80k_pascal_context.py b/configs/hrnet/fcn_hr48_480x480_80k_pascal_context.py new file mode 100644 index 0000000000000000000000000000000000000000..e28164e3dc9d321bf0a97b37f14f3184f95a27a5 --- /dev/null +++ b/configs/hrnet/fcn_hr48_480x480_80k_pascal_context.py @@ -0,0 +1,10 @@ +_base_ = './fcn_hr18_480x480_80k_pascal_context.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w48', + 
backbone=dict( + extra=dict( + stage2=dict(num_channels=(48, 96)), + stage3=dict(num_channels=(48, 96, 192)), + stage4=dict(num_channels=(48, 96, 192, 384)))), + decode_head=dict( + in_channels=[48, 96, 192, 384], channels=sum([48, 96, 192, 384]))) diff --git a/configs/hrnet/fcn_hr48_480x480_80k_pascal_context_59.py b/configs/hrnet/fcn_hr48_480x480_80k_pascal_context_59.py new file mode 100644 index 0000000000000000000000000000000000000000..012ad0a7d6119554ec00400ad18a09249a72eca4 --- /dev/null +++ b/configs/hrnet/fcn_hr48_480x480_80k_pascal_context_59.py @@ -0,0 +1,10 @@ +_base_ = './fcn_hr18_480x480_80k_pascal_context_59.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w48', + backbone=dict( + extra=dict( + stage2=dict(num_channels=(48, 96)), + stage3=dict(num_channels=(48, 96, 192)), + stage4=dict(num_channels=(48, 96, 192, 384)))), + decode_head=dict( + in_channels=[48, 96, 192, 384], channels=sum([48, 96, 192, 384]))) diff --git a/configs/hrnet/fcn_hr48_512x1024_160k_cityscapes.py b/configs/hrnet/fcn_hr48_512x1024_160k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..394a61c99f038c94fce58ac9c422b7c3ee4b5f50 --- /dev/null +++ b/configs/hrnet/fcn_hr48_512x1024_160k_cityscapes.py @@ -0,0 +1,10 @@ +_base_ = './fcn_hr18_512x1024_160k_cityscapes.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w48', + backbone=dict( + extra=dict( + stage2=dict(num_channels=(48, 96)), + stage3=dict(num_channels=(48, 96, 192)), + stage4=dict(num_channels=(48, 96, 192, 384)))), + decode_head=dict( + in_channels=[48, 96, 192, 384], channels=sum([48, 96, 192, 384]))) diff --git a/configs/hrnet/fcn_hr48_512x1024_40k_cityscapes.py b/configs/hrnet/fcn_hr48_512x1024_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..d37ab1d09ef51b1321ed8b3634fd99445efee543 --- /dev/null +++ b/configs/hrnet/fcn_hr48_512x1024_40k_cityscapes.py @@ -0,0 +1,10 @@ +_base_ = './fcn_hr18_512x1024_40k_cityscapes.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w48', + backbone=dict( + extra=dict( + stage2=dict(num_channels=(48, 96)), + stage3=dict(num_channels=(48, 96, 192)), + stage4=dict(num_channels=(48, 96, 192, 384)))), + decode_head=dict( + in_channels=[48, 96, 192, 384], channels=sum([48, 96, 192, 384]))) diff --git a/configs/hrnet/fcn_hr48_512x1024_80k_cityscapes.py b/configs/hrnet/fcn_hr48_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..a9bab32b52ca41155062c7655986ed84677a8280 --- /dev/null +++ b/configs/hrnet/fcn_hr48_512x1024_80k_cityscapes.py @@ -0,0 +1,10 @@ +_base_ = './fcn_hr18_512x1024_80k_cityscapes.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w48', + backbone=dict( + extra=dict( + stage2=dict(num_channels=(48, 96)), + stage3=dict(num_channels=(48, 96, 192)), + stage4=dict(num_channels=(48, 96, 192, 384)))), + decode_head=dict( + in_channels=[48, 96, 192, 384], channels=sum([48, 96, 192, 384]))) diff --git a/configs/hrnet/fcn_hr48_512x512_160k_ade20k.py b/configs/hrnet/fcn_hr48_512x512_160k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..dff4fea85ced568c38d39408d459697e88ca0faa --- /dev/null +++ b/configs/hrnet/fcn_hr48_512x512_160k_ade20k.py @@ -0,0 +1,10 @@ +_base_ = './fcn_hr18_512x512_160k_ade20k.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w48', + backbone=dict( + extra=dict( + stage2=dict(num_channels=(48, 96)), + stage3=dict(num_channels=(48, 96, 192)), + stage4=dict(num_channels=(48, 96, 192, 384)))), + 
decode_head=dict( + in_channels=[48, 96, 192, 384], channels=sum([48, 96, 192, 384]))) diff --git a/configs/hrnet/fcn_hr48_512x512_20k_voc12aug.py b/configs/hrnet/fcn_hr48_512x512_20k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..a8d1deb98659d05755c6316c2aff2295afb0bb9c --- /dev/null +++ b/configs/hrnet/fcn_hr48_512x512_20k_voc12aug.py @@ -0,0 +1,10 @@ +_base_ = './fcn_hr18_512x512_20k_voc12aug.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w48', + backbone=dict( + extra=dict( + stage2=dict(num_channels=(48, 96)), + stage3=dict(num_channels=(48, 96, 192)), + stage4=dict(num_channels=(48, 96, 192, 384)))), + decode_head=dict( + in_channels=[48, 96, 192, 384], channels=sum([48, 96, 192, 384]))) diff --git a/configs/hrnet/fcn_hr48_512x512_40k_voc12aug.py b/configs/hrnet/fcn_hr48_512x512_40k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..1084a57e978195df6d45a9a00415953ddbaeeb51 --- /dev/null +++ b/configs/hrnet/fcn_hr48_512x512_40k_voc12aug.py @@ -0,0 +1,10 @@ +_base_ = './fcn_hr18_512x512_40k_voc12aug.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w48', + backbone=dict( + extra=dict( + stage2=dict(num_channels=(48, 96)), + stage3=dict(num_channels=(48, 96, 192)), + stage4=dict(num_channels=(48, 96, 192, 384)))), + decode_head=dict( + in_channels=[48, 96, 192, 384], channels=sum([48, 96, 192, 384]))) diff --git a/configs/hrnet/fcn_hr48_512x512_80k_ade20k.py b/configs/hrnet/fcn_hr48_512x512_80k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..7eca7fa4b8102c6225af3b484ffff5bdc7c0f201 --- /dev/null +++ b/configs/hrnet/fcn_hr48_512x512_80k_ade20k.py @@ -0,0 +1,10 @@ +_base_ = './fcn_hr18_512x512_80k_ade20k.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w48', + backbone=dict( + extra=dict( + stage2=dict(num_channels=(48, 96)), + stage3=dict(num_channels=(48, 96, 192)), + stage4=dict(num_channels=(48, 96, 192, 384)))), + decode_head=dict( + in_channels=[48, 96, 192, 384], channels=sum([48, 96, 192, 384]))) diff --git a/configs/mobilenet_v2/README.md b/configs/mobilenet_v2/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7356a0ec4d7205782fe8b27e480311b58d4293ff --- /dev/null +++ b/configs/mobilenet_v2/README.md @@ -0,0 +1,35 @@ +# MobileNetV2: Inverted Residuals and Linear Bottlenecks + +## Introduction + + + +```latex +@inproceedings{sandler2018mobilenetv2, + title={Mobilenetv2: Inverted residuals and linear bottlenecks}, + author={Sandler, Mark and Howard, Andrew and Zhu, Menglong and Zhmoginov, Andrey and Chen, Liang-Chieh}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={4510--4520}, + year={2018} +} +``` + +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ---------- | -------- | --------- | ------: | -------: | -------------- | ----: | ------------- | ---------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | 
+| FCN | M-V2-D8 | 512x1024 | 80000 | 3.4 | 14.2 | 61.54 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/mobilenet_v2/fcn_m-v2-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/fcn_m-v2-d8_512x1024_80k_cityscapes/fcn_m-v2-d8_512x1024_80k_cityscapes_20200825_124817-d24c28c1.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/fcn_m-v2-d8_512x1024_80k_cityscapes/fcn_m-v2-d8_512x1024_80k_cityscapes-20200825_124817.log.json) | +| PSPNet | M-V2-D8 | 512x1024 | 80000 | 3.6 | 11.2 | 70.23 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/mobilenet_v2/pspnet_m-v2-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/pspnet_m-v2-d8_512x1024_80k_cityscapes/pspnet_m-v2-d8_512x1024_80k_cityscapes_20200825_124817-19e81d51.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/pspnet_m-v2-d8_512x1024_80k_cityscapes/pspnet_m-v2-d8_512x1024_80k_cityscapes-20200825_124817.log.json) | +| DeepLabV3 | M-V2-D8 | 512x1024 | 80000 | 3.9 | 8.4 | 73.84 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/mobilenet_v2/deeplabv3_m-v2-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/deeplabv3_m-v2-d8_512x1024_80k_cityscapes/deeplabv3_m-v2-d8_512x1024_80k_cityscapes_20200825_124836-bef03590.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/deeplabv3_m-v2-d8_512x1024_80k_cityscapes/deeplabv3_m-v2-d8_512x1024_80k_cityscapes-20200825_124836.log.json) | +| DeepLabV3+ | M-V2-D8 | 512x1024 | 80000 | 5.1 | 8.4 | 75.20 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/mobilenet_v2/deeplabv3plus_m-v2-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/deeplabv3plus_m-v2-d8_512x1024_80k_cityscapes/deeplabv3plus_m-v2-d8_512x1024_80k_cityscapes_20200825_124836-d256dd4b.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/deeplabv3plus_m-v2-d8_512x1024_80k_cityscapes/deeplabv3plus_m-v2-d8_512x1024_80k_cityscapes-20200825_124836.log.json) | + +### ADE20k + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ---------- | -------- | --------- | ------: | -------: | -------------- | ----: | ------------- | ------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| FCN | M-V2-D8 | 512x512 | 160000 | 6.5 | 64.4 | 19.71 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/mobilenet_v2/fcn_m-v2-d8_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/fcn_m-v2-d8_512x512_160k_ade20k/fcn_m-v2-d8_512x512_160k_ade20k_20200825_214953-c40e1095.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/fcn_m-v2-d8_512x512_160k_ade20k/fcn_m-v2-d8_512x512_160k_ade20k-20200825_214953.log.json) | +| PSPNet | M-V2-D8 | 512x512 | 
160000 | 6.5 | 57.7 | 29.68 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/mobilenet_v2/pspnet_m-v2-d8_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/pspnet_m-v2-d8_512x512_160k_ade20k/pspnet_m-v2-d8_512x512_160k_ade20k_20200825_214953-f5942f7a.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/pspnet_m-v2-d8_512x512_160k_ade20k/pspnet_m-v2-d8_512x512_160k_ade20k-20200825_214953.log.json) | +| DeepLabV3 | M-V2-D8 | 512x512 | 160000 | 6.8 | 39.9 | 34.08 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/mobilenet_v2/deeplabv3_m-v2-d8_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/deeplabv3_m-v2-d8_512x512_160k_ade20k/deeplabv3_m-v2-d8_512x512_160k_ade20k_20200825_223255-63986343.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/deeplabv3_m-v2-d8_512x512_160k_ade20k/deeplabv3_m-v2-d8_512x512_160k_ade20k-20200825_223255.log.json) | +| DeepLabV3+ | M-V2-D8 | 512x512 | 160000 | 8.2 | 43.1 | 34.02 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/mobilenet_v2/deeplabv3plus_m-v2-d8_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/deeplabv3plus_m-v2-d8_512x512_160k_ade20k/deeplabv3plus_m-v2-d8_512x512_160k_ade20k_20200825_223255-465a01d4.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/deeplabv3plus_m-v2-d8_512x512_160k_ade20k/deeplabv3plus_m-v2-d8_512x512_160k_ade20k-20200825_223255.log.json) | diff --git a/configs/mobilenet_v2/deeplabv3_m-v2-d8_512x1024_80k_cityscapes.py b/configs/mobilenet_v2/deeplabv3_m-v2-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..267483d88ff25d75dc18c5c2d37375cd77c9639c --- /dev/null +++ b/configs/mobilenet_v2/deeplabv3_m-v2-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,12 @@ +_base_ = '../deeplabv3/deeplabv3_r101-d8_512x1024_80k_cityscapes.py' +model = dict( + pretrained='mmcls://mobilenet_v2', + backbone=dict( + _delete_=True, + type='MobileNetV2', + widen_factor=1., + strides=(1, 2, 2, 1, 1, 1, 1), + dilations=(1, 1, 1, 2, 2, 4, 4), + out_indices=(1, 2, 4, 6)), + decode_head=dict(in_channels=320), + auxiliary_head=dict(in_channels=96)) diff --git a/configs/mobilenet_v2/deeplabv3_m-v2-d8_512x512_160k_ade20k.py b/configs/mobilenet_v2/deeplabv3_m-v2-d8_512x512_160k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..e15b8cc82b09ac3e64875936cdfd0f663aaba936 --- /dev/null +++ b/configs/mobilenet_v2/deeplabv3_m-v2-d8_512x512_160k_ade20k.py @@ -0,0 +1,12 @@ +_base_ = '../deeplabv3/deeplabv3_r101-d8_512x512_160k_ade20k.py' +model = dict( + pretrained='mmcls://mobilenet_v2', + backbone=dict( + _delete_=True, + type='MobileNetV2', + widen_factor=1., + strides=(1, 2, 2, 1, 1, 1, 1), + dilations=(1, 1, 1, 2, 2, 4, 4), + out_indices=(1, 2, 4, 6)), + decode_head=dict(in_channels=320), + auxiliary_head=dict(in_channels=96)) diff --git a/configs/mobilenet_v2/deeplabv3plus_m-v2-d8_512x1024_80k_cityscapes.py b/configs/mobilenet_v2/deeplabv3plus_m-v2-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..d4533d79a25771905d7f1900bf7b34037885a77a --- /dev/null +++ b/configs/mobilenet_v2/deeplabv3plus_m-v2-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,12 @@ +_base_ = '../deeplabv3plus/deeplabv3plus_r101-d8_512x1024_80k_cityscapes.py' +model = dict( + 
pretrained='mmcls://mobilenet_v2', + backbone=dict( + _delete_=True, + type='MobileNetV2', + widen_factor=1., + strides=(1, 2, 2, 1, 1, 1, 1), + dilations=(1, 1, 1, 2, 2, 4, 4), + out_indices=(1, 2, 4, 6)), + decode_head=dict(in_channels=320, c1_in_channels=24), + auxiliary_head=dict(in_channels=96)) diff --git a/configs/mobilenet_v2/deeplabv3plus_m-v2-d8_512x512_160k_ade20k.py b/configs/mobilenet_v2/deeplabv3plus_m-v2-d8_512x512_160k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..7615a7c19a3f19635b71801a55e4544be4d215b5 --- /dev/null +++ b/configs/mobilenet_v2/deeplabv3plus_m-v2-d8_512x512_160k_ade20k.py @@ -0,0 +1,12 @@ +_base_ = '../deeplabv3plus/deeplabv3plus_r101-d8_512x512_160k_ade20k.py' +model = dict( + pretrained='mmcls://mobilenet_v2', + backbone=dict( + _delete_=True, + type='MobileNetV2', + widen_factor=1., + strides=(1, 2, 2, 1, 1, 1, 1), + dilations=(1, 1, 1, 2, 2, 4, 4), + out_indices=(1, 2, 4, 6)), + decode_head=dict(in_channels=320, c1_in_channels=24), + auxiliary_head=dict(in_channels=96)) diff --git a/configs/mobilenet_v2/fcn_m-v2-d8_512x1024_80k_cityscapes.py b/configs/mobilenet_v2/fcn_m-v2-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..a535bd0ed8a4883134acdc52cf3f77c8d897ce82 --- /dev/null +++ b/configs/mobilenet_v2/fcn_m-v2-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,12 @@ +_base_ = '../fcn/fcn_r101-d8_512x1024_80k_cityscapes.py' +model = dict( + pretrained='mmcls://mobilenet_v2', + backbone=dict( + _delete_=True, + type='MobileNetV2', + widen_factor=1., + strides=(1, 2, 2, 1, 1, 1, 1), + dilations=(1, 1, 1, 2, 2, 4, 4), + out_indices=(1, 2, 4, 6)), + decode_head=dict(in_channels=320), + auxiliary_head=dict(in_channels=96)) diff --git a/configs/mobilenet_v2/fcn_m-v2-d8_512x512_160k_ade20k.py b/configs/mobilenet_v2/fcn_m-v2-d8_512x512_160k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..c5f6ab0d62e269e44dac016eb5ac58f49c1fa292 --- /dev/null +++ b/configs/mobilenet_v2/fcn_m-v2-d8_512x512_160k_ade20k.py @@ -0,0 +1,12 @@ +_base_ = '../fcn/fcn_r101-d8_512x512_160k_ade20k.py' +model = dict( + pretrained='mmcls://mobilenet_v2', + backbone=dict( + _delete_=True, + type='MobileNetV2', + widen_factor=1., + strides=(1, 2, 2, 1, 1, 1, 1), + dilations=(1, 1, 1, 2, 2, 4, 4), + out_indices=(1, 2, 4, 6)), + decode_head=dict(in_channels=320), + auxiliary_head=dict(in_channels=96)) diff --git a/configs/mobilenet_v2/pspnet_m-v2-d8_512x1024_80k_cityscapes.py b/configs/mobilenet_v2/pspnet_m-v2-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..7403bee864d833bcc31160665e4b54fdd738cc13 --- /dev/null +++ b/configs/mobilenet_v2/pspnet_m-v2-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,12 @@ +_base_ = '../pspnet/pspnet_r101-d8_512x1024_80k_cityscapes.py' +model = dict( + pretrained='mmcls://mobilenet_v2', + backbone=dict( + _delete_=True, + type='MobileNetV2', + widen_factor=1., + strides=(1, 2, 2, 1, 1, 1, 1), + dilations=(1, 1, 1, 2, 2, 4, 4), + out_indices=(1, 2, 4, 6)), + decode_head=dict(in_channels=320), + auxiliary_head=dict(in_channels=96)) diff --git a/configs/mobilenet_v2/pspnet_m-v2-d8_512x512_160k_ade20k.py b/configs/mobilenet_v2/pspnet_m-v2-d8_512x512_160k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..5b72ac830be29b865ed52adaf41f2fe800f252cc --- /dev/null +++ b/configs/mobilenet_v2/pspnet_m-v2-d8_512x512_160k_ade20k.py @@ -0,0 +1,12 @@ +_base_ = '../pspnet/pspnet_r101-d8_512x512_160k_ade20k.py' 
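Every MobileNetV2 config above swaps a ResNet-101 backbone for MobileNetV2, which is why they set `_delete_=True`: without it, mmcv would merge the new backbone dict into the inherited ResNet one and leave stale keys such as `depth` behind, then fail when building the model. A sketch, assuming mmcv and repo-root paths:

```python
# Sketch: _delete_=True replaces the inherited backbone dict wholesale
# instead of merging into it.
from mmcv import Config

cfg = Config.fromfile(
    'configs/mobilenet_v2/fcn_m-v2-d8_512x1024_80k_cityscapes.py')
print(cfg.model.backbone.type)            # 'MobileNetV2'
assert 'depth' not in cfg.model.backbone  # ResNet-101's depth key is gone
```

The head overrides line up with the widths selected by `out_indices=(1, 2, 4, 6)`: the last stage emits 320 channels (`decode_head`) and the fourth emits 96 (`auxiliary_head`).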
+model = dict( + pretrained='mmcls://mobilenet_v2', + backbone=dict( + _delete_=True, + type='MobileNetV2', + widen_factor=1., + strides=(1, 2, 2, 1, 1, 1, 1), + dilations=(1, 1, 1, 2, 2, 4, 4), + out_indices=(1, 2, 4, 6)), + decode_head=dict(in_channels=320), + auxiliary_head=dict(in_channels=96)) diff --git a/configs/mobilenet_v3/README.md b/configs/mobilenet_v3/README.md new file mode 100644 index 0000000000000000000000000000000000000000..a843d355b6c95946517b50b6867d53f1ffcaf869 --- /dev/null +++ b/configs/mobilenet_v3/README.md @@ -0,0 +1,28 @@ +# Searching for MobileNetV3 + +## Introduction + + + +```latex +@inproceedings{Howard_2019_ICCV, + title={Searching for MobileNetV3}, + author={Howard, Andrew and Sandler, Mark and Chu, Grace and Chen, Liang-Chieh and Chen, Bo and Tan, Mingxing and Wang, Weijun and Zhu, Yukun and Pang, Ruoming and Vasudevan, Vijay and Le, Quoc V. and Adam, Hartwig}, + booktitle={The IEEE International Conference on Computer Vision (ICCV)}, + pages={1314-1324}, + month={October}, + year={2019}, + doi={10.1109/ICCV.2019.00140} +} +``` + +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | ------------------ | --------- | ------: | -------: | -------------- | ----: | ------------- | ------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| LRASPP | M-V3-D8 | 512x1024 | 320000 | 8.9 | 15.22 | 69.54 | 70.89 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/mobilenet_v3/lraspp_m-v3-d8_512x1024_320k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v3/lraspp_m-v3-d8_512x1024_320k_cityscapes/lraspp_m-v3-d8_512x1024_320k_cityscapes_20201224_220337-cfe8fb07.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v3/lraspp_m-v3-d8_512x1024_320k_cityscapes/lraspp_m-v3-d8_512x1024_320k_cityscapes-20201224_220337.log.json) | +| LRASPP | M-V3-D8 (scratch) | 512x1024 | 320000 | 8.9 | 14.77 | 67.87 | 69.78 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/mobilenet_v3/lraspp_m-v3-d8_scratch_512x1024_320k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v3/lraspp_m-v3-d8_scratch_512x1024_320k_cityscapes/lraspp_m-v3-d8_scratch_512x1024_320k_cityscapes_20201224_220337-9f29cd72.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v3/lraspp_m-v3-d8_scratch_512x1024_320k_cityscapes/lraspp_m-v3-d8_scratch_512x1024_320k_cityscapes-20201224_220337.log.json) | +| LRASPP | M-V3s-D8 | 512x1024 | 320000 | 5.3 | 23.64 | 64.11 | 66.42 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/mobilenet_v3/lraspp_m-v3s-d8_512x1024_320k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v3/lraspp_m-v3s-d8_512x1024_320k_cityscapes/lraspp_m-v3s-d8_512x1024_320k_cityscapes_20201224_223935-61565b34.pth) | 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v3/lraspp_m-v3s-d8_512x1024_320k_cityscapes/lraspp_m-v3s-d8_512x1024_320k_cityscapes-20201224_223935.log.json) | +| LRASPP | M-V3s-D8 (scratch) | 512x1024 | 320000 | 5.3 | 24.50 | 62.74 | 65.01 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/mobilenet_v3/lraspp_m-v3s-d8_scratch_512x1024_320k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v3/lraspp_m-v3s-d8_scratch_512x1024_320k_cityscapes/lraspp_m-v3s-d8_scratch_512x1024_320k_cityscapes_20201224_223935-03daeabb.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v3/lraspp_m-v3s-d8_scratch_512x1024_320k_cityscapes/lraspp_m-v3s-d8_scratch_512x1024_320k_cityscapes-20201224_223935.log.json) | diff --git a/configs/mobilenet_v3/lraspp_m-v3-d8_512x1024_320k_cityscapes.py b/configs/mobilenet_v3/lraspp_m-v3-d8_512x1024_320k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..e59a78b48be3a0997a31524fd78e7fad5636bc82 --- /dev/null +++ b/configs/mobilenet_v3/lraspp_m-v3-d8_512x1024_320k_cityscapes.py @@ -0,0 +1,11 @@ +_base_ = [ + '../_base_/models/lraspp_m-v3-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] + +model = dict(pretrained='open-mmlab://contrib/mobilenet_v3_large') + +# Re-config the data sampler. +data = dict(samples_per_gpu=4, workers_per_gpu=4) + +runner = dict(type='IterBasedRunner', max_iters=320000) diff --git a/configs/mobilenet_v3/lraspp_m-v3-d8_scratch_512x1024_320k_cityscapes.py b/configs/mobilenet_v3/lraspp_m-v3-d8_scratch_512x1024_320k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..a3c5435142db6b1f81421f5fd96d07ece32b5f38 --- /dev/null +++ b/configs/mobilenet_v3/lraspp_m-v3-d8_scratch_512x1024_320k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/lraspp_m-v3-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] + +# Re-config the data sampler. 
+data = dict(samples_per_gpu=4, workers_per_gpu=4) + +runner = dict(type='IterBasedRunner', max_iters=320000) diff --git a/configs/mobilenet_v3/lraspp_m-v3s-d8_512x1024_320k_cityscapes.py b/configs/mobilenet_v3/lraspp_m-v3s-d8_512x1024_320k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..d4e368b2a11ed6433d8f2594a2cc3184fe5ddfff --- /dev/null +++ b/configs/mobilenet_v3/lraspp_m-v3s-d8_512x1024_320k_cityscapes.py @@ -0,0 +1,23 @@ +_base_ = './lraspp_m-v3-d8_512x1024_320k_cityscapes.py' +norm_cfg = dict(type='SyncBN', eps=0.001, requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://contrib/mobilenet_v3_small', + backbone=dict( + type='MobileNetV3', + arch='small', + out_indices=(0, 1, 12), + norm_cfg=norm_cfg), + decode_head=dict( + type='LRASPPHead', + in_channels=(16, 16, 576), + in_index=(0, 1, 2), + channels=128, + input_transform='multiple_select', + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + act_cfg=dict(type='ReLU'), + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0))) diff --git a/configs/mobilenet_v3/lraspp_m-v3s-d8_scratch_512x1024_320k_cityscapes.py b/configs/mobilenet_v3/lraspp_m-v3s-d8_scratch_512x1024_320k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..0c5f707200c5d8b6d39493762baf59023dcaad11 --- /dev/null +++ b/configs/mobilenet_v3/lraspp_m-v3s-d8_scratch_512x1024_320k_cityscapes.py @@ -0,0 +1,22 @@ +_base_ = './lraspp_m-v3-d8_scratch_512x1024_320k_cityscapes.py' +norm_cfg = dict(type='SyncBN', eps=0.001, requires_grad=True) +model = dict( + type='EncoderDecoder', + backbone=dict( + type='MobileNetV3', + arch='small', + out_indices=(0, 1, 12), + norm_cfg=norm_cfg), + decode_head=dict( + type='LRASPPHead', + in_channels=(16, 16, 576), + in_index=(0, 1, 2), + channels=128, + input_transform='multiple_select', + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + act_cfg=dict(type='ReLU'), + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0))) diff --git a/configs/nonlocal_net/README.md b/configs/nonlocal_net/README.md new file mode 100644 index 0000000000000000000000000000000000000000..da0924ac60f0a16a17fe4705e0edbf5aad962a82 --- /dev/null +++ b/configs/nonlocal_net/README.md @@ -0,0 +1,48 @@ +# Non-local Neural Networks + +## Introduction + + + +```latex +@inproceedings{wang2018non, + title={Non-local neural networks}, + author={Wang, Xiaolong and Girshick, Ross and Gupta, Abhinav and He, Kaiming}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={7794--7803}, + year={2018} +} +``` + +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| -------- | -------- | --------- | ------: | -------- | -------------- | ----: | ------------- | ----------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| NonLocal | R-50-D8 | 512x1024 | 40000 | 7.4 | 2.72 | 78.24 | 
- | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/nonlocal_net/nonlocal_r50-d8_512x1024_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x1024_40k_cityscapes/nonlocal_r50-d8_512x1024_40k_cityscapes_20200605_210748-c75e81e3.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x1024_40k_cityscapes/nonlocal_r50-d8_512x1024_40k_cityscapes_20200605_210748.log.json) | +| NonLocal | R-101-D8 | 512x1024 | 40000 | 10.9 | 1.95 | 78.66 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/nonlocal_net/nonlocal_r101-d8_512x1024_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x1024_40k_cityscapes/nonlocal_r101-d8_512x1024_40k_cityscapes_20200605_210748-d63729fa.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x1024_40k_cityscapes/nonlocal_r101-d8_512x1024_40k_cityscapes_20200605_210748.log.json) | +| NonLocal | R-50-D8 | 769x769 | 40000 | 8.9 | 1.52 | 78.33 | 79.92 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/nonlocal_net/nonlocal_r50-d8_769x769_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_769x769_40k_cityscapes/nonlocal_r50-d8_769x769_40k_cityscapes_20200530_045243-82ef6749.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_769x769_40k_cityscapes/nonlocal_r50-d8_769x769_40k_cityscapes_20200530_045243.log.json) | +| NonLocal | R-101-D8 | 769x769 | 40000 | 12.8 | 1.05 | 78.57 | 80.29 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/nonlocal_net/nonlocal_r101-d8_769x769_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_769x769_40k_cityscapes/nonlocal_r101-d8_769x769_40k_cityscapes_20200530_045348-8fe9a9dc.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_769x769_40k_cityscapes/nonlocal_r101-d8_769x769_40k_cityscapes_20200530_045348.log.json) | +| NonLocal | R-50-D8 | 512x1024 | 80000 | - | - | 78.01 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/nonlocal_net/nonlocal_r50-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x1024_80k_cityscapes/nonlocal_r50-d8_512x1024_80k_cityscapes_20200607_193518-d6839fae.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x1024_80k_cityscapes/nonlocal_r50-d8_512x1024_80k_cityscapes_20200607_193518.log.json) | +| NonLocal | R-101-D8 | 512x1024 | 80000 | - | - | 78.93 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/nonlocal_net/nonlocal_r101-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x1024_80k_cityscapes/nonlocal_r101-d8_512x1024_80k_cityscapes_20200607_183411-32700183.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x1024_80k_cityscapes/nonlocal_r101-d8_512x1024_80k_cityscapes_20200607_183411.log.json) | +| NonLocal | R-50-D8 | 769x769 | 80000 | - | - | 79.05 | 80.68 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/nonlocal_net/nonlocal_r50-d8_769x769_80k_cityscapes.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_769x769_80k_cityscapes/nonlocal_r50-d8_769x769_80k_cityscapes_20200607_193506-1f9792f6.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_769x769_80k_cityscapes/nonlocal_r50-d8_769x769_80k_cityscapes_20200607_193506.log.json) | +| NonLocal | R-101-D8 | 769x769 | 80000 | - | - | 79.40 | 80.85 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/nonlocal_net/nonlocal_r101-d8_769x769_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_769x769_80k_cityscapes/nonlocal_r101-d8_769x769_80k_cityscapes_20200607_183428-0e1fa4f9.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_769x769_80k_cityscapes/nonlocal_r101-d8_769x769_80k_cityscapes_20200607_183428.log.json) | + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| -------- | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| NonLocal | R-50-D8 | 512x512 | 80000 | 9.1 | 21.37 | 40.75 | 42.05 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/nonlocal_net/nonlocal_r50-d8_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x512_80k_ade20k/nonlocal_r50-d8_512x512_80k_ade20k_20200615_015801-5ae0aa33.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x512_80k_ade20k/nonlocal_r50-d8_512x512_80k_ade20k_20200615_015801.log.json) | +| NonLocal | R-101-D8 | 512x512 | 80000 | 12.6 | 13.97 | 42.90 | 44.27 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/nonlocal_net/nonlocal_r101-d8_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x512_80k_ade20k/nonlocal_r101-d8_512x512_80k_ade20k_20200615_015758-24105919.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x512_80k_ade20k/nonlocal_r101-d8_512x512_80k_ade20k_20200615_015758.log.json) | +| NonLocal | R-50-D8 | 512x512 | 160000 | - | - | 42.03 | 43.04 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/nonlocal_net/nonlocal_r50-d8_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x512_160k_ade20k/nonlocal_r50-d8_512x512_160k_ade20k_20200616_005410-baef45e3.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x512_160k_ade20k/nonlocal_r50-d8_512x512_160k_ade20k_20200616_005410.log.json) | +| NonLocal | R-101-D8 | 512x512 | 160000 | - | - | 43.36 | 44.83 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/nonlocal_net/nonlocal_r101-d8_512x512_160k_ade20k.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x512_160k_ade20k/nonlocal_r101-d8_512x512_160k_ade20k_20200616_003422-affd0f8d.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x512_160k_ade20k/nonlocal_r101-d8_512x512_160k_ade20k_20200616_003422.log.json) |
+
+### Pascal VOC 2012 + Aug
+
+| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download |
+| -------- | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | -------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| NonLocal | R-50-D8 | 512x512 | 20000 | 6.4 | 21.21 | 76.20 | 77.12 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/nonlocal_net/nonlocal_r50-d8_512x512_20k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x512_20k_voc12aug/nonlocal_r50-d8_512x512_20k_voc12aug_20200617_222613-07f2a57c.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x512_20k_voc12aug/nonlocal_r50-d8_512x512_20k_voc12aug_20200617_222613.log.json) |
+| NonLocal | R-101-D8 | 512x512 | 20000 | 9.8 | 14.01 | 78.15 | 78.86 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/nonlocal_net/nonlocal_r101-d8_512x512_20k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x512_20k_voc12aug/nonlocal_r101-d8_512x512_20k_voc12aug_20200617_222615-948c68ab.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x512_20k_voc12aug/nonlocal_r101-d8_512x512_20k_voc12aug_20200617_222615.log.json) |
+| NonLocal | R-50-D8 | 512x512 | 40000 | - | - | 76.65 | 77.47 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/nonlocal_net/nonlocal_r50-d8_512x512_40k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x512_40k_voc12aug/nonlocal_r50-d8_512x512_40k_voc12aug_20200614_000028-0139d4a9.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x512_40k_voc12aug/nonlocal_r50-d8_512x512_40k_voc12aug_20200614_000028.log.json) |
+| NonLocal | R-101-D8 | 512x512 | 40000 | - | - | 78.27 | 79.12 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/nonlocal_net/nonlocal_r101-d8_512x512_40k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x512_40k_voc12aug/nonlocal_r101-d8_512x512_40k_voc12aug_20200614_000028-7e5ff470.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x512_40k_voc12aug/nonlocal_r101-d8_512x512_40k_voc12aug_20200614_000028.log.json) |
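The nonlocal configs that follow are deliberately tiny: each inherits a full training setup through `_base_` and overrides only what differs, here the backbone depth and pretrained weights. As a minimal sketch of how these files compose — assuming only that mmcv 1.x is installed and the `configs/` tree from this diff is on disk — the merged config can be inspected like this:

```python
# Minimal sketch: resolving an mmcv-style config that inherits via `_base_`.
# Assumes mmcv 1.x is installed and the repo root is the working directory.
from mmcv import Config

cfg = Config.fromfile(
    'configs/nonlocal_net/nonlocal_r101-d8_512x1024_40k_cityscapes.py')

# The two-line child config only overrides `pretrained` and `backbone.depth`;
# the heads, dataset pipeline, runtime, and schedule are merged in from the
# `_base_` files of its parent config.
print(cfg.model.backbone.depth)    # 101 -- the child's override
print(cfg.data.samples_per_gpu)    # inherited from the dataset base config
```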
diff --git a/configs/nonlocal_net/nonlocal_r101-d8_512x1024_40k_cityscapes.py b/configs/nonlocal_net/nonlocal_r101-d8_512x1024_40k_cityscapes.py
new file mode 100644
index 0000000000000000000000000000000000000000..ef7b06dd3806c1d93be41943ab4d7d49f68ac830
--- /dev/null
+++ b/configs/nonlocal_net/nonlocal_r101-d8_512x1024_40k_cityscapes.py
@@ -0,0 +1,2 @@
+_base_ = './nonlocal_r50-d8_512x1024_40k_cityscapes.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/nonlocal_net/nonlocal_r101-d8_512x1024_80k_cityscapes.py b/configs/nonlocal_net/nonlocal_r101-d8_512x1024_80k_cityscapes.py
new file mode 100644
index 0000000000000000000000000000000000000000..7a1e66cf1c239eac3c6a4876a35d82e7b6ccec2e
--- /dev/null
+++ b/configs/nonlocal_net/nonlocal_r101-d8_512x1024_80k_cityscapes.py
@@ -0,0 +1,2 @@
+_base_ = './nonlocal_r50-d8_512x1024_80k_cityscapes.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/nonlocal_net/nonlocal_r101-d8_512x512_160k_ade20k.py b/configs/nonlocal_net/nonlocal_r101-d8_512x512_160k_ade20k.py
new file mode 100644
index 0000000000000000000000000000000000000000..df9c2aca9c7c1999d74a08a58aca5d220f7df54a
--- /dev/null
+++ b/configs/nonlocal_net/nonlocal_r101-d8_512x512_160k_ade20k.py
@@ -0,0 +1,2 @@
+_base_ = './nonlocal_r50-d8_512x512_160k_ade20k.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/nonlocal_net/nonlocal_r101-d8_512x512_20k_voc12aug.py b/configs/nonlocal_net/nonlocal_r101-d8_512x512_20k_voc12aug.py
new file mode 100644
index 0000000000000000000000000000000000000000..490f9873a29f2626ad764825eec97f16ee7f9f96
--- /dev/null
+++ b/configs/nonlocal_net/nonlocal_r101-d8_512x512_20k_voc12aug.py
@@ -0,0 +1,2 @@
+_base_ = './nonlocal_r50-d8_512x512_20k_voc12aug.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/nonlocal_net/nonlocal_r101-d8_512x512_40k_voc12aug.py b/configs/nonlocal_net/nonlocal_r101-d8_512x512_40k_voc12aug.py
new file mode 100644
index 0000000000000000000000000000000000000000..40d9190fba223251b794c105b036e4794865f785
--- /dev/null
+++ b/configs/nonlocal_net/nonlocal_r101-d8_512x512_40k_voc12aug.py
@@ -0,0 +1,2 @@
+_base_ = './nonlocal_r50-d8_512x512_40k_voc12aug.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/nonlocal_net/nonlocal_r101-d8_512x512_80k_ade20k.py b/configs/nonlocal_net/nonlocal_r101-d8_512x512_80k_ade20k.py
new file mode 100644
index 0000000000000000000000000000000000000000..0c6f60dac7b457d3b936a5f7f43eb84713c77e05
--- /dev/null
+++ b/configs/nonlocal_net/nonlocal_r101-d8_512x512_80k_ade20k.py
@@ -0,0 +1,2 @@
+_base_ = './nonlocal_r50-d8_512x512_80k_ade20k.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/nonlocal_net/nonlocal_r101-d8_769x769_40k_cityscapes.py b/configs/nonlocal_net/nonlocal_r101-d8_769x769_40k_cityscapes.py
new file mode 100644
index 0000000000000000000000000000000000000000..23e6da7f23180c2350253ea400f444c0c3064fd6
--- /dev/null
+++ b/configs/nonlocal_net/nonlocal_r101-d8_769x769_40k_cityscapes.py
@@ -0,0 +1,2 @@
+_base_ = './nonlocal_r50-d8_769x769_40k_cityscapes.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/nonlocal_net/nonlocal_r101-d8_769x769_80k_cityscapes.py b/configs/nonlocal_net/nonlocal_r101-d8_769x769_80k_cityscapes.py
new file mode 100644
index 0000000000000000000000000000000000000000..0627e2b5a76dead859212d4cab116c160df21404
--- /dev/null
+++ b/configs/nonlocal_net/nonlocal_r101-d8_769x769_80k_cityscapes.py
@@ -0,0 +1,2 @@
+_base_ = './nonlocal_r50-d8_769x769_80k_cityscapes.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/nonlocal_net/nonlocal_r50-d8_512x1024_40k_cityscapes.py b/configs/nonlocal_net/nonlocal_r50-d8_512x1024_40k_cityscapes.py
new file mode 100644
index 0000000000000000000000000000000000000000..9d4dc7390370d0ffe21e7dcb686eeff7261952c4
--- /dev/null
+++ b/configs/nonlocal_net/nonlocal_r50-d8_512x1024_40k_cityscapes.py
@@ -0,0 +1,4 @@
+_base_ = [
+    '../_base_/models/nonlocal_r50-d8.py', '../_base_/datasets/cityscapes.py',
+    '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py'
+]
diff --git a/configs/nonlocal_net/nonlocal_r50-d8_512x1024_80k_cityscapes.py b/configs/nonlocal_net/nonlocal_r50-d8_512x1024_80k_cityscapes.py
new file mode 100644
index 0000000000000000000000000000000000000000..b0672b687ade8d554b71fdf0bc54de9f024fa30c
--- /dev/null
+++ b/configs/nonlocal_net/nonlocal_r50-d8_512x1024_80k_cityscapes.py
@@ -0,0 +1,4 @@
+_base_ = [
+    '../_base_/models/nonlocal_r50-d8.py', '../_base_/datasets/cityscapes.py',
+    '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py'
+]
diff --git a/configs/nonlocal_net/nonlocal_r50-d8_512x512_160k_ade20k.py b/configs/nonlocal_net/nonlocal_r50-d8_512x512_160k_ade20k.py
new file mode 100644
index 0000000000000000000000000000000000000000..b1adfbab882d9825a3f348ed99e401d1f164cd11
--- /dev/null
+++ b/configs/nonlocal_net/nonlocal_r50-d8_512x512_160k_ade20k.py
@@ -0,0 +1,6 @@
+_base_ = [
+    '../_base_/models/nonlocal_r50-d8.py', '../_base_/datasets/ade20k.py',
+    '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py'
+]
+model = dict(
+    decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150))
diff --git a/configs/nonlocal_net/nonlocal_r50-d8_512x512_20k_voc12aug.py b/configs/nonlocal_net/nonlocal_r50-d8_512x512_20k_voc12aug.py
new file mode 100644
index 0000000000000000000000000000000000000000..2e808d8072f34d09a7b0859f90261dd66c8815dd
--- /dev/null
+++ b/configs/nonlocal_net/nonlocal_r50-d8_512x512_20k_voc12aug.py
@@ -0,0 +1,7 @@
+_base_ = [
+    '../_base_/models/nonlocal_r50-d8.py',
+    '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py',
+    '../_base_/schedules/schedule_20k.py'
+]
+model = dict(
+    decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21))
diff --git a/configs/nonlocal_net/nonlocal_r50-d8_512x512_40k_voc12aug.py b/configs/nonlocal_net/nonlocal_r50-d8_512x512_40k_voc12aug.py
new file mode 100644
index 0000000000000000000000000000000000000000..66b443abec3282242c0f794a2f91e066596e7ee9
--- /dev/null
+++ b/configs/nonlocal_net/nonlocal_r50-d8_512x512_40k_voc12aug.py
@@ -0,0 +1,7 @@
+_base_ = [
+    '../_base_/models/nonlocal_r50-d8.py',
+    '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py',
+    '../_base_/schedules/schedule_40k.py'
+]
+model = dict(
+    decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21))
diff --git a/configs/nonlocal_net/nonlocal_r50-d8_512x512_80k_ade20k.py b/configs/nonlocal_net/nonlocal_r50-d8_512x512_80k_ade20k.py
new file mode 100644
index 0000000000000000000000000000000000000000..8a7a2f509ba6627ad5ab972ac090362bbcd2ecb7
--- /dev/null
+++ b/configs/nonlocal_net/nonlocal_r50-d8_512x512_80k_ade20k.py
@@ -0,0 +1,6 @@
+_base_ = [
+    '../_base_/models/nonlocal_r50-d8.py', '../_base_/datasets/ade20k.py',
+    '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py'
+]
+model = dict(
+    decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150))
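The two 769x769 variants added next switch evaluation to sliding-window mode via `test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))`: the image is tiled into overlapping 769x769 crops, each crop is segmented, and logits are averaged where crops overlap. A hedged, self-contained sketch of that tiling logic follows (a hypothetical helper, not mmseg's actual implementation; `model` is assumed to map an image batch to per-class logits at the same spatial size):

```python
# Sketch of sliding-window inference, as enabled by
# test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513)).
# Illustrative only; mmseg implements this internally.
import torch

def slide_inference(model, img, num_classes, crop=(769, 769), stride=(513, 513)):
    _, _, h, w = img.shape
    ch, cw = crop
    sh, sw = stride
    logits = img.new_zeros((img.size(0), num_classes, h, w))
    count = img.new_zeros((1, 1, h, w))
    for y in range(0, max(h - ch, 0) + sh, sh):
        for x in range(0, max(w - cw, 0) + sw, sw):
            y1 = min(y, max(h - ch, 0))  # clamp the last window to the border
            x1 = min(x, max(w - cw, 0))
            y2, x2 = min(y1 + ch, h), min(x1 + cw, w)
            logits[:, :, y1:y2, x1:x2] += model(img[:, :, y1:y2, x1:x2])
            count[:, :, y1:y2, x1:x2] += 1
    return logits / count  # average logits where crops overlap
```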
diff --git a/configs/nonlocal_net/nonlocal_r50-d8_769x769_40k_cityscapes.py b/configs/nonlocal_net/nonlocal_r50-d8_769x769_40k_cityscapes.py
new file mode 100644
index 0000000000000000000000000000000000000000..75adef324877d56c157b457eecbf8446aa6b192f
--- /dev/null
+++ b/configs/nonlocal_net/nonlocal_r50-d8_769x769_40k_cityscapes.py
@@ -0,0 +1,9 @@
+_base_ = [
+    '../_base_/models/nonlocal_r50-d8.py',
+    '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py',
+    '../_base_/schedules/schedule_40k.py'
+]
+model = dict(
+    decode_head=dict(align_corners=True),
+    auxiliary_head=dict(align_corners=True),
+    test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513)))
diff --git a/configs/nonlocal_net/nonlocal_r50-d8_769x769_80k_cityscapes.py b/configs/nonlocal_net/nonlocal_r50-d8_769x769_80k_cityscapes.py
new file mode 100644
index 0000000000000000000000000000000000000000..a0726c293d6026898110f7fa55d5e7d2d55d7a02
--- /dev/null
+++ b/configs/nonlocal_net/nonlocal_r50-d8_769x769_80k_cityscapes.py
@@ -0,0 +1,9 @@
+_base_ = [
+    '../_base_/models/nonlocal_r50-d8.py',
+    '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py',
+    '../_base_/schedules/schedule_80k.py'
+]
+model = dict(
+    decode_head=dict(align_corners=True),
+    auxiliary_head=dict(align_corners=True),
+    test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513)))
diff --git a/configs/ocrnet/README.md b/configs/ocrnet/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..136b49d4b6f5907b750447ac4323b26610cd3071
--- /dev/null
+++ b/configs/ocrnet/README.md
@@ -0,0 +1,69 @@
+# Object-Contextual Representations for Semantic Segmentation
+
+## Introduction
+
+
+
+```latex
+@article{YuanW18,
+  title={Ocnet: Object context network for scene parsing},
+  author={Yuhui Yuan and Jingdong Wang},
+  journal={arXiv preprint arXiv:1809.00916},
+  year={2018}
+}
+
+@inproceedings{YuanCW20,
+  title={Object-Contextual Representations for Semantic Segmentation},
+  author={Yuhui Yuan and Xilin Chen and Jingdong Wang},
+  booktitle={ECCV},
+  year={2020}
+}
+```
+
+## Results and models
+
+### Cityscapes
+
+#### HRNet backbone
+
+| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download |
+| ------ | ------------------ | --------- | ------: | -------- | -------------- | ----: | ------------: | -------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| OCRNet | HRNetV2p-W18-Small | 512x1024 | 40000 | 3.5 | 10.45 | 74.30 | 75.95 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ocrnet/ocrnet_hr18s_512x1024_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x1024_40k_cityscapes/ocrnet_hr18s_512x1024_40k_cityscapes_20200601_033304-fa2436c2.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x1024_40k_cityscapes/ocrnet_hr18s_512x1024_40k_cityscapes_20200601_033304.log.json) |
+| OCRNet | HRNetV2p-W18 | 512x1024 | 40000 | 4.7 | 7.50 | 77.72 | 79.49 |
[config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ocrnet/ocrnet_hr18_512x1024_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x1024_40k_cityscapes/ocrnet_hr18_512x1024_40k_cityscapes_20200601_033320-401c5bdd.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x1024_40k_cityscapes/ocrnet_hr18_512x1024_40k_cityscapes_20200601_033320.log.json) | +| OCRNet | HRNetV2p-W48 | 512x1024 | 40000 | 8 | 4.22 | 80.58 | 81.79 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ocrnet/ocrnet_hr48_512x1024_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x1024_40k_cityscapes/ocrnet_hr48_512x1024_40k_cityscapes_20200601_033336-55b32491.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x1024_40k_cityscapes/ocrnet_hr48_512x1024_40k_cityscapes_20200601_033336.log.json) | +| OCRNet | HRNetV2p-W18-Small | 512x1024 | 80000 | - | - | 77.16 | 78.66 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ocrnet/ocrnet_hr18s_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x1024_80k_cityscapes/ocrnet_hr18s_512x1024_80k_cityscapes_20200601_222735-55979e63.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x1024_80k_cityscapes/ocrnet_hr18s_512x1024_80k_cityscapes_20200601_222735.log.json) | +| OCRNet | HRNetV2p-W18 | 512x1024 | 80000 | - | - | 78.57 | 80.46 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ocrnet/ocrnet_hr18_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x1024_80k_cityscapes/ocrnet_hr18_512x1024_80k_cityscapes_20200614_230521-c2e1dd4a.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x1024_80k_cityscapes/ocrnet_hr18_512x1024_80k_cityscapes_20200614_230521.log.json) | +| OCRNet | HRNetV2p-W48 | 512x1024 | 80000 | - | - | 80.70 | 81.87 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ocrnet/ocrnet_hr48_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x1024_80k_cityscapes/ocrnet_hr48_512x1024_80k_cityscapes_20200601_222752-9076bcdf.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x1024_80k_cityscapes/ocrnet_hr48_512x1024_80k_cityscapes_20200601_222752.log.json) | +| OCRNet | HRNetV2p-W18-Small | 512x1024 | 160000 | - | - | 78.45 | 79.97 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ocrnet/ocrnet_hr18s_512x1024_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x1024_160k_cityscapes/ocrnet_hr18s_512x1024_160k_cityscapes_20200602_191005-f4a7af28.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x1024_160k_cityscapes/ocrnet_hr18s_512x1024_160k_cityscapes_20200602_191005.log.json) | +| OCRNet | HRNetV2p-W18 | 512x1024 | 160000 | - | - | 79.47 | 80.91 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ocrnet/ocrnet_hr18_512x1024_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x1024_160k_cityscapes/ocrnet_hr18_512x1024_160k_cityscapes_20200602_191001-b9172d0c.pth) | 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x1024_160k_cityscapes/ocrnet_hr18_512x1024_160k_cityscapes_20200602_191001.log.json) | +| OCRNet | HRNetV2p-W48 | 512x1024 | 160000 | - | - | 81.35 | 82.70 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ocrnet/ocrnet_hr48_512x1024_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x1024_160k_cityscapes/ocrnet_hr48_512x1024_160k_cityscapes_20200602_191037-dfbf1b0c.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x1024_160k_cityscapes/ocrnet_hr48_512x1024_160k_cityscapes_20200602_191037.log.json) | + +#### ResNet backbone + +| Method | Backbone | Crop Size | Batch Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ---------- | ------- | -------- | -------------- | ----- | ------------: | ------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| OCRNet | R-101-D8 | 512x1024 | 8 | 40000 | - | - | 80.09 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ocrnet/ocrnet_r101-d8_512x1024_40k_b8_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_r101-d8_512x1024_40k_b8_cityscapes/ocrnet_r101-d8_512x1024_40k_b8_cityscapes-02ac0f13.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_r101-d8_512x1024_40k_b8_cityscapes/ocrnet_r101-d8_512x1024_40k_b8_cityscapes_20200717_110721.log.json) | +| OCRNet | R-101-D8 | 512x1024 | 16 | 40000 | 8.8 | 3.02 | 80.30 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ocrnet/ocrnet_r101-d8_512x1024_40k_b16_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_r101-d8_512x1024_40k_b16_cityscapes/ocrnet_r101-d8_512x1024_40k_b16_cityscapes-db500f80.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_r101-d8_512x1024_40k_b16_cityscapes/ocrnet_r101-d8_512x1024_40k_b16_cityscapes_20200723_193726.log.json) | +| OCRNet | R-101-D8 | 512x1024 | 16 | 80000 | 8.8 | 3.02 | 80.81 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ocrnet/ocrnet_r101-d8_512x1024_80k_b16_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_r101-d8_512x1024_80k_b16_cityscapes/ocrnet_r101-d8_512x1024_80k_b16_cityscapes-78688424.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_r101-d8_512x1024_80k_b16_cityscapes/ocrnet_r101-d8_512x1024_80k_b16_cityscapes_20200723_192421.log.json) | + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | ------------------ | --------- | ------: | -------- | -------------- | ----: | ------------: | --------------------------------------------------------------------------------------------------------------------- | 
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| OCRNet | HRNetV2p-W18-Small | 512x512 | 80000 | 6.7 | 28.98 | 35.06 | 35.80 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ocrnet/ocrnet_hr18s_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x512_80k_ade20k/ocrnet_hr18s_512x512_80k_ade20k_20200615_055600-e80b62af.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x512_80k_ade20k/ocrnet_hr18s_512x512_80k_ade20k_20200615_055600.log.json) | +| OCRNet | HRNetV2p-W18 | 512x512 | 80000 | 7.9 | 18.93 | 37.79 | 39.16 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ocrnet/ocrnet_hr18_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x512_80k_ade20k/ocrnet_hr18_512x512_80k_ade20k_20200615_053157-d173d83b.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x512_80k_ade20k/ocrnet_hr18_512x512_80k_ade20k_20200615_053157.log.json) | +| OCRNet | HRNetV2p-W48 | 512x512 | 80000 | 11.2 | 16.99 | 43.00 | 44.30 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ocrnet/ocrnet_hr48_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x512_80k_ade20k/ocrnet_hr48_512x512_80k_ade20k_20200615_021518-d168c2d1.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x512_80k_ade20k/ocrnet_hr48_512x512_80k_ade20k_20200615_021518.log.json) | +| OCRNet | HRNetV2p-W18-Small | 512x512 | 160000 | - | - | 37.19 | 38.40 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ocrnet/ocrnet_hr18s_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x512_160k_ade20k/ocrnet_hr18s_512x512_160k_ade20k_20200615_184505-8e913058.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x512_160k_ade20k/ocrnet_hr18s_512x512_160k_ade20k_20200615_184505.log.json) | +| OCRNet | HRNetV2p-W18 | 512x512 | 160000 | - | - | 39.32 | 40.80 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ocrnet/ocrnet_hr18_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x512_160k_ade20k/ocrnet_hr18_512x512_160k_ade20k_20200615_200940-d8fcd9d1.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x512_160k_ade20k/ocrnet_hr18_512x512_160k_ade20k_20200615_200940.log.json) | +| OCRNet | HRNetV2p-W48 | 512x512 | 160000 | - | - | 43.25 | 44.88 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ocrnet/ocrnet_hr48_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x512_160k_ade20k/ocrnet_hr48_512x512_160k_ade20k_20200615_184705-a073726d.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x512_160k_ade20k/ocrnet_hr48_512x512_160k_ade20k_20200615_184705.log.json) | + +### Pascal VOC 2012 + Aug + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | 
------------------ | --------- | ------: | -------- | -------------- | ----: | ------------: | ---------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| OCRNet | HRNetV2p-W18-Small | 512x512 | 20000 | 3.5 | 31.55 | 71.70 | 73.84 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ocrnet/ocrnet_hr18s_512x512_20k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x512_20k_voc12aug/ocrnet_hr18s_512x512_20k_voc12aug_20200617_233913-02b04fcb.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x512_20k_voc12aug/ocrnet_hr18s_512x512_20k_voc12aug_20200617_233913.log.json) |
+| OCRNet | HRNetV2p-W18 | 512x512 | 20000 | 4.7 | 19.91 | 74.75 | 77.11 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ocrnet/ocrnet_hr18_512x512_20k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x512_20k_voc12aug/ocrnet_hr18_512x512_20k_voc12aug_20200617_233932-8954cbb7.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x512_20k_voc12aug/ocrnet_hr18_512x512_20k_voc12aug_20200617_233932.log.json) |
+| OCRNet | HRNetV2p-W48 | 512x512 | 20000 | 8.1 | 17.83 | 77.72 | 79.87 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ocrnet/ocrnet_hr48_512x512_20k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x512_20k_voc12aug/ocrnet_hr48_512x512_20k_voc12aug_20200617_233932-9e82080a.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x512_20k_voc12aug/ocrnet_hr48_512x512_20k_voc12aug_20200617_233932.log.json) |
+| OCRNet | HRNetV2p-W18-Small | 512x512 | 40000 | - | - | 72.76 | 74.60 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ocrnet/ocrnet_hr18s_512x512_40k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x512_40k_voc12aug/ocrnet_hr18s_512x512_40k_voc12aug_20200614_002025-42b587ac.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x512_40k_voc12aug/ocrnet_hr18s_512x512_40k_voc12aug_20200614_002025.log.json) |
+| OCRNet | HRNetV2p-W18 | 512x512 | 40000 | - | - | 74.98 | 77.40 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ocrnet/ocrnet_hr18_512x512_40k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x512_40k_voc12aug/ocrnet_hr18_512x512_40k_voc12aug_20200614_015958-714302be.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x512_40k_voc12aug/ocrnet_hr18_512x512_40k_voc12aug_20200614_015958.log.json) |
+| OCRNet | HRNetV2p-W48 | 512x512 | 40000 | - | - | 77.14 | 79.71 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ocrnet/ocrnet_hr48_512x512_40k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x512_40k_voc12aug/ocrnet_hr48_512x512_40k_voc12aug_20200614_015958-255bc5ce.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x512_40k_voc12aug/ocrnet_hr48_512x512_40k_voc12aug_20200614_015958.log.json) |
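The OCRNet configs that follow share one structural trick: `decode_head` is a list of two heads rather than a single dict — a lightweight `FCNHead` that produces the coarse soft-object-region prediction (auxiliary loss weight 0.4) and the `OCRHead` that refines it with object-contextual representations (main loss weight 1.0), run by mmseg as a cascade. A minimal check of this on a merged config (again assuming mmcv 1.x and this `configs/` tree are available):

```python
# Sketch: the OCRNet configs below declare a two-stage cascade of heads.
from mmcv import Config

cfg = Config.fromfile('configs/ocrnet/ocrnet_hr18_512x512_80k_ade20k.py')

heads = cfg.model.decode_head
assert isinstance(heads, list) and len(heads) == 2
for head in heads:
    # Expected: ('FCNHead', 0.4) for the coarse prior and
    # ('OCRHead', 1.0) for the object-contextual refinement stage.
    print(head.type, head.loss_decode.loss_weight)
```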
diff --git a/configs/ocrnet/ocrnet_hr18_512x1024_160k_cityscapes.py b/configs/ocrnet/ocrnet_hr18_512x1024_160k_cityscapes.py
new file mode 100644
index 0000000000000000000000000000000000000000..1c86eba17c46a863091d999b1a090e1237202ec5
--- /dev/null
+++ b/configs/ocrnet/ocrnet_hr18_512x1024_160k_cityscapes.py
@@ -0,0 +1,4 @@
+_base_ = [
+    '../_base_/models/ocrnet_hr18.py', '../_base_/datasets/cityscapes.py',
+    '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py'
+]
diff --git a/configs/ocrnet/ocrnet_hr18_512x1024_40k_cityscapes.py b/configs/ocrnet/ocrnet_hr18_512x1024_40k_cityscapes.py
new file mode 100644
index 0000000000000000000000000000000000000000..2c73b3839c8c1bc859eb3b8864256a00cfd022fe
--- /dev/null
+++ b/configs/ocrnet/ocrnet_hr18_512x1024_40k_cityscapes.py
@@ -0,0 +1,4 @@
+_base_ = [
+    '../_base_/models/ocrnet_hr18.py', '../_base_/datasets/cityscapes.py',
+    '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py'
+]
diff --git a/configs/ocrnet/ocrnet_hr18_512x1024_80k_cityscapes.py b/configs/ocrnet/ocrnet_hr18_512x1024_80k_cityscapes.py
new file mode 100644
index 0000000000000000000000000000000000000000..506ad9319a9418f50650c477698c9b5cb9bf6663
--- /dev/null
+++ b/configs/ocrnet/ocrnet_hr18_512x1024_80k_cityscapes.py
@@ -0,0 +1,4 @@
+_base_ = [
+    '../_base_/models/ocrnet_hr18.py', '../_base_/datasets/cityscapes.py',
+    '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py'
+]
diff --git a/configs/ocrnet/ocrnet_hr18_512x512_160k_ade20k.py b/configs/ocrnet/ocrnet_hr18_512x512_160k_ade20k.py
new file mode 100644
index 0000000000000000000000000000000000000000..a3c86e18ea65c6aaa36a4fb6e2708f08c7ae1698
--- /dev/null
+++ b/configs/ocrnet/ocrnet_hr18_512x512_160k_ade20k.py
@@ -0,0 +1,35 @@
+_base_ = [
+    '../_base_/models/ocrnet_hr18.py', '../_base_/datasets/ade20k.py',
+    '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py'
+]
+norm_cfg = dict(type='SyncBN', requires_grad=True)
+model = dict(decode_head=[
+    dict(
+        type='FCNHead',
+        in_channels=[18, 36, 72, 144],
+        channels=sum([18, 36, 72, 144]),
+        in_index=(0, 1, 2, 3),
+        input_transform='resize_concat',
+        kernel_size=1,
+        num_convs=1,
+        concat_input=False,
+        dropout_ratio=-1,
+        num_classes=150,
+        norm_cfg=norm_cfg,
+        align_corners=False,
+        loss_decode=dict(
+            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
+    dict(
+        type='OCRHead',
+        in_channels=[18, 36, 72, 144],
+        in_index=(0, 1, 2, 3),
+        input_transform='resize_concat',
+        channels=512,
+        ocr_channels=256,
+        dropout_ratio=-1,
+        num_classes=150,
+        norm_cfg=norm_cfg,
+        align_corners=False,
+        loss_decode=dict(
+            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
+])
diff --git a/configs/ocrnet/ocrnet_hr18_512x512_20k_voc12aug.py b/configs/ocrnet/ocrnet_hr18_512x512_20k_voc12aug.py
new file mode 100644
index 0000000000000000000000000000000000000000..ab9d6446c9089bfae533b9dcd66e1352d81f74d0
--- /dev/null
+++ b/configs/ocrnet/ocrnet_hr18_512x512_20k_voc12aug.py
@@ -0,0 +1,36 @@
+_base_ = [
+    '../_base_/models/ocrnet_hr18.py',
+    '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py',
+    '../_base_/schedules/schedule_20k.py'
+]
+norm_cfg = dict(type='SyncBN', requires_grad=True)
+model = dict(decode_head=[
+    dict(
+        type='FCNHead',
+        in_channels=[18, 36, 72, 144],
+        channels=sum([18, 36, 72, 144]),
+        in_index=(0, 1, 2, 3),
+
input_transform='resize_concat', + kernel_size=1, + num_convs=1, + concat_input=False, + dropout_ratio=-1, + num_classes=21, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='OCRHead', + in_channels=[18, 36, 72, 144], + in_index=(0, 1, 2, 3), + input_transform='resize_concat', + channels=512, + ocr_channels=256, + dropout_ratio=-1, + num_classes=21, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), +]) diff --git a/configs/ocrnet/ocrnet_hr18_512x512_40k_voc12aug.py b/configs/ocrnet/ocrnet_hr18_512x512_40k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..df79a9cf13963d26384b00ced0cf5efa9f68a420 --- /dev/null +++ b/configs/ocrnet/ocrnet_hr18_512x512_40k_voc12aug.py @@ -0,0 +1,36 @@ +_base_ = [ + '../_base_/models/ocrnet_hr18.py', + '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict(decode_head=[ + dict( + type='FCNHead', + in_channels=[18, 36, 72, 144], + channels=sum([18, 36, 72, 144]), + in_index=(0, 1, 2, 3), + input_transform='resize_concat', + kernel_size=1, + num_convs=1, + concat_input=False, + dropout_ratio=-1, + num_classes=21, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='OCRHead', + in_channels=[18, 36, 72, 144], + in_index=(0, 1, 2, 3), + input_transform='resize_concat', + channels=512, + ocr_channels=256, + dropout_ratio=-1, + num_classes=21, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), +]) diff --git a/configs/ocrnet/ocrnet_hr18_512x512_80k_ade20k.py b/configs/ocrnet/ocrnet_hr18_512x512_80k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..6ad67722a50c2b2ece5fcb7f0dd1819061ff6b3e --- /dev/null +++ b/configs/ocrnet/ocrnet_hr18_512x512_80k_ade20k.py @@ -0,0 +1,35 @@ +_base_ = [ + '../_base_/models/ocrnet_hr18.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict(decode_head=[ + dict( + type='FCNHead', + in_channels=[18, 36, 72, 144], + channels=sum([18, 36, 72, 144]), + in_index=(0, 1, 2, 3), + input_transform='resize_concat', + kernel_size=1, + num_convs=1, + concat_input=False, + dropout_ratio=-1, + num_classes=150, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='OCRHead', + in_channels=[18, 36, 72, 144], + in_index=(0, 1, 2, 3), + input_transform='resize_concat', + channels=512, + ocr_channels=256, + dropout_ratio=-1, + num_classes=150, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), +]) diff --git a/configs/ocrnet/ocrnet_hr18s_512x1024_160k_cityscapes.py b/configs/ocrnet/ocrnet_hr18s_512x1024_160k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..fc7909785f743071cad2cd1032000405435f81d4 --- /dev/null +++ b/configs/ocrnet/ocrnet_hr18s_512x1024_160k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = './ocrnet_hr18_512x1024_160k_cityscapes.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w18_small', + 
backbone=dict( + extra=dict( + stage1=dict(num_blocks=(2, )), + stage2=dict(num_blocks=(2, 2)), + stage3=dict(num_modules=3, num_blocks=(2, 2, 2)), + stage4=dict(num_modules=2, num_blocks=(2, 2, 2, 2))))) diff --git a/configs/ocrnet/ocrnet_hr18s_512x1024_40k_cityscapes.py b/configs/ocrnet/ocrnet_hr18s_512x1024_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..923731f74f80c11e196f6099b1c84875686cd441 --- /dev/null +++ b/configs/ocrnet/ocrnet_hr18s_512x1024_40k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = './ocrnet_hr18_512x1024_40k_cityscapes.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w18_small', + backbone=dict( + extra=dict( + stage1=dict(num_blocks=(2, )), + stage2=dict(num_blocks=(2, 2)), + stage3=dict(num_modules=3, num_blocks=(2, 2, 2)), + stage4=dict(num_modules=2, num_blocks=(2, 2, 2, 2))))) diff --git a/configs/ocrnet/ocrnet_hr18s_512x1024_80k_cityscapes.py b/configs/ocrnet/ocrnet_hr18s_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..be6bf16a2fd234f3526bf8fb8c30179f1ef9df78 --- /dev/null +++ b/configs/ocrnet/ocrnet_hr18s_512x1024_80k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = './ocrnet_hr18_512x1024_80k_cityscapes.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w18_small', + backbone=dict( + extra=dict( + stage1=dict(num_blocks=(2, )), + stage2=dict(num_blocks=(2, 2)), + stage3=dict(num_modules=3, num_blocks=(2, 2, 2)), + stage4=dict(num_modules=2, num_blocks=(2, 2, 2, 2))))) diff --git a/configs/ocrnet/ocrnet_hr18s_512x512_160k_ade20k.py b/configs/ocrnet/ocrnet_hr18s_512x512_160k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..81f3d5cb91607134bb1d844d78df7a3c411c134d --- /dev/null +++ b/configs/ocrnet/ocrnet_hr18s_512x512_160k_ade20k.py @@ -0,0 +1,9 @@ +_base_ = './ocrnet_hr18_512x512_160k_ade20k.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w18_small', + backbone=dict( + extra=dict( + stage1=dict(num_blocks=(2, )), + stage2=dict(num_blocks=(2, 2)), + stage3=dict(num_modules=3, num_blocks=(2, 2, 2)), + stage4=dict(num_modules=2, num_blocks=(2, 2, 2, 2))))) diff --git a/configs/ocrnet/ocrnet_hr18s_512x512_20k_voc12aug.py b/configs/ocrnet/ocrnet_hr18s_512x512_20k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..ceb944815b5a979ddb72015295375f6fe0c31a89 --- /dev/null +++ b/configs/ocrnet/ocrnet_hr18s_512x512_20k_voc12aug.py @@ -0,0 +1,9 @@ +_base_ = './ocrnet_hr18_512x512_20k_voc12aug.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w18_small', + backbone=dict( + extra=dict( + stage1=dict(num_blocks=(2, )), + stage2=dict(num_blocks=(2, 2)), + stage3=dict(num_modules=3, num_blocks=(2, 2, 2)), + stage4=dict(num_modules=2, num_blocks=(2, 2, 2, 2))))) diff --git a/configs/ocrnet/ocrnet_hr18s_512x512_40k_voc12aug.py b/configs/ocrnet/ocrnet_hr18s_512x512_40k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..70babc91c99eb99ee4f941b34ea886236531832e --- /dev/null +++ b/configs/ocrnet/ocrnet_hr18s_512x512_40k_voc12aug.py @@ -0,0 +1,9 @@ +_base_ = './ocrnet_hr18_512x512_40k_voc12aug.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w18_small', + backbone=dict( + extra=dict( + stage1=dict(num_blocks=(2, )), + stage2=dict(num_blocks=(2, 2)), + stage3=dict(num_modules=3, num_blocks=(2, 2, 2)), + stage4=dict(num_modules=2, num_blocks=(2, 2, 2, 2))))) diff --git a/configs/ocrnet/ocrnet_hr18s_512x512_80k_ade20k.py b/configs/ocrnet/ocrnet_hr18s_512x512_80k_ade20k.py new file 
mode 100644 index 0000000000000000000000000000000000000000..36e77219ac2d7ee6795db7c40ad7341749a3b1c7 --- /dev/null +++ b/configs/ocrnet/ocrnet_hr18s_512x512_80k_ade20k.py @@ -0,0 +1,9 @@ +_base_ = './ocrnet_hr18_512x512_80k_ade20k.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w18_small', + backbone=dict( + extra=dict( + stage1=dict(num_blocks=(2, )), + stage2=dict(num_blocks=(2, 2)), + stage3=dict(num_modules=3, num_blocks=(2, 2, 2)), + stage4=dict(num_modules=2, num_blocks=(2, 2, 2, 2))))) diff --git a/configs/ocrnet/ocrnet_hr48_512x1024_160k_cityscapes.py b/configs/ocrnet/ocrnet_hr48_512x1024_160k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..c094391b1dfcef2fa6278f0c181fb50c303f7a4c --- /dev/null +++ b/configs/ocrnet/ocrnet_hr48_512x1024_160k_cityscapes.py @@ -0,0 +1,39 @@ +_base_ = './ocrnet_hr18_512x1024_160k_cityscapes.py' +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w48', + backbone=dict( + extra=dict( + stage2=dict(num_channels=(48, 96)), + stage3=dict(num_channels=(48, 96, 192)), + stage4=dict(num_channels=(48, 96, 192, 384)))), + decode_head=[ + dict( + type='FCNHead', + in_channels=[48, 96, 192, 384], + channels=sum([48, 96, 192, 384]), + input_transform='resize_concat', + in_index=(0, 1, 2, 3), + kernel_size=1, + num_convs=1, + norm_cfg=norm_cfg, + concat_input=False, + dropout_ratio=-1, + num_classes=19, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='OCRHead', + in_channels=[48, 96, 192, 384], + channels=512, + ocr_channels=256, + input_transform='resize_concat', + in_index=(0, 1, 2, 3), + norm_cfg=norm_cfg, + dropout_ratio=-1, + num_classes=19, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)) + ]) diff --git a/configs/ocrnet/ocrnet_hr48_512x1024_40k_cityscapes.py b/configs/ocrnet/ocrnet_hr48_512x1024_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..0aada9d8dcd792fd4fc7da8908cc11d44a9ff521 --- /dev/null +++ b/configs/ocrnet/ocrnet_hr48_512x1024_40k_cityscapes.py @@ -0,0 +1,39 @@ +_base_ = './ocrnet_hr18_512x1024_40k_cityscapes.py' +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w48', + backbone=dict( + extra=dict( + stage2=dict(num_channels=(48, 96)), + stage3=dict(num_channels=(48, 96, 192)), + stage4=dict(num_channels=(48, 96, 192, 384)))), + decode_head=[ + dict( + type='FCNHead', + in_channels=[48, 96, 192, 384], + channels=sum([48, 96, 192, 384]), + input_transform='resize_concat', + in_index=(0, 1, 2, 3), + kernel_size=1, + num_convs=1, + norm_cfg=norm_cfg, + concat_input=False, + dropout_ratio=-1, + num_classes=19, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='OCRHead', + in_channels=[48, 96, 192, 384], + channels=512, + ocr_channels=256, + input_transform='resize_concat', + in_index=(0, 1, 2, 3), + norm_cfg=norm_cfg, + dropout_ratio=-1, + num_classes=19, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)) + ]) diff --git a/configs/ocrnet/ocrnet_hr48_512x1024_80k_cityscapes.py b/configs/ocrnet/ocrnet_hr48_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..1b2e0094393151fa8975a0d53c48b6048b7e1929 --- /dev/null +++ 
b/configs/ocrnet/ocrnet_hr48_512x1024_80k_cityscapes.py @@ -0,0 +1,39 @@ +_base_ = './ocrnet_hr18_512x1024_80k_cityscapes.py' +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w48', + backbone=dict( + extra=dict( + stage2=dict(num_channels=(48, 96)), + stage3=dict(num_channels=(48, 96, 192)), + stage4=dict(num_channels=(48, 96, 192, 384)))), + decode_head=[ + dict( + type='FCNHead', + in_channels=[48, 96, 192, 384], + channels=sum([48, 96, 192, 384]), + input_transform='resize_concat', + in_index=(0, 1, 2, 3), + kernel_size=1, + num_convs=1, + norm_cfg=norm_cfg, + concat_input=False, + dropout_ratio=-1, + num_classes=19, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='OCRHead', + in_channels=[48, 96, 192, 384], + channels=512, + ocr_channels=256, + input_transform='resize_concat', + in_index=(0, 1, 2, 3), + norm_cfg=norm_cfg, + dropout_ratio=-1, + num_classes=19, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)) + ]) diff --git a/configs/ocrnet/ocrnet_hr48_512x512_160k_ade20k.py b/configs/ocrnet/ocrnet_hr48_512x512_160k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..3b3e8af9538e6ce3c929a902e3d1ee5be53469a5 --- /dev/null +++ b/configs/ocrnet/ocrnet_hr48_512x512_160k_ade20k.py @@ -0,0 +1,39 @@ +_base_ = './ocrnet_hr18_512x512_160k_ade20k.py' +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w48', + backbone=dict( + extra=dict( + stage2=dict(num_channels=(48, 96)), + stage3=dict(num_channels=(48, 96, 192)), + stage4=dict(num_channels=(48, 96, 192, 384)))), + decode_head=[ + dict( + type='FCNHead', + in_channels=[48, 96, 192, 384], + channels=sum([48, 96, 192, 384]), + input_transform='resize_concat', + in_index=(0, 1, 2, 3), + kernel_size=1, + num_convs=1, + norm_cfg=norm_cfg, + concat_input=False, + dropout_ratio=-1, + num_classes=150, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='OCRHead', + in_channels=[48, 96, 192, 384], + channels=512, + ocr_channels=256, + input_transform='resize_concat', + in_index=(0, 1, 2, 3), + norm_cfg=norm_cfg, + dropout_ratio=-1, + num_classes=150, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)) + ]) diff --git a/configs/ocrnet/ocrnet_hr48_512x512_20k_voc12aug.py b/configs/ocrnet/ocrnet_hr48_512x512_20k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..c2dd6d1158bd31ecdd7874827fd37bffb5d26db6 --- /dev/null +++ b/configs/ocrnet/ocrnet_hr48_512x512_20k_voc12aug.py @@ -0,0 +1,39 @@ +_base_ = './ocrnet_hr18_512x512_20k_voc12aug.py' +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w48', + backbone=dict( + extra=dict( + stage2=dict(num_channels=(48, 96)), + stage3=dict(num_channels=(48, 96, 192)), + stage4=dict(num_channels=(48, 96, 192, 384)))), + decode_head=[ + dict( + type='FCNHead', + in_channels=[48, 96, 192, 384], + channels=sum([48, 96, 192, 384]), + input_transform='resize_concat', + in_index=(0, 1, 2, 3), + kernel_size=1, + num_convs=1, + norm_cfg=norm_cfg, + concat_input=False, + dropout_ratio=-1, + num_classes=21, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='OCRHead', + 
in_channels=[48, 96, 192, 384], + channels=512, + ocr_channels=256, + input_transform='resize_concat', + in_index=(0, 1, 2, 3), + norm_cfg=norm_cfg, + dropout_ratio=-1, + num_classes=21, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)) + ]) diff --git a/configs/ocrnet/ocrnet_hr48_512x512_40k_voc12aug.py b/configs/ocrnet/ocrnet_hr48_512x512_40k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..89e6309f55f6b939f7d79271513da4934bbacbb6 --- /dev/null +++ b/configs/ocrnet/ocrnet_hr48_512x512_40k_voc12aug.py @@ -0,0 +1,39 @@ +_base_ = './ocrnet_hr18_512x512_40k_voc12aug.py' +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w48', + backbone=dict( + extra=dict( + stage2=dict(num_channels=(48, 96)), + stage3=dict(num_channels=(48, 96, 192)), + stage4=dict(num_channels=(48, 96, 192, 384)))), + decode_head=[ + dict( + type='FCNHead', + in_channels=[48, 96, 192, 384], + channels=sum([48, 96, 192, 384]), + input_transform='resize_concat', + in_index=(0, 1, 2, 3), + kernel_size=1, + num_convs=1, + norm_cfg=norm_cfg, + concat_input=False, + dropout_ratio=-1, + num_classes=21, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='OCRHead', + in_channels=[48, 96, 192, 384], + channels=512, + ocr_channels=256, + input_transform='resize_concat', + in_index=(0, 1, 2, 3), + norm_cfg=norm_cfg, + dropout_ratio=-1, + num_classes=21, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)) + ]) diff --git a/configs/ocrnet/ocrnet_hr48_512x512_80k_ade20k.py b/configs/ocrnet/ocrnet_hr48_512x512_80k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..04971226eb0fd6461b715358ac955dfb78102992 --- /dev/null +++ b/configs/ocrnet/ocrnet_hr48_512x512_80k_ade20k.py @@ -0,0 +1,39 @@ +_base_ = './ocrnet_hr18_512x512_80k_ade20k.py' +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w48', + backbone=dict( + extra=dict( + stage2=dict(num_channels=(48, 96)), + stage3=dict(num_channels=(48, 96, 192)), + stage4=dict(num_channels=(48, 96, 192, 384)))), + decode_head=[ + dict( + type='FCNHead', + in_channels=[48, 96, 192, 384], + channels=sum([48, 96, 192, 384]), + input_transform='resize_concat', + in_index=(0, 1, 2, 3), + kernel_size=1, + num_convs=1, + norm_cfg=norm_cfg, + concat_input=False, + dropout_ratio=-1, + num_classes=150, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='OCRHead', + in_channels=[48, 96, 192, 384], + channels=512, + ocr_channels=256, + input_transform='resize_concat', + in_index=(0, 1, 2, 3), + norm_cfg=norm_cfg, + dropout_ratio=-1, + num_classes=150, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)) + ]) diff --git a/configs/ocrnet/ocrnet_r101-d8_512x1024_40k_b16_cityscapes.py b/configs/ocrnet/ocrnet_r101-d8_512x1024_40k_b16_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..3dd70b74a0bf912d8a6fd39f1f26be7f7571ccd6 --- /dev/null +++ b/configs/ocrnet/ocrnet_r101-d8_512x1024_40k_b16_cityscapes.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/ocrnet_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] +model = 
dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) +optimizer = dict(lr=0.02) +lr_config = dict(min_lr=2e-4) diff --git a/configs/ocrnet/ocrnet_r101-d8_512x1024_40k_b8_cityscapes.py b/configs/ocrnet/ocrnet_r101-d8_512x1024_40k_b8_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..e34f3432e581ff506c9d2951c98b5aad7b1be6a5 --- /dev/null +++ b/configs/ocrnet/ocrnet_r101-d8_512x1024_40k_b8_cityscapes.py @@ -0,0 +1,5 @@ +_base_ = [ + '../_base_/models/ocrnet_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/ocrnet/ocrnet_r101-d8_512x1024_80k_b16_cityscapes.py b/configs/ocrnet/ocrnet_r101-d8_512x1024_80k_b16_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..33d96c76f68b92217ed38afe9538144dfedc4fd2 --- /dev/null +++ b/configs/ocrnet/ocrnet_r101-d8_512x1024_80k_b16_cityscapes.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/ocrnet_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) +optimizer = dict(lr=0.02) +lr_config = dict(min_lr=2e-4) diff --git a/configs/point_rend/README.md b/configs/point_rend/README.md new file mode 100644 index 0000000000000000000000000000000000000000..9031f2b70e63d2c9cd1a3de3ee4a1dda5f3e04e7 --- /dev/null +++ b/configs/point_rend/README.md @@ -0,0 +1,31 @@ +# PointRend: Image Segmentation as Rendering + +## Introduction + + + +``` +@inproceedings{kirillov2020pointrend, + title={Pointrend: Image segmentation as rendering}, + author={Kirillov, Alexander and Wu, Yuxin and He, Kaiming and Girshick, Ross}, + booktitle={Proceedings of the IEEE/CVF conference on computer vision and pattern recognition}, + pages={9799--9808}, + year={2020} +} +``` + +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| --------- | -------- | --------- | ------: | -------: | -------------- | ----: | ------------- | ------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| PointRend | R-50 | 512x1024 | 80000 | 3.1 | 8.48 | 76.47 | 78.13 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/point_rend/pointrend_r50_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/point_rend/pointrend_r50_512x1024_80k_cityscapes/pointrend_r50_512x1024_80k_cityscapes_20200711_015821-bb1ff523.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/point_rend/pointrend_r50_512x1024_80k_cityscapes/pointrend_r50_512x1024_80k_cityscapes-20200715_214714.log.json) | +| PointRend | R-101 | 512x1024 | 80000 | 4.2 | 7.00 | 78.30 | 79.97 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/point_rend/pointrend_r101_512x1024_80k_cityscapes.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/point_rend/pointrend_r101_512x1024_80k_cityscapes/pointrend_r101_512x1024_80k_cityscapes_20200711_170850-d0ca84be.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/point_rend/pointrend_r101_512x1024_80k_cityscapes/pointrend_r101_512x1024_80k_cityscapes-20200715_214824.log.json) | + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| --------- | -------- | --------- | ------: | -------: | -------------- | ----: | ------------- | --------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| PointRend | R-50 | 512x512 | 160000 | 5.1 | 17.31 | 37.64 | 39.17 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/point_rend/pointrend_r50_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/point_rend/pointrend_r50_512x512_160k_ade20k/pointrend_r50_512x512_160k_ade20k_20200807_232644-ac3febf2.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/point_rend/pointrend_r50_512x512_160k_ade20k/pointrend_r50_512x512_160k_ade20k-20200807_232644.log.json) | +| PointRend | R-101 | 512x512 | 160000 | 6.1 | 15.50 | 40.02 | 41.60 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/point_rend/pointrend_r101_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/point_rend/pointrend_r101_512x512_160k_ade20k/pointrend_r101_512x512_160k_ade20k_20200808_030852-8834902a.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/point_rend/pointrend_r101_512x512_160k_ade20k/pointrend_r101_512x512_160k_ade20k-20200808_030852.log.json) | diff --git a/configs/point_rend/pointrend_r101_512x1024_80k_cityscapes.py b/configs/point_rend/pointrend_r101_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..a8c14c8cf91d7cbcc05065a6dc387101dff8cdf6 --- /dev/null +++ b/configs/point_rend/pointrend_r101_512x1024_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './pointrend_r50_512x1024_80k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/point_rend/pointrend_r101_512x512_160k_ade20k.py b/configs/point_rend/pointrend_r101_512x512_160k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..4d1f8c8154431b056fb8371772f03dfa49ac1ad3 --- /dev/null +++ b/configs/point_rend/pointrend_r101_512x512_160k_ade20k.py @@ -0,0 +1,2 @@ +_base_ = './pointrend_r50_512x512_160k_ade20k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/point_rend/pointrend_r50_512x1024_80k_cityscapes.py b/configs/point_rend/pointrend_r50_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..96cbaa48d61ee208117d074e9f06bf4218407d78 --- /dev/null +++ b/configs/point_rend/pointrend_r50_512x1024_80k_cityscapes.py @@ -0,0 +1,5 @@ +_base_ = [ + '../_base_/models/pointrend_r50.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', 
'../_base_/schedules/schedule_80k.py' +] +lr_config = dict(warmup='linear', warmup_iters=200) diff --git a/configs/point_rend/pointrend_r50_512x512_160k_ade20k.py b/configs/point_rend/pointrend_r50_512x512_160k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..db8c634c0f889c69ce80f86c445c493dcfdbd3c8 --- /dev/null +++ b/configs/point_rend/pointrend_r50_512x512_160k_ade20k.py @@ -0,0 +1,32 @@ +_base_ = [ + '../_base_/models/pointrend_r50.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict(decode_head=[ + dict( + type='FPNHead', + in_channels=[256, 256, 256, 256], + in_index=[0, 1, 2, 3], + feature_strides=[4, 8, 16, 32], + channels=128, + dropout_ratio=-1, + num_classes=150, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + dict( + type='PointHead', + in_channels=[256], + in_index=[0], + channels=256, + num_fcs=3, + coarse_pred_each_layer=True, + dropout_ratio=-1, + num_classes=150, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)) +]) +lr_config = dict(warmup='linear', warmup_iters=200) diff --git a/configs/psanet/README.md b/configs/psanet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..01ed322587c23eac095fd870fca87d2a100fa24e --- /dev/null +++ b/configs/psanet/README.md @@ -0,0 +1,48 @@ +# PSANet: Point-wise Spatial Attention Network for Scene Parsing + +## Introduction + + + +```latex +@inproceedings{zhao2018psanet, + title={Psanet: Point-wise spatial attention network for scene parsing}, + author={Zhao, Hengshuang and Zhang, Yi and Liu, Shu and Shi, Jianping and Change Loy, Chen and Lin, Dahua and Jia, Jiaya}, + booktitle={Proceedings of the European Conference on Computer Vision (ECCV)}, + pages={267--283}, + year={2018} +} +``` + +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | --------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| PSANet | R-50-D8 | 512x1024 | 40000 | 7 | 3.17 | 77.63 | 79.04 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/psanet/psanet_r50-d8_512x1024_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x1024_40k_cityscapes/psanet_r50-d8_512x1024_40k_cityscapes_20200606_103117-99fac37c.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x1024_40k_cityscapes/psanet_r50-d8_512x1024_40k_cityscapes_20200606_103117.log.json) | +| PSANet | R-101-D8 | 512x1024 | 40000 | 10.5 | 2.20 | 79.14 | 80.19 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/psanet/psanet_r101-d8_512x1024_40k_cityscapes.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x1024_40k_cityscapes/psanet_r101-d8_512x1024_40k_cityscapes_20200606_001418-27b9cfa7.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x1024_40k_cityscapes/psanet_r101-d8_512x1024_40k_cityscapes_20200606_001418.log.json) | +| PSANet | R-50-D8 | 769x769 | 40000 | 7.9 | 1.40 | 77.99 | 79.64 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/psanet/psanet_r50-d8_769x769_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_769x769_40k_cityscapes/psanet_r50-d8_769x769_40k_cityscapes_20200530_033717-d5365506.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_769x769_40k_cityscapes/psanet_r50-d8_769x769_40k_cityscapes_20200530_033717.log.json) | +| PSANet | R-101-D8 | 769x769 | 40000 | 11.9 | 0.98 | 78.43 | 80.26 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/psanet/psanet_r101-d8_769x769_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_769x769_40k_cityscapes/psanet_r101-d8_769x769_40k_cityscapes_20200530_035107-997da1e6.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_769x769_40k_cityscapes/psanet_r101-d8_769x769_40k_cityscapes_20200530_035107.log.json) | +| PSANet | R-50-D8 | 512x1024 | 80000 | - | - | 77.24 | 78.69 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/psanet/psanet_r50-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x1024_80k_cityscapes/psanet_r50-d8_512x1024_80k_cityscapes_20200606_161842-ab60a24f.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x1024_80k_cityscapes/psanet_r50-d8_512x1024_80k_cityscapes_20200606_161842.log.json) | +| PSANet | R-101-D8 | 512x1024 | 80000 | - | - | 79.31 | 80.53 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/psanet/psanet_r101-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x1024_80k_cityscapes/psanet_r101-d8_512x1024_80k_cityscapes_20200606_161823-0f73a169.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x1024_80k_cityscapes/psanet_r101-d8_512x1024_80k_cityscapes_20200606_161823.log.json) | +| PSANet | R-50-D8 | 769x769 | 80000 | - | - | 79.31 | 80.91 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/psanet/psanet_r50-d8_769x769_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_769x769_80k_cityscapes/psanet_r50-d8_769x769_80k_cityscapes_20200606_225134-fe42f49e.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_769x769_80k_cityscapes/psanet_r50-d8_769x769_80k_cityscapes_20200606_225134.log.json) | +| PSANet | R-101-D8 | 769x769 | 80000 | - | - | 79.69 | 80.89 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/psanet/psanet_r101-d8_769x769_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_769x769_80k_cityscapes/psanet_r101-d8_769x769_80k_cityscapes_20200606_214550-7665827b.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_769x769_80k_cityscapes/psanet_r101-d8_769x769_80k_cityscapes_20200606_214550.log.json) | + +### 
ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ----------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| PSANet | R-50-D8 | 512x512 | 80000 | 9 | 18.91 | 41.14 | 41.91 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/psanet/psanet_r50-d8_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x512_80k_ade20k/psanet_r50-d8_512x512_80k_ade20k_20200614_144141-835e4b97.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x512_80k_ade20k/psanet_r50-d8_512x512_80k_ade20k_20200614_144141.log.json) | +| PSANet | R-101-D8 | 512x512 | 80000 | 12.5 | 13.13 | 43.80 | 44.75 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/psanet/psanet_r101-d8_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x512_80k_ade20k/psanet_r101-d8_512x512_80k_ade20k_20200614_185117-1fab60d4.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x512_80k_ade20k/psanet_r101-d8_512x512_80k_ade20k_20200614_185117.log.json) | +| PSANet | R-50-D8 | 512x512 | 160000 | - | - | 41.67 | 42.95 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/psanet/psanet_r50-d8_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x512_160k_ade20k/psanet_r50-d8_512x512_160k_ade20k_20200615_161258-148077dd.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x512_160k_ade20k/psanet_r50-d8_512x512_160k_ade20k_20200615_161258.log.json) | +| PSANet | R-101-D8 | 512x512 | 160000 | - | - | 43.74 | 45.38 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/psanet/psanet_r101-d8_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x512_160k_ade20k/psanet_r101-d8_512x512_160k_ade20k_20200615_161537-dbfa564c.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x512_160k_ade20k/psanet_r101-d8_512x512_160k_ade20k_20200615_161537.log.json) | + +### Pascal VOC 2012 + Aug + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| PSANet | R-50-D8 | 512x512 | 20000 | 6.9 | 18.24 | 76.39 | 77.34 | 
[config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/psanet/psanet_r50-d8_512x512_20k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x512_20k_voc12aug/psanet_r50-d8_512x512_20k_voc12aug_20200617_102413-2f1bbaa1.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x512_20k_voc12aug/psanet_r50-d8_512x512_20k_voc12aug_20200617_102413.log.json) | +| PSANet | R-101-D8 | 512x512 | 20000 | 10.4 | 12.63 | 77.91 | 79.30 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/psanet/psanet_r101-d8_512x512_20k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x512_20k_voc12aug/psanet_r101-d8_512x512_20k_voc12aug_20200617_110624-946fef11.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x512_20k_voc12aug/psanet_r101-d8_512x512_20k_voc12aug_20200617_110624.log.json) | +| PSANet | R-50-D8 | 512x512 | 40000 | - | - | 76.30 | 77.35 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/psanet/psanet_r50-d8_512x512_40k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x512_40k_voc12aug/psanet_r50-d8_512x512_40k_voc12aug_20200613_161946-f596afb5.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x512_40k_voc12aug/psanet_r50-d8_512x512_40k_voc12aug_20200613_161946.log.json) | +| PSANet | R-101-D8 | 512x512 | 40000 | - | - | 77.73 | 79.05 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/psanet/psanet_r101-d8_512x512_40k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x512_40k_voc12aug/psanet_r101-d8_512x512_40k_voc12aug_20200613_161946-1f560f9e.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x512_40k_voc12aug/psanet_r101-d8_512x512_40k_voc12aug_20200613_161946.log.json) | diff --git a/configs/psanet/psanet_r101-d8_512x1024_40k_cityscapes.py b/configs/psanet/psanet_r101-d8_512x1024_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..69d212f158552cf5a24f62174b24a9d4976477bb --- /dev/null +++ b/configs/psanet/psanet_r101-d8_512x1024_40k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './psanet_r50-d8_512x1024_40k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/psanet/psanet_r101-d8_512x1024_80k_cityscapes.py b/configs/psanet/psanet_r101-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..bc25d6aaf67ccb7e9fcb44ba2d803bebfa31b160 --- /dev/null +++ b/configs/psanet/psanet_r101-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './psanet_r50-d8_512x1024_80k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/psanet/psanet_r101-d8_512x512_160k_ade20k.py b/configs/psanet/psanet_r101-d8_512x512_160k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..7f6795e5ef0e4bf1d10ee7ed4f608bf93ac24216 --- /dev/null +++ b/configs/psanet/psanet_r101-d8_512x512_160k_ade20k.py @@ -0,0 +1,2 @@ +_base_ = './psanet_r50-d8_512x512_160k_ade20k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/psanet/psanet_r101-d8_512x512_20k_voc12aug.py b/configs/psanet/psanet_r101-d8_512x512_20k_voc12aug.py new file mode 100644 
index 0000000000000000000000000000000000000000..1a3c43495bbf9d302216d7ddf62df75446907a36 --- /dev/null +++ b/configs/psanet/psanet_r101-d8_512x512_20k_voc12aug.py @@ -0,0 +1,2 @@ +_base_ = './psanet_r50-d8_512x512_20k_voc12aug.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/psanet/psanet_r101-d8_512x512_40k_voc12aug.py b/configs/psanet/psanet_r101-d8_512x512_40k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..f62eef9773ddf41d996104de571bcda00c488e14 --- /dev/null +++ b/configs/psanet/psanet_r101-d8_512x512_40k_voc12aug.py @@ -0,0 +1,2 @@ +_base_ = './psanet_r50-d8_512x512_40k_voc12aug.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/psanet/psanet_r101-d8_512x512_80k_ade20k.py b/configs/psanet/psanet_r101-d8_512x512_80k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..f8865a7c4d795d9de3f5bc6b762b305b3cabc22f --- /dev/null +++ b/configs/psanet/psanet_r101-d8_512x512_80k_ade20k.py @@ -0,0 +1,2 @@ +_base_ = './psanet_r50-d8_512x512_80k_ade20k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/psanet/psanet_r101-d8_769x769_40k_cityscapes.py b/configs/psanet/psanet_r101-d8_769x769_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..ffc99f010903267fc7c1893f4a6b0dcd2cbe42e6 --- /dev/null +++ b/configs/psanet/psanet_r101-d8_769x769_40k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './psanet_r50-d8_769x769_40k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/psanet/psanet_r101-d8_769x769_80k_cityscapes.py b/configs/psanet/psanet_r101-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..6a9efc55ad2062facf3a568f8cdbba76c8c55950 --- /dev/null +++ b/configs/psanet/psanet_r101-d8_769x769_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './psanet_r50-d8_769x769_80k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/psanet/psanet_r50-d8_512x1024_40k_cityscapes.py b/configs/psanet/psanet_r50-d8_512x1024_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..6671fcb4bf8430bc0128cd93a4b8cedea1856b03 --- /dev/null +++ b/configs/psanet/psanet_r50-d8_512x1024_40k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/psanet_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] diff --git a/configs/psanet/psanet_r50-d8_512x1024_80k_cityscapes.py b/configs/psanet/psanet_r50-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..a441013a4c1adc39fc064dbac23caaac9efdc4a6 --- /dev/null +++ b/configs/psanet/psanet_r50-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/psanet_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] diff --git a/configs/psanet/psanet_r50-d8_512x512_160k_ade20k.py b/configs/psanet/psanet_r50-d8_512x512_160k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..9c6364eb43e2abc95011205b569627ff9367d0e5 --- /dev/null +++ b/configs/psanet/psanet_r50-d8_512x512_160k_ade20k.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/psanet_r50-d8.py', '../_base_/datasets/ade20k.py', + 
'../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +model = dict( + decode_head=dict(mask_size=(66, 66), num_classes=150), + auxiliary_head=dict(num_classes=150)) diff --git a/configs/psanet/psanet_r50-d8_512x512_20k_voc12aug.py b/configs/psanet/psanet_r50-d8_512x512_20k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..af06cb66cc808c206d6946a4b2420a6942d3dc7e --- /dev/null +++ b/configs/psanet/psanet_r50-d8_512x512_20k_voc12aug.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/psanet_r50-d8.py', + '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_20k.py' +] +model = dict( + decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21)) diff --git a/configs/psanet/psanet_r50-d8_512x512_40k_voc12aug.py b/configs/psanet/psanet_r50-d8_512x512_40k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..803c42da35eda861bf32ce0e7866cdc9fad96d0d --- /dev/null +++ b/configs/psanet/psanet_r50-d8_512x512_40k_voc12aug.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/psanet_r50-d8.py', + '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21)) diff --git a/configs/psanet/psanet_r50-d8_512x512_80k_ade20k.py b/configs/psanet/psanet_r50-d8_512x512_80k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..0141a6d0925c2a2aa37517670a9f12ac7d3a02d4 --- /dev/null +++ b/configs/psanet/psanet_r50-d8_512x512_80k_ade20k.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/psanet_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(mask_size=(66, 66), num_classes=150), + auxiliary_head=dict(num_classes=150)) diff --git a/configs/psanet/psanet_r50-d8_769x769_40k_cityscapes.py b/configs/psanet/psanet_r50-d8_769x769_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..690f8b5ef359be8a8be3a2d768aede24216a8706 --- /dev/null +++ b/configs/psanet/psanet_r50-d8_769x769_40k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/psanet_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/configs/psanet/psanet_r50-d8_769x769_80k_cityscapes.py b/configs/psanet/psanet_r50-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..0966b4770cc649e95525c366b09801408b99567a --- /dev/null +++ b/configs/psanet/psanet_r50-d8_769x769_80k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/psanet_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/configs/pspnet/README.md b/configs/pspnet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..66f3dc286f066c50ef54e98de036ef0f5056e246 --- /dev/null +++ b/configs/pspnet/README.md @@ -0,0 +1,69 @@ +# Pyramid Scene Parsing Network + +## Introduction + + 
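Most of the variants added in this diff are two-line configs that only swap the backbone; everything else is pulled in through the `_base_` chain. As a minimal sketch of how that composition behaves (assuming mmcv is installed and the snippet is run from the repository root), `mmcv.Config.fromfile` resolves the `_base_` list and merges dict overrides recursively:

```python
from mmcv import Config

# Loading a two-line child config resolves its _base_ chain, merging the
# model, dataset, runtime, and schedule fragments into one configuration.
cfg = Config.fromfile('configs/pspnet/pspnet_r101-d8_512x1024_40k_cityscapes.py')

print(cfg.model.backbone.depth)  # 101 -- the only field the child overrides
print(cfg.model.pretrained)      # 'open-mmlab://resnet101_v1c'
print(cfg.pretty_text)           # the fully merged config, as mmcv renders it
```

Because the merge is recursive, `backbone=dict(depth=101)` replaces only the `depth` key of the inherited ResNet-50 backbone rather than the whole backbone dict; a child config can wipe an inherited dict instead of merging into it by setting `_delete_=True`.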
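Several configs in this family (the 769x769-crop variants above and below) evaluate with `test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))` rather than whole-image inference. As a rough, self-contained sketch of the idea, not mmseg's actual implementation: run the network on overlapping crops of the training size and average the per-pixel logits where crops overlap. It assumes a hypothetical `model` that maps an image batch to class logits at input resolution:

```python
import torch

def _starts(length, crop, stride):
    # Window start positions that tile `length`, keeping the last window
    # flush with the border so every pixel is covered at least once.
    if length <= crop:
        return [0]
    starts = list(range(0, length - crop, stride))
    starts.append(length - crop)
    return starts

@torch.no_grad()
def slide_inference(model, img, crop=769, stride=513, num_classes=19):
    # Average per-pixel class logits over overlapping crop x crop windows.
    _, _, h, w = img.shape
    logits = img.new_zeros((1, num_classes, h, w))
    count = img.new_zeros((1, 1, h, w))
    for y in _starts(h, crop, stride):
        for x in _starts(w, crop, stride):
            out = model(img[:, :, y:y + crop, x:x + crop])  # (1, C, <=crop, <=crop)
            logits[:, :, y:y + crop, x:x + crop] += out
            count[:, :, y:y + crop, x:x + crop] += 1
    return logits / count  # count >= 1 wherever the image is covered
```

A stride smaller than the crop (513 vs 769 here) guarantees overlap, which smooths prediction seams at window borders at the cost of extra forward passes.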
+ +```latex +@inproceedings{zhao2017pspnet, + title={Pyramid Scene Parsing Network}, + author={Zhao, Hengshuang and Shi, Jianping and Qi, Xiaojuan and Wang, Xiaogang and Jia, Jiaya}, + booktitle={CVPR}, + year={2017} +} +``` + +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | --------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ---------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| PSPNet | R-50-D8 | 512x1024 | 40000 | 6.1 | 4.07 | 77.85 | 79.18 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338.log.json) | +| PSPNet | R-101-D8 | 512x1024 | 40000 | 9.6 | 2.68 | 78.34 | 79.74 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r101-d8_512x1024_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x1024_40k_cityscapes/pspnet_r101-d8_512x1024_40k_cityscapes_20200604_232751-467e7cf4.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x1024_40k_cityscapes/pspnet_r101-d8_512x1024_40k_cityscapes_20200604_232751.log.json) | +| PSPNet | R-50-D8 | 769x769 | 40000 | 6.9 | 1.76 | 78.26 | 79.88 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r50-d8_769x769_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_769x769_40k_cityscapes/pspnet_r50-d8_769x769_40k_cityscapes_20200606_112725-86638686.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_769x769_40k_cityscapes/pspnet_r50-d8_769x769_40k_cityscapes_20200606_112725.log.json) | +| PSPNet | R-101-D8 | 769x769 | 40000 | 10.9 | 1.15 | 79.08 | 80.28 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r101-d8_769x769_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_769x769_40k_cityscapes/pspnet_r101-d8_769x769_40k_cityscapes_20200606_112753-61c6f5be.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_769x769_40k_cityscapes/pspnet_r101-d8_769x769_40k_cityscapes_20200606_112753.log.json) | +| PSPNet | R-18-D8 | 512x1024 | 80000 | 1.7 | 15.71 | 74.87 | 76.04 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r18-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18-d8_512x1024_80k_cityscapes/pspnet_r18-d8_512x1024_80k_cityscapes_20201225_021458-09ffa746.pth) | 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18-d8_512x1024_80k_cityscapes/pspnet_r18-d8_512x1024_80k_cityscapes-20201225_021458.log.json) | +| PSPNet | R-50-D8 | 512x1024 | 80000 | - | - | 78.55 | 79.79 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r50-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x1024_80k_cityscapes/pspnet_r50-d8_512x1024_80k_cityscapes_20200606_112131-2376f12b.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x1024_80k_cityscapes/pspnet_r50-d8_512x1024_80k_cityscapes_20200606_112131.log.json) | +| PSPNet | R-101-D8 | 512x1024 | 80000 | - | - | 79.76 | 81.01 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r101-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x1024_80k_cityscapes/pspnet_r101-d8_512x1024_80k_cityscapes_20200606_112211-e1e1100f.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x1024_80k_cityscapes/pspnet_r101-d8_512x1024_80k_cityscapes_20200606_112211.log.json) | +| PSPNet | R-18-D8 | 769x769 | 80000 | 1.9 | 6.20 | 75.90 | 77.86 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r18-d8_769x769_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18-d8_769x769_80k_cityscapes/pspnet_r18-d8_769x769_80k_cityscapes_20201225_021458-3deefc62.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18-d8_769x769_80k_cityscapes/pspnet_r18-d8_769x769_80k_cityscapes-20201225_021458.log.json) | +| PSPNet | R-50-D8 | 769x769 | 80000 | - | - | 79.59 | 80.69 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r50-d8_769x769_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_769x769_80k_cityscapes/pspnet_r50-d8_769x769_80k_cityscapes_20200606_210121-5ccf03dd.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_769x769_80k_cityscapes/pspnet_r50-d8_769x769_80k_cityscapes_20200606_210121.log.json) | +| PSPNet | R-101-D8 | 769x769 | 80000 | - | - | 79.77 | 81.06 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r101-d8_769x769_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_769x769_80k_cityscapes/pspnet_r101-d8_769x769_80k_cityscapes_20200606_225055-dba412fa.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_769x769_80k_cityscapes/pspnet_r101-d8_769x769_80k_cityscapes_20200606_225055.log.json) | +| PSPNet | R-18b-D8 | 512x1024 | 80000 | 1.5 | 16.28 | 74.23 | 75.79 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r18b-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18b-d8_512x1024_80k_cityscapes/pspnet_r18b-d8_512x1024_80k_cityscapes_20201226_063116-26928a60.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18b-d8_512x1024_80k_cityscapes/pspnet_r18b-d8_512x1024_80k_cityscapes-20201226_063116.log.json) | +| PSPNet | R-50b-D8 | 512x1024 | 80000 | 6.0 | 4.30 | 78.22 | 79.46 | 
[config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r50b-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50b-d8_512x1024_80k_cityscapes/pspnet_r50b-d8_512x1024_80k_cityscapes_20201225_094315-6344287a.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50b-d8_512x1024_80k_cityscapes/pspnet_r50b-d8_512x1024_80k_cityscapes-20201225_094315.log.json) | +| PSPNet | R-101b-D8 | 512x1024 | 80000 | 9.5 | 2.76 | 79.69 | 80.79 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r101b-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101b-d8_512x1024_80k_cityscapes/pspnet_r101b-d8_512x1024_80k_cityscapes_20201226_170012-3a4d38ab.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101b-d8_512x1024_80k_cityscapes/pspnet_r101b-d8_512x1024_80k_cityscapes-20201226_170012.log.json) | +| PSPNet | R-18b-D8 | 769x769 | 80000 | 1.7 | 6.41 | 74.92 | 76.90 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r18b-d8_769x769_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18b-d8_769x769_80k_cityscapes/pspnet_r18b-d8_769x769_80k_cityscapes_20201226_080942-bf98d186.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18b-d8_769x769_80k_cityscapes/pspnet_r18b-d8_769x769_80k_cityscapes-20201226_080942.log.json) | +| PSPNet | R-50b-D8 | 769x769 | 80000 | 6.8 | 1.88 | 78.50 | 79.96 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r50b-d8_769x769_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50b-d8_769x769_80k_cityscapes/pspnet_r50b-d8_769x769_80k_cityscapes_20201225_094316-4c643cf6.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50b-d8_769x769_80k_cityscapes/pspnet_r50b-d8_769x769_80k_cityscapes-20201225_094316.log.json) | +| PSPNet | R-101b-D8 | 769x769 | 80000 | 10.8 | 1.17 | 78.87 | 80.04 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r101b-d8_769x769_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101b-d8_769x769_80k_cityscapes/pspnet_r101b-d8_769x769_80k_cityscapes_20201226_171823-f0e7c293.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101b-d8_769x769_80k_cityscapes/pspnet_r101b-d8_769x769_80k_cityscapes-20201226_171823.log.json) | + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ----------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| PSPNet | R-50-D8 | 512x512 | 80000 | 8.5 | 23.53 | 41.13 | 41.94 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r50-d8_512x512_80k_ade20k.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_80k_ade20k/pspnet_r50-d8_512x512_80k_ade20k_20200615_014128-15a8b914.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_80k_ade20k/pspnet_r50-d8_512x512_80k_ade20k_20200615_014128.log.json) | +| PSPNet | R-101-D8 | 512x512 | 80000 | 12 | 15.30 | 43.57 | 44.35 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r101-d8_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_80k_ade20k/pspnet_r101-d8_512x512_80k_ade20k_20200614_031423-b6e782f0.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_80k_ade20k/pspnet_r101-d8_512x512_80k_ade20k_20200614_031423.log.json) | +| PSPNet | R-50-D8 | 512x512 | 160000 | - | - | 42.48 | 43.44 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r50-d8_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_160k_ade20k/pspnet_r50-d8_512x512_160k_ade20k_20200615_184358-1890b0bd.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_160k_ade20k/pspnet_r50-d8_512x512_160k_ade20k_20200615_184358.log.json) | +| PSPNet | R-101-D8 | 512x512 | 160000 | - | - | 44.39 | 45.35 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r101-d8_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_160k_ade20k/pspnet_r101-d8_512x512_160k_ade20k_20200615_100650-967c316f.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_160k_ade20k/pspnet_r101-d8_512x512_160k_ade20k_20200615_100650.log.json) | + +### Pascal VOC 2012 + Aug + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| PSPNet | R-50-D8 | 512x512 | 20000 | 6.1 | 23.59 | 76.78 | 77.61 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r50-d8_512x512_20k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_20k_voc12aug/pspnet_r50-d8_512x512_20k_voc12aug_20200617_101958-ed5dfbd9.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_20k_voc12aug/pspnet_r50-d8_512x512_20k_voc12aug_20200617_101958.log.json) | +| PSPNet | R-101-D8 | 512x512 | 20000 | 9.6 | 15.02 | 78.47 | 79.25 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r101-d8_512x512_20k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_20k_voc12aug/pspnet_r101-d8_512x512_20k_voc12aug_20200617_102003-4aef3c9a.pth) | 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_20k_voc12aug/pspnet_r101-d8_512x512_20k_voc12aug_20200617_102003.log.json) | +| PSPNet | R-50-D8 | 512x512 | 40000 | - | - | 77.29 | 78.48 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r50-d8_512x512_40k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_40k_voc12aug/pspnet_r50-d8_512x512_40k_voc12aug_20200613_161222-ae9c1b8c.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_40k_voc12aug/pspnet_r50-d8_512x512_40k_voc12aug_20200613_161222.log.json) | +| PSPNet | R-101-D8 | 512x512 | 40000 | - | - | 78.52 | 79.57 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r101-d8_512x512_40k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_40k_voc12aug/pspnet_r101-d8_512x512_40k_voc12aug_20200613_161222-bc933b18.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_40k_voc12aug/pspnet_r101-d8_512x512_40k_voc12aug_20200613_161222.log.json) | + +### Pascal Context + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| PSPNet | R-101-D8 | 480x480 | 40000 | 8.8 | 9.68 | 46.60 | 47.78 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r101-d8_480x480_40k_pascal_context.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_480x480_40k_pascal_context/pspnet_r101-d8_480x480_40k_pascal_context_20200911_211210-bf0f5d7c.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_480x480_40k_pascal_context/pspnet_r101-d8_480x480_40k_pascal_context-20200911_211210.log.json) | +| PSPNet | R-101-D8 | 480x480 | 80000 | - | - | 46.03 | 47.15 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r101-d8_480x480_80k_pascal_context.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_480x480_80k_pascal_context/pspnet_r101-d8_480x480_80k_pascal_context_20200911_190530-c86d6233.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_480x480_80k_pascal_context/pspnet_r101-d8_480x480_80k_pascal_context-20200911_190530.log.json) | + +### Pascal Context 59 + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------------------ | 
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| PSPNet | R-101-D8 | 480x480 | 40000 | - | - | 52.02 | 53.54 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r101-d8_480x480_40k_pascal_context_59.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_480x480_40k_pascal_context_59/pspnet_r101-d8_480x480_40k_pascal_context_59_20210416_114524-86d44cd4.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_480x480_40k_pascal_context_59/pspnet_r101-d8_480x480_40k_pascal_context_59-20210416_114524.log.json) | +| PSPNet | R-101-D8 | 480x480 | 80000 | - | - | 52.47 | 53.99 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r101-d8_480x480_80k_pascal_context_59.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_480x480_80k_pascal_context_59/pspnet_r101-d8_480x480_80k_pascal_context_59_20210416_114418-fa6caaa2.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_480x480_80k_pascal_context_59/pspnet_r101-d8_480x480_80k_pascal_context_59-20210416_114418.log.json) | diff --git a/configs/pspnet/pspnet_r101-d8_480x480_40k_pascal_context.py b/configs/pspnet/pspnet_r101-d8_480x480_40k_pascal_context.py new file mode 100644 index 0000000000000000000000000000000000000000..0b5a990604a77238375cb6d2b8298a382a457dd6 --- /dev/null +++ b/configs/pspnet/pspnet_r101-d8_480x480_40k_pascal_context.py @@ -0,0 +1,2 @@ +_base_ = './pspnet_r50-d8_480x480_40k_pascal_context.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/pspnet/pspnet_r101-d8_480x480_40k_pascal_context_59.py b/configs/pspnet/pspnet_r101-d8_480x480_40k_pascal_context_59.py new file mode 100644 index 0000000000000000000000000000000000000000..081cb3732a110bde277abdbed129adce71bbecea --- /dev/null +++ b/configs/pspnet/pspnet_r101-d8_480x480_40k_pascal_context_59.py @@ -0,0 +1,2 @@ +_base_ = './pspnet_r50-d8_480x480_40k_pascal_context_59.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/pspnet/pspnet_r101-d8_480x480_80k_pascal_context.py b/configs/pspnet/pspnet_r101-d8_480x480_80k_pascal_context.py new file mode 100644 index 0000000000000000000000000000000000000000..fda9110603d71e14cab6e537949be191f2adf6db --- /dev/null +++ b/configs/pspnet/pspnet_r101-d8_480x480_80k_pascal_context.py @@ -0,0 +1,2 @@ +_base_ = './pspnet_r50-d8_480x480_80k_pascal_context.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/pspnet/pspnet_r101-d8_480x480_80k_pascal_context_59.py b/configs/pspnet/pspnet_r101-d8_480x480_80k_pascal_context_59.py new file mode 100644 index 0000000000000000000000000000000000000000..795c51f8cff7e057b6c4872de079c179d61c4014 --- /dev/null +++ b/configs/pspnet/pspnet_r101-d8_480x480_80k_pascal_context_59.py @@ -0,0 +1,2 @@ +_base_ = './pspnet_r50-d8_480x480_80k_pascal_context_59.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/pspnet/pspnet_r101-d8_512x1024_40k_cityscapes.py 
b/configs/pspnet/pspnet_r101-d8_512x1024_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..38fee11bc23d8c92c529acd0c02a68204e34ab91 --- /dev/null +++ b/configs/pspnet/pspnet_r101-d8_512x1024_40k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './pspnet_r50-d8_512x1024_40k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/pspnet/pspnet_r101-d8_512x1024_80k_cityscapes.py b/configs/pspnet/pspnet_r101-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..9931a07bc2d137eb49b3fa4dad8f8681d4f5e943 --- /dev/null +++ b/configs/pspnet/pspnet_r101-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './pspnet_r50-d8_512x1024_80k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/pspnet/pspnet_r101-d8_512x512_160k_ade20k.py b/configs/pspnet/pspnet_r101-d8_512x512_160k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..6107b41544378ad371cee95ee5ebc2e98ccbd9ad --- /dev/null +++ b/configs/pspnet/pspnet_r101-d8_512x512_160k_ade20k.py @@ -0,0 +1,2 @@ +_base_ = './pspnet_r50-d8_512x512_160k_ade20k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/pspnet/pspnet_r101-d8_512x512_20k_voc12aug.py b/configs/pspnet/pspnet_r101-d8_512x512_20k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..2221b202d6c53c4b04f2431d3344379cbfe06dd7 --- /dev/null +++ b/configs/pspnet/pspnet_r101-d8_512x512_20k_voc12aug.py @@ -0,0 +1,2 @@ +_base_ = './pspnet_r50-d8_512x512_20k_voc12aug.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/pspnet/pspnet_r101-d8_512x512_40k_voc12aug.py b/configs/pspnet/pspnet_r101-d8_512x512_40k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..15f578b6002c481ada06befc3ea66accbbdd1f66 --- /dev/null +++ b/configs/pspnet/pspnet_r101-d8_512x512_40k_voc12aug.py @@ -0,0 +1,2 @@ +_base_ = './pspnet_r50-d8_512x512_40k_voc12aug.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/pspnet/pspnet_r101-d8_512x512_80k_ade20k.py b/configs/pspnet/pspnet_r101-d8_512x512_80k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..fb7c3d55d57b09296ea24889b218f9a0fb997463 --- /dev/null +++ b/configs/pspnet/pspnet_r101-d8_512x512_80k_ade20k.py @@ -0,0 +1,2 @@ +_base_ = './pspnet_r50-d8_512x512_80k_ade20k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/pspnet/pspnet_r101-d8_769x769_40k_cityscapes.py b/configs/pspnet/pspnet_r101-d8_769x769_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..c6e7e58508f31627766b8ab748bd81cd51c77eca --- /dev/null +++ b/configs/pspnet/pspnet_r101-d8_769x769_40k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './pspnet_r50-d8_769x769_40k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/pspnet/pspnet_r101-d8_769x769_80k_cityscapes.py b/configs/pspnet/pspnet_r101-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..59b8c6dd5ef234334bcdfa3d5e3594b7a9989b17 --- /dev/null +++ b/configs/pspnet/pspnet_r101-d8_769x769_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './pspnet_r50-d8_769x769_80k_cityscapes.py' +model = 
dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/pspnet/pspnet_r101b-d8_512x1024_80k_cityscapes.py b/configs/pspnet/pspnet_r101b-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..ab8a3d3e3fcc12dd41223af190e2ae04f14d1cb8 --- /dev/null +++ b/configs/pspnet/pspnet_r101b-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = './pspnet_r50-d8_512x1024_80k_cityscapes.py' +model = dict( + pretrained='torchvision://resnet101', + backbone=dict(type='ResNet', depth=101)) diff --git a/configs/pspnet/pspnet_r101b-d8_769x769_80k_cityscapes.py b/configs/pspnet/pspnet_r101b-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..1a7cb708e551e90a12ad4267e2af6938c353f0ba --- /dev/null +++ b/configs/pspnet/pspnet_r101b-d8_769x769_80k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = './pspnet_r50-d8_769x769_80k_cityscapes.py' +model = dict( + pretrained='torchvision://resnet101', + backbone=dict(type='ResNet', depth=101)) diff --git a/configs/pspnet/pspnet_r18-d8_512x1024_80k_cityscapes.py b/configs/pspnet/pspnet_r18-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..d914f93c023a6384e0e856b8608280cef589d5c6 --- /dev/null +++ b/configs/pspnet/pspnet_r18-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = './pspnet_r50-d8_512x1024_80k_cityscapes.py' +model = dict( + pretrained='open-mmlab://resnet18_v1c', + backbone=dict(depth=18), + decode_head=dict( + in_channels=512, + channels=128, + ), + auxiliary_head=dict(in_channels=256, channels=64)) diff --git a/configs/pspnet/pspnet_r18-d8_769x769_80k_cityscapes.py b/configs/pspnet/pspnet_r18-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..5893e66a41cad73e8fb24aa58dc78ef002aecca5 --- /dev/null +++ b/configs/pspnet/pspnet_r18-d8_769x769_80k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = './pspnet_r50-d8_769x769_80k_cityscapes.py' +model = dict( + pretrained='open-mmlab://resnet18_v1c', + backbone=dict(depth=18), + decode_head=dict( + in_channels=512, + channels=128, + ), + auxiliary_head=dict(in_channels=256, channels=64)) diff --git a/configs/pspnet/pspnet_r18b-d8_512x1024_80k_cityscapes.py b/configs/pspnet/pspnet_r18b-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..abeeedf84387d7846a8a2c10480b94c9d8405559 --- /dev/null +++ b/configs/pspnet/pspnet_r18b-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = './pspnet_r50-d8_512x1024_80k_cityscapes.py' +model = dict( + pretrained='torchvision://resnet18', + backbone=dict(type='ResNet', depth=18), + decode_head=dict( + in_channels=512, + channels=128, + ), + auxiliary_head=dict(in_channels=256, channels=64)) diff --git a/configs/pspnet/pspnet_r18b-d8_769x769_80k_cityscapes.py b/configs/pspnet/pspnet_r18b-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..284be6d09af1806b99bee5b85286b55ce02e8cbd --- /dev/null +++ b/configs/pspnet/pspnet_r18b-d8_769x769_80k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = './pspnet_r50-d8_769x769_80k_cityscapes.py' +model = dict( + pretrained='torchvision://resnet18', + backbone=dict(type='ResNet', depth=18), + decode_head=dict( + in_channels=512, + channels=128, + ), + auxiliary_head=dict(in_channels=256, channels=64)) diff --git a/configs/pspnet/pspnet_r50-d8_480x480_40k_pascal_context.py b/configs/pspnet/pspnet_r50-d8_480x480_40k_pascal_context.py new file mode 100644 index 
0000000000000000000000000000000000000000..30abe46e7054b2203c0338b93aeb5b5dd059ba82 --- /dev/null +++ b/configs/pspnet/pspnet_r50-d8_480x480_40k_pascal_context.py @@ -0,0 +1,10 @@ +_base_ = [ + '../_base_/models/pspnet_r50-d8.py', + '../_base_/datasets/pascal_context.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(num_classes=60), + auxiliary_head=dict(num_classes=60), + test_cfg=dict(mode='slide', crop_size=(480, 480), stride=(320, 320))) +optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001) diff --git a/configs/pspnet/pspnet_r50-d8_480x480_40k_pascal_context_59.py b/configs/pspnet/pspnet_r50-d8_480x480_40k_pascal_context_59.py new file mode 100644 index 0000000000000000000000000000000000000000..88041c6817d2cb152a979b71a2ce56a9e30b87b5 --- /dev/null +++ b/configs/pspnet/pspnet_r50-d8_480x480_40k_pascal_context_59.py @@ -0,0 +1,10 @@ +_base_ = [ + '../_base_/models/pspnet_r50-d8.py', + '../_base_/datasets/pascal_context_59.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(num_classes=59), + auxiliary_head=dict(num_classes=59), + test_cfg=dict(mode='slide', crop_size=(480, 480), stride=(320, 320))) +optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001) diff --git a/configs/pspnet/pspnet_r50-d8_480x480_80k_pascal_context.py b/configs/pspnet/pspnet_r50-d8_480x480_80k_pascal_context.py new file mode 100644 index 0000000000000000000000000000000000000000..09e96dabf74cc17a5fcb09b114f2bddd2af9af7f --- /dev/null +++ b/configs/pspnet/pspnet_r50-d8_480x480_80k_pascal_context.py @@ -0,0 +1,10 @@ +_base_ = [ + '../_base_/models/pspnet_r50-d8.py', + '../_base_/datasets/pascal_context.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(num_classes=60), + auxiliary_head=dict(num_classes=60), + test_cfg=dict(mode='slide', crop_size=(480, 480), stride=(320, 320))) +optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001) diff --git a/configs/pspnet/pspnet_r50-d8_480x480_80k_pascal_context_59.py b/configs/pspnet/pspnet_r50-d8_480x480_80k_pascal_context_59.py new file mode 100644 index 0000000000000000000000000000000000000000..d4065ec05c5c12e2b24a1433b38580b3c640d6be --- /dev/null +++ b/configs/pspnet/pspnet_r50-d8_480x480_80k_pascal_context_59.py @@ -0,0 +1,10 @@ +_base_ = [ + '../_base_/models/pspnet_r50-d8.py', + '../_base_/datasets/pascal_context_59.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(num_classes=59), + auxiliary_head=dict(num_classes=59), + test_cfg=dict(mode='slide', crop_size=(480, 480), stride=(320, 320))) +optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001) diff --git a/configs/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes.py b/configs/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..5deb5872b00a30d5c18a980c4d6c1b0d915908b9 --- /dev/null +++ b/configs/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/pspnet_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] diff --git a/configs/pspnet/pspnet_r50-d8_512x1024_80k_cityscapes.py b/configs/pspnet/pspnet_r50-d8_512x1024_80k_cityscapes.py new file mode 100644 index 
0000000000000000000000000000000000000000..4e9972849d6899fe43f435284d0e0b1bc3b0e7a9 --- /dev/null +++ b/configs/pspnet/pspnet_r50-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/pspnet_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] diff --git a/configs/pspnet/pspnet_r50-d8_512x512_160k_ade20k.py b/configs/pspnet/pspnet_r50-d8_512x512_160k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..86584573a3d1afac73041b85516112ac21f1f17c --- /dev/null +++ b/configs/pspnet/pspnet_r50-d8_512x512_160k_ade20k.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/pspnet_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +model = dict( + decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) diff --git a/configs/pspnet/pspnet_r50-d8_512x512_20k_voc12aug.py b/configs/pspnet/pspnet_r50-d8_512x512_20k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..cd88154d5e0be1a519e973331e0a14ae8a7de13e --- /dev/null +++ b/configs/pspnet/pspnet_r50-d8_512x512_20k_voc12aug.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/pspnet_r50-d8.py', + '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_20k.py' +] +model = dict( + decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21)) diff --git a/configs/pspnet/pspnet_r50-d8_512x512_40k_voc12aug.py b/configs/pspnet/pspnet_r50-d8_512x512_40k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..f0c20c12f6bcf04b732dccaa4bfdba10bd10b5e6 --- /dev/null +++ b/configs/pspnet/pspnet_r50-d8_512x512_40k_voc12aug.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/pspnet_r50-d8.py', + '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21)) diff --git a/configs/pspnet/pspnet_r50-d8_512x512_80k_ade20k.py b/configs/pspnet/pspnet_r50-d8_512x512_80k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..52efdf51d7d66c3205c1448c45ae281649a0901e --- /dev/null +++ b/configs/pspnet/pspnet_r50-d8_512x512_80k_ade20k.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/pspnet_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) diff --git a/configs/pspnet/pspnet_r50-d8_769x769_40k_cityscapes.py b/configs/pspnet/pspnet_r50-d8_769x769_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..145cadb24016eeea87fccff8171c5b0dfb78f7ab --- /dev/null +++ b/configs/pspnet/pspnet_r50-d8_769x769_40k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/pspnet_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/configs/pspnet/pspnet_r50-d8_769x769_80k_cityscapes.py b/configs/pspnet/pspnet_r50-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..23a81eb7ef56a4cd8e7c9da65b86f3d0e562001a --- /dev/null +++ 
b/configs/pspnet/pspnet_r50-d8_769x769_80k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/pspnet_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/configs/pspnet/pspnet_r50b-d8_512x1024_80k_cityscapes.py b/configs/pspnet/pspnet_r50b-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..946bf4fc84236942a4462c2daa7637cace4e90cf --- /dev/null +++ b/configs/pspnet/pspnet_r50b-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './pspnet_r50-d8_512x1024_80k_cityscapes.py' +model = dict(pretrained='torchvision://resnet50', backbone=dict(type='ResNet')) diff --git a/configs/pspnet/pspnet_r50b-d8_769x769_80k_cityscapes.py b/configs/pspnet/pspnet_r50b-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..b6087dcf9f7cc04e12a2b9bcbde7abc4a56e972e --- /dev/null +++ b/configs/pspnet/pspnet_r50b-d8_769x769_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './pspnet_r50-d8_769x769_80k_cityscapes.py' +model = dict(pretrained='torchvision://resnet50', backbone=dict(type='ResNet')) diff --git a/configs/resnest/README.md b/configs/resnest/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b610c14c3ef971ac075d5fb2223d2d5f2b4098bf --- /dev/null +++ b/configs/resnest/README.md @@ -0,0 +1,34 @@ +# ResNeSt: Split-Attention Networks + +## Introduction + + + +```latex +@article{zhang2020resnest, +title={ResNeSt: Split-Attention Networks}, +author={Zhang, Hang and Wu, Chongruo and Zhang, Zhongyue and Zhu, Yi and Zhang, Zhi and Lin, Haibin and Sun, Yue and He, Tong and Mueller, Jonas and Manmatha, R.
and Li, Mu and Smola, Alexander}, +journal={arXiv preprint arXiv:2004.08955}, +year={2020} +} +``` + +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ---------- | -------- | --------- | ------: | -------: | -------------- | ----: | ------------- | ----------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| FCN | S-101-D8 | 512x1024 | 80000 | 11.4 | 2.39 | 77.56 | 78.98 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/resnest/fcn_s101-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/resnest/fcn_s101-d8_512x1024_80k_cityscapes/fcn_s101-d8_512x1024_80k_cityscapes_20200807_140631-f8d155b3.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/resnest/fcn_s101-d8_512x1024_80k_cityscapes/fcn_s101-d8_512x1024_80k_cityscapes-20200807_140631.log.json) | +| PSPNet | S-101-D8 | 512x1024 | 80000 | 11.8 | 2.52 | 78.57 | 79.19 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/resnest/pspnet_s101-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/resnest/pspnet_s101-d8_512x1024_80k_cityscapes/pspnet_s101-d8_512x1024_80k_cityscapes_20200807_140631-c75f3b99.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/resnest/pspnet_s101-d8_512x1024_80k_cityscapes/pspnet_s101-d8_512x1024_80k_cityscapes-20200807_140631.log.json) | +| DeepLabV3 | S-101-D8 | 512x1024 | 80000 | 11.9 | 1.88 | 79.67 | 80.51 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/resnest/deeplabv3_s101-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/resnest/deeplabv3_s101-d8_512x1024_80k_cityscapes/deeplabv3_s101-d8_512x1024_80k_cityscapes_20200807_144429-b73c4270.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/resnest/deeplabv3_s101-d8_512x1024_80k_cityscapes/deeplabv3_s101-d8_512x1024_80k_cityscapes-20200807_144429.log.json) | +| DeepLabV3+ | S-101-D8 | 512x1024 | 80000 | 13.2 | 2.36 | 79.62 | 80.27 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/resnest/deeplabv3plus_s101-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/resnest/deeplabv3plus_s101-d8_512x1024_80k_cityscapes/deeplabv3plus_s101-d8_512x1024_80k_cityscapes_20200807_144429-1239eb43.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/resnest/deeplabv3plus_s101-d8_512x1024_80k_cityscapes/deeplabv3plus_s101-d8_512x1024_80k_cityscapes-20200807_144429.log.json) | + +### ADE20k + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ---------- | -------- | --------- | ------: | -------: | -------------- | ----: | ------------- | ------------------------------------------------------------------------------------------------------------------------------- | 
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| FCN | S-101-D8 | 512x512 | 160000 | 14.2 | 12.86 | 45.62 | 46.16 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/resnest/fcn_s101-d8_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/resnest/fcn_s101-d8_512x512_160k_ade20k/fcn_s101-d8_512x512_160k_ade20k_20200807_145416-d3160329.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/resnest/fcn_s101-d8_512x512_160k_ade20k/fcn_s101-d8_512x512_160k_ade20k-20200807_145416.log.json) | +| PSPNet | S-101-D8 | 512x512 | 160000 | 14.2 | 13.02 | 45.44 | 46.28 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/resnest/pspnet_s101-d8_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/resnest/pspnet_s101-d8_512x512_160k_ade20k/pspnet_s101-d8_512x512_160k_ade20k_20200807_145416-a6daa92a.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/resnest/pspnet_s101-d8_512x512_160k_ade20k/pspnet_s101-d8_512x512_160k_ade20k-20200807_145416.log.json) | +| DeepLabV3 | S-101-D8 | 512x512 | 160000 | 14.6 | 9.28 | 45.71 | 46.59 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/resnest/deeplabv3_s101-d8_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/resnest/deeplabv3_s101-d8_512x512_160k_ade20k/deeplabv3_s101-d8_512x512_160k_ade20k_20200807_144503-17ecabe5.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/resnest/deeplabv3_s101-d8_512x512_160k_ade20k/deeplabv3_s101-d8_512x512_160k_ade20k-20200807_144503.log.json) | +| DeepLabV3+ | S-101-D8 | 512x512 | 160000 | 16.2 | 11.96 | 46.47 | 47.27 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/resnest/deeplabv3plus_s101-d8_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/resnest/deeplabv3plus_s101-d8_512x512_160k_ade20k/deeplabv3plus_s101-d8_512x512_160k_ade20k_20200807_144503-27b26226.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/resnest/deeplabv3plus_s101-d8_512x512_160k_ade20k/deeplabv3plus_s101-d8_512x512_160k_ade20k-20200807_144503.log.json) | diff --git a/configs/resnest/deeplabv3_s101-d8_512x1024_80k_cityscapes.py b/configs/resnest/deeplabv3_s101-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..f98398690eb3e1e77975d7fb94ea865424aa331b --- /dev/null +++ b/configs/resnest/deeplabv3_s101-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = '../deeplabv3/deeplabv3_r101-d8_512x1024_80k_cityscapes.py' +model = dict( + pretrained='open-mmlab://resnest101', + backbone=dict( + type='ResNeSt', + stem_channels=128, + radix=2, + reduction_factor=4, + avg_down_stride=True)) diff --git a/configs/resnest/deeplabv3_s101-d8_512x512_160k_ade20k.py b/configs/resnest/deeplabv3_s101-d8_512x512_160k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..e3924ad679cb3d7ba731322f9cdb67410baae59a --- /dev/null +++ b/configs/resnest/deeplabv3_s101-d8_512x512_160k_ade20k.py @@ -0,0 +1,9 @@ +_base_ = '../deeplabv3/deeplabv3_r101-d8_512x512_160k_ade20k.py' +model = dict( + pretrained='open-mmlab://resnest101', + 
backbone=dict( + type='ResNeSt', + stem_channels=128, + radix=2, + reduction_factor=4, + avg_down_stride=True)) diff --git a/configs/resnest/deeplabv3plus_s101-d8_512x1024_80k_cityscapes.py b/configs/resnest/deeplabv3plus_s101-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..69bef7238345cf6aabb126012af992602f910287 --- /dev/null +++ b/configs/resnest/deeplabv3plus_s101-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = '../deeplabv3plus/deeplabv3plus_r101-d8_512x1024_80k_cityscapes.py' +model = dict( + pretrained='open-mmlab://resnest101', + backbone=dict( + type='ResNeSt', + stem_channels=128, + radix=2, + reduction_factor=4, + avg_down_stride=True)) diff --git a/configs/resnest/deeplabv3plus_s101-d8_512x512_160k_ade20k.py b/configs/resnest/deeplabv3plus_s101-d8_512x512_160k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..d51bccb965dafc40d7859219d132dc9467740a1b --- /dev/null +++ b/configs/resnest/deeplabv3plus_s101-d8_512x512_160k_ade20k.py @@ -0,0 +1,9 @@ +_base_ = '../deeplabv3plus/deeplabv3plus_r101-d8_512x512_160k_ade20k.py' +model = dict( + pretrained='open-mmlab://resnest101', + backbone=dict( + type='ResNeSt', + stem_channels=128, + radix=2, + reduction_factor=4, + avg_down_stride=True)) diff --git a/configs/resnest/fcn_s101-d8_512x1024_80k_cityscapes.py b/configs/resnest/fcn_s101-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..33fa0252d8b4cc786f1297605c169ee6068195a4 --- /dev/null +++ b/configs/resnest/fcn_s101-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = '../fcn/fcn_r101-d8_512x1024_80k_cityscapes.py' +model = dict( + pretrained='open-mmlab://resnest101', + backbone=dict( + type='ResNeSt', + stem_channels=128, + radix=2, + reduction_factor=4, + avg_down_stride=True)) diff --git a/configs/resnest/fcn_s101-d8_512x512_160k_ade20k.py b/configs/resnest/fcn_s101-d8_512x512_160k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..dcee8c280e833825f84b944c6db21e9a43125e06 --- /dev/null +++ b/configs/resnest/fcn_s101-d8_512x512_160k_ade20k.py @@ -0,0 +1,9 @@ +_base_ = '../fcn/fcn_r101-d8_512x512_160k_ade20k.py' +model = dict( + pretrained='open-mmlab://resnest101', + backbone=dict( + type='ResNeSt', + stem_channels=128, + radix=2, + reduction_factor=4, + avg_down_stride=True)) diff --git a/configs/resnest/pspnet_s101-d8_512x1024_80k_cityscapes.py b/configs/resnest/pspnet_s101-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..9737849cbd7470b03ef3fcb3b1225283370eb503 --- /dev/null +++ b/configs/resnest/pspnet_s101-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = '../pspnet/pspnet_r101-d8_512x1024_80k_cityscapes.py' +model = dict( + pretrained='open-mmlab://resnest101', + backbone=dict( + type='ResNeSt', + stem_channels=128, + radix=2, + reduction_factor=4, + avg_down_stride=True)) diff --git a/configs/resnest/pspnet_s101-d8_512x512_160k_ade20k.py b/configs/resnest/pspnet_s101-d8_512x512_160k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..6a622eae963401e143004a62ff53071ddbf61c01 --- /dev/null +++ b/configs/resnest/pspnet_s101-d8_512x512_160k_ade20k.py @@ -0,0 +1,9 @@ +_base_ = '../pspnet/pspnet_r101-d8_512x512_160k_ade20k.py' +model = dict( + pretrained='open-mmlab://resnest101', + backbone=dict( + type='ResNeSt', + stem_channels=128, + radix=2, + reduction_factor=4, + avg_down_stride=True)) diff --git a/configs/sem_fpn/README.md 
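Every ResNeSt config in this block follows one pattern: inherit the matching ResNet-101 config through `_base_` and restate only the backbone fields that change. A minimal sketch of the merge semantics (assuming mmcv is installed and the script runs from the repo root with the full configs tree in place): `mmcv.Config` merges child dicts into inherited ones key by key, so anything not restated, such as `depth=101`, survives the override.

```python
# Illustrative only: inspect how a ResNeSt config merges with its _base_ chain.
from mmcv import Config

cfg = Config.fromfile('configs/resnest/pspnet_s101-d8_512x1024_80k_cityscapes.py')
print(cfg.model.backbone.type)   # 'ResNeSt' -- overridden in this file
print(cfg.model.backbone.depth)  # 101       -- inherited from the r101 base
print(cfg.model.backbone.radix)  # 2         -- split-attention radix set here
```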
b/configs/sem_fpn/README.md new file mode 100644 index 0000000000000000000000000000000000000000..c59698db58a50d8230610629577aac4fa92f247b --- /dev/null +++ b/configs/sem_fpn/README.md @@ -0,0 +1,35 @@ +# Panoptic Feature Pyramid Networks + +## Introduction + + + +```latex +@article{Kirillov_2019, + title={Panoptic Feature Pyramid Networks}, + ISBN={9781728132938}, + url={http://dx.doi.org/10.1109/CVPR.2019.00656}, + DOI={10.1109/cvpr.2019.00656}, + journal={2019 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, + publisher={IEEE}, + author={Kirillov, Alexander and Girshick, Ross and He, Kaiming and Dollar, Piotr}, + year={2019}, + month={Jun} +} +``` + +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------: | -------------- | ----: | ------------- | ---------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| FPN | R-50 | 512x1024 | 80000 | 2.8 | 13.54 | 74.52 | 76.08 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/sem_fpn/fpn_r50_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/sem_fpn/fpn_r50_512x1024_80k_cityscapes/fpn_r50_512x1024_80k_cityscapes_20200717_021437-94018a0d.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/sem_fpn/fpn_r50_512x1024_80k_cityscapes/fpn_r50_512x1024_80k_cityscapes-20200717_021437.log.json) | +| FPN | R-101 | 512x1024 | 80000 | 3.9 | 10.29 | 75.80 | 77.40 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/sem_fpn/fpn_r101_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/sem_fpn/fpn_r101_512x1024_80k_cityscapes/fpn_r101_512x1024_80k_cityscapes_20200717_012416-c5800d4c.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/sem_fpn/fpn_r101_512x1024_80k_cityscapes/fpn_r101_512x1024_80k_cityscapes-20200717_012416.log.json) | + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------: | -------------- | ----: | ------------- | ------------------------------------------------------------------------------------------------------------------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| FPN | R-50 | 512x512 | 160000 | 4.9 | 55.77 | 37.49 | 39.09 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/sem_fpn/fpn_r50_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/sem_fpn/fpn_r50_512x512_160k_ade20k/fpn_r50_512x512_160k_ade20k_20200718_131734-5b5a6ab9.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/sem_fpn/fpn_r50_512x512_160k_ade20k/fpn_r50_512x512_160k_ade20k-20200718_131734.log.json) | +| 
FPN | R-101 | 512x512 | 160000 | 5.9 | 40.58 | 39.35 | 40.72 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/sem_fpn/fpn_r101_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/sem_fpn/fpn_r101_512x512_160k_ade20k/fpn_r101_512x512_160k_ade20k_20200718_131734-306b5004.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/sem_fpn/fpn_r101_512x512_160k_ade20k/fpn_r101_512x512_160k_ade20k-20200718_131734.log.json) | diff --git a/configs/sem_fpn/fpn_r101_512x1024_80k_cityscapes.py b/configs/sem_fpn/fpn_r101_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..7f8710d4be4ee0664f644b9037fd4653e4655907 --- /dev/null +++ b/configs/sem_fpn/fpn_r101_512x1024_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './fpn_r50_512x1024_80k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/sem_fpn/fpn_r101_512x512_160k_ade20k.py b/configs/sem_fpn/fpn_r101_512x512_160k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..2654096dfd78ecdd4065ac2d26cab3e2f11a9c65 --- /dev/null +++ b/configs/sem_fpn/fpn_r101_512x512_160k_ade20k.py @@ -0,0 +1,2 @@ +_base_ = './fpn_r50_512x512_160k_ade20k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/sem_fpn/fpn_r50_512x1024_80k_cityscapes.py b/configs/sem_fpn/fpn_r50_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..4bf3edd825296fbbed883effc3622793e9adf071 --- /dev/null +++ b/configs/sem_fpn/fpn_r50_512x1024_80k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/fpn_r50.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] diff --git a/configs/sem_fpn/fpn_r50_512x512_160k_ade20k.py b/configs/sem_fpn/fpn_r50_512x512_160k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..5cdfc8ca264c6045dcb7ad890d89f15537bef233 --- /dev/null +++ b/configs/sem_fpn/fpn_r50_512x512_160k_ade20k.py @@ -0,0 +1,5 @@ +_base_ = [ + '../_base_/models/fpn_r50.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +model = dict(decode_head=dict(num_classes=150)) diff --git a/configs/unet/README.md b/configs/unet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..6c419c05aff6534fd1b232d586f9759a22139e13 --- /dev/null +++ b/configs/unet/README.md @@ -0,0 +1,50 @@ +# U-Net: Convolutional Networks for Biomedical Image Segmentation + +## Introduction + + + +```latex +@inproceedings{ronneberger2015u, + title={U-net: Convolutional networks for biomedical image segmentation}, + author={Ronneberger, Olaf and Fischer, Philipp and Brox, Thomas}, + booktitle={International Conference on Medical image computing and computer-assisted intervention}, + pages={234--241}, + year={2015}, + organization={Springer} +} +``` + +## Results and models + +### DRIVE + +| Backbone | Head | Image Size | Crop Size | Stride | Lr schd | Mem (GB) | Inf time (fps) | Dice | config | download | +| ----------- | --------- | ---------- | --------- | -----: | ------- | -------- | -------------: | ----: | ------------------------------------------------------------------------------------------------------------------------ | 
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| UNet-S5-D16 | FCN | 584x565 | 64x64 | 42x42 | 40000 | 0.680 | - | 78.67 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/unet/fcn_unet_s5-d16_64x64_40k_drive.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_64x64_40k_drive/fcn_unet_s5-d16_64x64_40k_drive_20201223_191051-26cee593.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_64x64_40k_drive/fcn_unet_s5-d16_64x64_40k_drive-20201223_191051.log.json) | +| UNet-S5-D16 | PSPNet | 584x565 | 64x64 | 42x42 | 40000 | 0.599 | - | 78.62 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/unet/pspnet_unet_s5-d16_64x64_40k_drive.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_64x64_40k_drive/pspnet_unet_s5-d16_64x64_40k_drive_20201227_181818-aac73387.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_64x64_40k_drive/pspnet_unet_s5-d16_64x64_40k_drive-20201227_181818.log.json) | +| UNet-S5-D16 | DeepLabV3 | 584x565 | 64x64 | 42x42 | 40000 | 0.596 | - | 78.69 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/unet/deeplabv3_unet_s5-d16_64x64_40k_drive.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_64x64_40k_drive/deeplabv3_unet_s5-d16_64x64_40k_drive_20201226_094047-0671ff20.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_64x64_40k_drive/deeplabv3_unet_s5-d16_64x64_40k_drive-20201226_094047.log.json) | + +### STARE + +| Backbone | Head | Image Size | Crop Size | Stride | Lr schd | Mem (GB) | Inf time (fps) | Dice | config | download | +| ----------- | --------- | ---------- | --------- | -----: | ------- | -------- | -------------: | ----: | -------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| UNet-S5-D16 | FCN | 605x700 | 128x128 | 85x85 | 40000 | 0.968 | - | 81.02 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/unet/fcn_unet_s5-d16_128x128_40k_stare.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_128x128_40k_stare/fcn_unet_s5-d16_128x128_40k_stare_20201223_191051-6ea7cfda.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_128x128_40k_stare/fcn_unet_s5-d16_128x128_40k_stare-20201223_191051.log.json) | +| UNet-S5-D16 | PSPNet | 605x700 | 128x128 | 85x85 | 40000 | 0.982 | - | 81.22 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/unet/pspnet_unet_s5-d16_128x128_40k_stare.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_128x128_40k_stare/pspnet_unet_s5-d16_128x128_40k_stare_20201227_181818-3c2923c4.pth) | 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_128x128_40k_stare/pspnet_unet_s5-d16_128x128_40k_stare-20201227_181818.log.json) | +| UNet-S5-D16 | DeepLabV3 | 605x700 | 128x128 | 85x85 | 40000 | 0.999 | - | 80.93 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/unet/deeplabv3_unet_s5-d16_128x128_40k_stare.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_128x128_40k_stare/deeplabv3_unet_s5-d16_128x128_40k_stare_20201226_094047-93dcb93c.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_128x128_40k_stare/deeplabv3_unet_s5-d16_128x128_40k_stare-20201226_094047.log.json) | + +### CHASE_DB1 + +| Backbone | Head | Image Size | Crop Size | Stride | Lr schd | Mem (GB) | Inf time (fps) | Dice | config | download | +| ----------- | --------- | ---------- | --------- | -----: | ------- | -------- | -------------: | ----: | -------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| UNet-S5-D16 | FCN | 960x999 | 128x128 | 85x85 | 40000 | 0.968 | - | 80.24 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/unet/fcn_unet_s5-d16_128x128_40k_chase_db1.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_128x128_40k_chase_db1/fcn_unet_s5-d16_128x128_40k_chase_db1_20201223_191051-95852f45.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_128x128_40k_chase_db1/fcn_unet_s5-d16_128x128_40k_chase_db1-20201223_191051.log.json) | +| UNet-S5-D16 | PSPNet | 960x999 | 128x128 | 85x85 | 40000 | 0.982 | - | 80.36 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/unet/pspnet_unet_s5-d16_128x128_40k_chase_db1.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_128x128_40k_chase_db1/pspnet_unet_s5-d16_128x128_40k_chase_db1_20201227_181818-68d4e609.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_128x128_40k_chase_db1/pspnet_unet_s5-d16_128x128_40k_chase_db1-20201227_181818.log.json) | +| UNet-S5-D16 | DeepLabV3 | 960x999 | 128x128 | 85x85 | 40000 | 0.999 | - | 80.47 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/unet/deeplabv3_unet_s5-d16_128x128_40k_chase_db1.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_128x128_40k_chase_db1/deeplabv3_unet_s5-d16_128x128_40k_chase_db1_20201226_094047-4c5aefa3.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_128x128_40k_chase_db1/deeplabv3_unet_s5-d16_128x128_40k_chase_db1-20201226_094047.log.json) | + +### HRF + +| Backbone | Head | Image Size | Crop Size | Stride | Lr schd | Mem (GB) | Inf time (fps) | Dice | config | download | +| ----------- | --------- | ---------- | --------- | -----: | ------- | -------- | -------------: | ----: | -------------------------------------------------------------------------------------------------------------------------- | 
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| UNet-S5-D16 | FCN | 2336x3504 | 256x256 | 170x170 | 40000 | 2.525 | - | 79.45 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/unet/fcn_unet_s5-d16_256x256_40k_hrf.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_256x256_40k_hrf/fcn_unet_s5-d16_256x256_40k_hrf_20201223_173724-df3ec8c4.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_256x256_40k_hrf/fcn_unet_s5-d16_256x256_40k_hrf-20201223_173724.log.json) | +| UNet-S5-D16 | PSPNet | 2336x3504 | 256x256 | 170x170 | 40000 | 2.588 | - | 80.07 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/unet/pspnet_unet_s5-d16_256x256_40k_hrf.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_256x256_40k_hrf/pspnet_unet_s5-d16_256x256_40k_hrf_20201227_181818-fdb7e29b.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_256x256_40k_hrf/pspnet_unet_s5-d16_256x256_40k_hrf-20201227_181818.log.json) | +| UNet-S5-D16 | DeepLabV3 | 2336x3504 | 256x256 | 170x170 | 40000 | 2.604 | - | 80.21 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/unet/deeplabv3_unet_s5-d16_256x256_40k_hrf.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_256x256_40k_hrf/deeplabv3_unet_s5-d16_256x256_40k_hrf_20201226_094047-3a1fdf85.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_256x256_40k_hrf/deeplabv3_unet_s5-d16_256x256_40k_hrf-20201226_094047.log.json) | diff --git a/configs/unet/deeplabv3_unet_s5-d16_128x128_40k_chase_db1.py b/configs/unet/deeplabv3_unet_s5-d16_128x128_40k_chase_db1.py new file mode 100644 index 0000000000000000000000000000000000000000..c706cf3548e311a7930e5b58299e05af30c43d98 --- /dev/null +++ b/configs/unet/deeplabv3_unet_s5-d16_128x128_40k_chase_db1.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/deeplabv3_unet_s5-d16.py', + '../_base_/datasets/chase_db1.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict(test_cfg=dict(crop_size=(128, 128), stride=(85, 85))) +evaluation = dict(metric='mDice') diff --git a/configs/unet/deeplabv3_unet_s5-d16_128x128_40k_stare.py b/configs/unet/deeplabv3_unet_s5-d16_128x128_40k_stare.py new file mode 100644 index 0000000000000000000000000000000000000000..0ef02dcc491871f148b1ad038d281d250eb6e2f4 --- /dev/null +++ b/configs/unet/deeplabv3_unet_s5-d16_128x128_40k_stare.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/deeplabv3_unet_s5-d16.py', '../_base_/datasets/stare.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] +model = dict(test_cfg=dict(crop_size=(128, 128), stride=(85, 85))) +evaluation = dict(metric='mDice') diff --git a/configs/unet/deeplabv3_unet_s5-d16_256x256_40k_hrf.py b/configs/unet/deeplabv3_unet_s5-d16_256x256_40k_hrf.py new file mode 100644 index 0000000000000000000000000000000000000000..118428bc44d3078517e231399b131db492f2bc7e --- /dev/null +++ b/configs/unet/deeplabv3_unet_s5-d16_256x256_40k_hrf.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/deeplabv3_unet_s5-d16.py', 
'../_base_/datasets/hrf.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] +model = dict(test_cfg=dict(crop_size=(256, 256), stride=(170, 170))) +evaluation = dict(metric='mDice') diff --git a/configs/unet/deeplabv3_unet_s5-d16_64x64_40k_drive.py b/configs/unet/deeplabv3_unet_s5-d16_64x64_40k_drive.py new file mode 100644 index 0000000000000000000000000000000000000000..1f8862a0e89243d67634f37c3aca94ca98feff5c --- /dev/null +++ b/configs/unet/deeplabv3_unet_s5-d16_64x64_40k_drive.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/deeplabv3_unet_s5-d16.py', '../_base_/datasets/drive.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] +model = dict(test_cfg=dict(crop_size=(64, 64), stride=(42, 42))) +evaluation = dict(metric='mDice') diff --git a/configs/unet/fcn_unet_s5-d16_128x128_40k_chase_db1.py b/configs/unet/fcn_unet_s5-d16_128x128_40k_chase_db1.py new file mode 100644 index 0000000000000000000000000000000000000000..2bc52d96293f214adf1e3e1878746ed8bd2434f6 --- /dev/null +++ b/configs/unet/fcn_unet_s5-d16_128x128_40k_chase_db1.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/fcn_unet_s5-d16.py', '../_base_/datasets/chase_db1.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] +model = dict(test_cfg=dict(crop_size=(128, 128), stride=(85, 85))) +evaluation = dict(metric='mDice') diff --git a/configs/unet/fcn_unet_s5-d16_128x128_40k_stare.py b/configs/unet/fcn_unet_s5-d16_128x128_40k_stare.py new file mode 100644 index 0000000000000000000000000000000000000000..5d836c61dfd568dd4d29d876980001067dcaa200 --- /dev/null +++ b/configs/unet/fcn_unet_s5-d16_128x128_40k_stare.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/fcn_unet_s5-d16.py', '../_base_/datasets/stare.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] +model = dict(test_cfg=dict(crop_size=(128, 128), stride=(85, 85))) +evaluation = dict(metric='mDice') diff --git a/configs/unet/fcn_unet_s5-d16_256x256_40k_hrf.py b/configs/unet/fcn_unet_s5-d16_256x256_40k_hrf.py new file mode 100644 index 0000000000000000000000000000000000000000..be8eec77792f4eb16475dc5ab8607fb5682f0acf --- /dev/null +++ b/configs/unet/fcn_unet_s5-d16_256x256_40k_hrf.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/fcn_unet_s5-d16.py', '../_base_/datasets/hrf.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] +model = dict(test_cfg=dict(crop_size=(256, 256), stride=(170, 170))) +evaluation = dict(metric='mDice') diff --git a/configs/unet/fcn_unet_s5-d16_64x64_40k_drive.py b/configs/unet/fcn_unet_s5-d16_64x64_40k_drive.py new file mode 100644 index 0000000000000000000000000000000000000000..80483ade4a4bc3dc5cb8805e8b74c100e872da0c --- /dev/null +++ b/configs/unet/fcn_unet_s5-d16_64x64_40k_drive.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/fcn_unet_s5-d16.py', '../_base_/datasets/drive.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] +model = dict(test_cfg=dict(crop_size=(64, 64), stride=(42, 42))) +evaluation = dict(metric='mDice') diff --git a/configs/unet/pspnet_unet_s5-d16_128x128_40k_chase_db1.py b/configs/unet/pspnet_unet_s5-d16_128x128_40k_chase_db1.py new file mode 100644 index 0000000000000000000000000000000000000000..b085a17d6bab5f4d33668bfcf232e30f2a9830fe --- /dev/null +++ b/configs/unet/pspnet_unet_s5-d16_128x128_40k_chase_db1.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/pspnet_unet_s5-d16.py', + '../_base_/datasets/chase_db1.py', '../_base_/default_runtime.py', + 
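The UNet configs in this group all replace the default evaluation metric with `evaluation = dict(metric='mDice')`, which is why the tables above report Dice rather than mIoU. As a toy illustration of the metric (mmseg computes the real one internally; `mean_dice` below is a hypothetical helper): per-class Dice is 2·|pred ∩ gt| / (|pred| + |gt|), averaged over classes.

```python
# Toy mDice for two small label maps; not the repo's implementation.
import numpy as np

def mean_dice(pred, gt, num_classes):
    scores = []
    for c in range(num_classes):
        p, g = pred == c, gt == c
        denom = p.sum() + g.sum()
        if denom > 0:  # skip classes absent from both prediction and GT
            scores.append(2.0 * np.logical_and(p, g).sum() / denom)
    return float(np.mean(scores))

pred = np.array([[0, 1], [1, 1]])
gt = np.array([[0, 1], [0, 1]])
print(mean_dice(pred, gt, num_classes=2))  # (2/3 + 4/5) / 2 ~ 0.733
```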
'../_base_/schedules/schedule_40k.py' +] +model = dict(test_cfg=dict(crop_size=(128, 128), stride=(85, 85))) +evaluation = dict(metric='mDice') diff --git a/configs/unet/pspnet_unet_s5-d16_128x128_40k_stare.py b/configs/unet/pspnet_unet_s5-d16_128x128_40k_stare.py new file mode 100644 index 0000000000000000000000000000000000000000..9d729cea699e1c845549c74b52703c9ee3273662 --- /dev/null +++ b/configs/unet/pspnet_unet_s5-d16_128x128_40k_stare.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/pspnet_unet_s5-d16.py', '../_base_/datasets/stare.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] +model = dict(test_cfg=dict(crop_size=(128, 128), stride=(85, 85))) +evaluation = dict(metric='mDice') diff --git a/configs/unet/pspnet_unet_s5-d16_256x256_40k_hrf.py b/configs/unet/pspnet_unet_s5-d16_256x256_40k_hrf.py new file mode 100644 index 0000000000000000000000000000000000000000..f57c9166b67a18fd74f474754b3baec6584b17cf --- /dev/null +++ b/configs/unet/pspnet_unet_s5-d16_256x256_40k_hrf.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/pspnet_unet_s5-d16.py', '../_base_/datasets/hrf.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] +model = dict(test_cfg=dict(crop_size=(256, 256), stride=(170, 170))) +evaluation = dict(metric='mDice') diff --git a/configs/unet/pspnet_unet_s5-d16_64x64_40k_drive.py b/configs/unet/pspnet_unet_s5-d16_64x64_40k_drive.py new file mode 100644 index 0000000000000000000000000000000000000000..7b5421ad6877e4b35b0a6ae6e516e577404547ce --- /dev/null +++ b/configs/unet/pspnet_unet_s5-d16_64x64_40k_drive.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/pspnet_unet_s5-d16.py', '../_base_/datasets/drive.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] +model = dict(test_cfg=dict(crop_size=(64, 64), stride=(42, 42))) +evaluation = dict(metric='mDice') diff --git a/configs/upernet/README.md b/configs/upernet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..312004a4d7a4f77d316a772eb45823aac5c61c08 --- /dev/null +++ b/configs/upernet/README.md @@ -0,0 +1,48 @@ +# Unified Perceptual Parsing for Scene Understanding + +## Introduction + + + +```latex +@inproceedings{xiao2018unified, + title={Unified perceptual parsing for scene understanding}, + author={Xiao, Tete and Liu, Yingcheng and Zhou, Bolei and Jiang, Yuning and Sun, Jian}, + booktitle={Proceedings of the European Conference on Computer Vision (ECCV)}, + pages={418--434}, + year={2018} +} +``` + +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------- | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | -------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| UPerNet | R-50 | 512x1024 | 40000 | 6.4 | 4.25 | 77.10 | 78.37 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/upernet/upernet_r50_512x1024_40k_cityscapes.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x1024_40k_cityscapes/upernet_r50_512x1024_40k_cityscapes_20200605_094827-aa54cb54.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x1024_40k_cityscapes/upernet_r50_512x1024_40k_cityscapes_20200605_094827.log.json) | +| UPerNet | R-101 | 512x1024 | 40000 | 7.4 | 3.79 | 78.69 | 80.11 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/upernet/upernet_r101_512x1024_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x1024_40k_cityscapes/upernet_r101_512x1024_40k_cityscapes_20200605_094933-ebce3b10.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x1024_40k_cityscapes/upernet_r101_512x1024_40k_cityscapes_20200605_094933.log.json) | +| UPerNet | R-50 | 769x769 | 40000 | 7.2 | 1.76 | 77.98 | 79.70 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/upernet/upernet_r50_769x769_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_769x769_40k_cityscapes/upernet_r50_769x769_40k_cityscapes_20200530_033048-92d21539.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_769x769_40k_cityscapes/upernet_r50_769x769_40k_cityscapes_20200530_033048.log.json) | +| UPerNet | R-101 | 769x769 | 40000 | 8.4 | 1.56 | 79.03 | 80.77 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/upernet/upernet_r101_769x769_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_769x769_40k_cityscapes/upernet_r101_769x769_40k_cityscapes_20200530_040819-83c95d01.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_769x769_40k_cityscapes/upernet_r101_769x769_40k_cityscapes_20200530_040819.log.json) | +| UPerNet | R-50 | 512x1024 | 80000 | - | - | 78.19 | 79.19 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/upernet/upernet_r50_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x1024_80k_cityscapes/upernet_r50_512x1024_80k_cityscapes_20200607_052207-848beca8.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x1024_80k_cityscapes/upernet_r50_512x1024_80k_cityscapes_20200607_052207.log.json) | +| UPerNet | R-101 | 512x1024 | 80000 | - | - | 79.40 | 80.46 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/upernet/upernet_r101_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x1024_80k_cityscapes/upernet_r101_512x1024_80k_cityscapes_20200607_002403-f05f2345.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x1024_80k_cityscapes/upernet_r101_512x1024_80k_cityscapes_20200607_002403.log.json) | +| UPerNet | R-50 | 769x769 | 80000 | - | - | 79.39 | 80.92 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/upernet/upernet_r50_769x769_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_769x769_80k_cityscapes/upernet_r50_769x769_80k_cityscapes_20200607_005107-82ae7d15.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_769x769_80k_cityscapes/upernet_r50_769x769_80k_cityscapes_20200607_005107.log.json) | +| UPerNet | R-101 | 769x769 | 80000 | - | - | 80.10 | 81.49 | 
[config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/upernet/upernet_r101_769x769_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_769x769_80k_cityscapes/upernet_r101_769x769_80k_cityscapes_20200607_001014-082fc334.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_769x769_80k_cityscapes/upernet_r101_769x769_80k_cityscapes_20200607_001014.log.json) | + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------- | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ---------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| UPerNet | R-50 | 512x512 | 80000 | 8.1 | 23.40 | 40.70 | 41.81 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/upernet/upernet_r50_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x512_80k_ade20k/upernet_r50_512x512_80k_ade20k_20200614_144127-ecc8377b.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x512_80k_ade20k/upernet_r50_512x512_80k_ade20k_20200614_144127.log.json) | +| UPerNet | R-101 | 512x512 | 80000 | 9.1 | 20.34 | 42.91 | 43.96 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/upernet/upernet_r101_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x512_80k_ade20k/upernet_r101_512x512_80k_ade20k_20200614_185117-32e4db94.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x512_80k_ade20k/upernet_r101_512x512_80k_ade20k_20200614_185117.log.json) | +| UPerNet | R-50 | 512x512 | 160000 | - | - | 42.05 | 42.78 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/upernet/upernet_r50_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x512_160k_ade20k/upernet_r50_512x512_160k_ade20k_20200615_184328-8534de8d.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x512_160k_ade20k/upernet_r50_512x512_160k_ade20k_20200615_184328.log.json) | +| UPerNet | R-101 | 512x512 | 160000 | - | - | 43.82 | 44.85 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/upernet/upernet_r101_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x512_160k_ade20k/upernet_r101_512x512_160k_ade20k_20200615_161951-91b32684.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x512_160k_ade20k/upernet_r101_512x512_160k_ade20k_20200615_161951.log.json) | + +### Pascal VOC 2012 + Aug + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------- | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ----------------------------------------------------------------------------------------------------------------------- | 
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| UPerNet | R-50 | 512x512 | 20000 | 6.4 | 23.17 | 74.82 | 76.35 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/upernet/upernet_r50_512x512_20k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x512_20k_voc12aug/upernet_r50_512x512_20k_voc12aug_20200617_165330-5b5890a7.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x512_20k_voc12aug/upernet_r50_512x512_20k_voc12aug_20200617_165330.log.json) | +| UPerNet | R-101 | 512x512 | 20000 | 7.5 | 19.98 | 77.10 | 78.29 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/upernet/upernet_r101_512x512_20k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x512_20k_voc12aug/upernet_r101_512x512_20k_voc12aug_20200617_165629-f14e7f27.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x512_20k_voc12aug/upernet_r101_512x512_20k_voc12aug_20200617_165629.log.json) | +| UPerNet | R-50 | 512x512 | 40000 | - | - | 75.92 | 77.44 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/upernet/upernet_r50_512x512_40k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x512_40k_voc12aug/upernet_r50_512x512_40k_voc12aug_20200613_162257-ca9bcc6b.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x512_40k_voc12aug/upernet_r50_512x512_40k_voc12aug_20200613_162257.log.json) | +| UPerNet | R-101 | 512x512 | 40000 | - | - | 77.43 | 78.56 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/upernet/upernet_r101_512x512_40k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x512_40k_voc12aug/upernet_r101_512x512_40k_voc12aug_20200613_163549-e26476ac.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x512_40k_voc12aug/upernet_r101_512x512_40k_voc12aug_20200613_163549.log.json) | diff --git a/configs/upernet/upernet_r101_512x1024_40k_cityscapes.py b/configs/upernet/upernet_r101_512x1024_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..b90b597d831a664761d6051397d2b1862feb59c6 --- /dev/null +++ b/configs/upernet/upernet_r101_512x1024_40k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './upernet_r50_512x1024_40k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/upernet/upernet_r101_512x1024_80k_cityscapes.py b/configs/upernet/upernet_r101_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..420ca2e42836099213c1f91cb925088cfe7c1269 --- /dev/null +++ b/configs/upernet/upernet_r101_512x1024_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './upernet_r50_512x1024_80k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/upernet/upernet_r101_512x512_160k_ade20k.py b/configs/upernet/upernet_r101_512x512_160k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..146f13eb79053cc69d4934d294aad9ba723b2577 --- /dev/null 
+++ b/configs/upernet/upernet_r101_512x512_160k_ade20k.py @@ -0,0 +1,2 @@ +_base_ = './upernet_r50_512x512_160k_ade20k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/upernet/upernet_r101_512x512_20k_voc12aug.py b/configs/upernet/upernet_r101_512x512_20k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..56345d1806482ac822d709893fe6942f44be6f74 --- /dev/null +++ b/configs/upernet/upernet_r101_512x512_20k_voc12aug.py @@ -0,0 +1,2 @@ +_base_ = './upernet_r50_512x512_20k_voc12aug.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/upernet/upernet_r101_512x512_40k_voc12aug.py b/configs/upernet/upernet_r101_512x512_40k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..0669b741b9b3e3e1a309147b920d3d2a1952ab75 --- /dev/null +++ b/configs/upernet/upernet_r101_512x512_40k_voc12aug.py @@ -0,0 +1,2 @@ +_base_ = './upernet_r50_512x512_40k_voc12aug.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/upernet/upernet_r101_512x512_80k_ade20k.py b/configs/upernet/upernet_r101_512x512_80k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..abfb9c5d9f35407d590cdc3325006b396ec52820 --- /dev/null +++ b/configs/upernet/upernet_r101_512x512_80k_ade20k.py @@ -0,0 +1,2 @@ +_base_ = './upernet_r50_512x512_80k_ade20k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/upernet/upernet_r101_769x769_40k_cityscapes.py b/configs/upernet/upernet_r101_769x769_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..e5f3a3fae18cb769fd04b0c669785c5728cf479f --- /dev/null +++ b/configs/upernet/upernet_r101_769x769_40k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './upernet_r50_769x769_40k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/upernet/upernet_r101_769x769_80k_cityscapes.py b/configs/upernet/upernet_r101_769x769_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..a709165657d257df4fc76148d225261c63f88d8a --- /dev/null +++ b/configs/upernet/upernet_r101_769x769_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './upernet_r50_769x769_80k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/upernet/upernet_r50_512x1024_40k_cityscapes.py b/configs/upernet/upernet_r50_512x1024_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..d621e89ce62c06424db7c2e5f5fd00a0a2e85a61 --- /dev/null +++ b/configs/upernet/upernet_r50_512x1024_40k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/upernet_r50.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] diff --git a/configs/upernet/upernet_r50_512x1024_80k_cityscapes.py b/configs/upernet/upernet_r50_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..95fffcc76c2ff4f61f8dd80a00d35b7875262a50 --- /dev/null +++ b/configs/upernet/upernet_r50_512x1024_80k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/upernet_r50.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] diff --git a/configs/upernet/upernet_r50_512x512_160k_ade20k.py 
b/configs/upernet/upernet_r50_512x512_160k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..f5dd9aa4ed59d4939bcb49ffe129a9935e303201 --- /dev/null +++ b/configs/upernet/upernet_r50_512x512_160k_ade20k.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/upernet_r50.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +model = dict( + decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) diff --git a/configs/upernet/upernet_r50_512x512_20k_voc12aug.py b/configs/upernet/upernet_r50_512x512_20k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..95f5c09567144db47e07fc802b114bedd6a00725 --- /dev/null +++ b/configs/upernet/upernet_r50_512x512_20k_voc12aug.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/upernet_r50.py', + '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_20k.py' +] +model = dict( + decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21)) diff --git a/configs/upernet/upernet_r50_512x512_40k_voc12aug.py b/configs/upernet/upernet_r50_512x512_40k_voc12aug.py new file mode 100644 index 0000000000000000000000000000000000000000..9621fd1f5c24e582b4a1eda18fcc0a13d2bcb953 --- /dev/null +++ b/configs/upernet/upernet_r50_512x512_40k_voc12aug.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/upernet_r50.py', + '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21)) diff --git a/configs/upernet/upernet_r50_512x512_80k_ade20k.py b/configs/upernet/upernet_r50_512x512_80k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..f561e309e3bddb439c90af930c4de5a0c7e209a7 --- /dev/null +++ b/configs/upernet/upernet_r50_512x512_80k_ade20k.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/upernet_r50.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) diff --git a/configs/upernet/upernet_r50_769x769_40k_cityscapes.py b/configs/upernet/upernet_r50_769x769_40k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..89b18aa2840d12e67339ce0b7a0561fa2ba0c6fa --- /dev/null +++ b/configs/upernet/upernet_r50_769x769_40k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/upernet_r50.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/configs/upernet/upernet_r50_769x769_80k_cityscapes.py b/configs/upernet/upernet_r50_769x769_80k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..29af98f2ebe341998fcf93f8a5c018cabcc0c0ba --- /dev/null +++ b/configs/upernet/upernet_r50_769x769_80k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/upernet_r50.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/exp/.DS_Store 
b/exp/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..cc1092431436955acba7e76a3f08ee0078921883 Binary files /dev/null and b/exp/.DS_Store differ diff --git a/exp/upernet_global_base/config.py b/exp/upernet_global_base/config.py new file mode 100644 index 0000000000000000000000000000000000000000..a309591da09653835292e40c3bf7348f2d46181f --- /dev/null +++ b/exp/upernet_global_base/config.py @@ -0,0 +1,40 @@ +_base_ = [ + '../../configs/_base_/models/upernet_uniformer.py', + '../../configs/_base_/datasets/ade20k.py', + '../../configs/_base_/default_runtime.py', + '../../configs/_base_/schedules/schedule_160k.py' +] +model = dict( + backbone=dict( + type='UniFormer', + embed_dim=[64, 128, 320, 512], + layers=[5, 8, 20, 7], + head_dim=64, + drop_path_rate=0.4, + use_checkpoint=True, + checkpoint_num=[0, 0, 2, 0], + windows=False, + hybrid=False + ), + decode_head=dict( + in_channels=[64, 128, 320, 512], + num_classes=150 + ), + auxiliary_head=dict( + in_channels=320, + num_classes=150 + )) + +# AdamW optimizer, no weight decay for position embedding & layer norm in backbone +optimizer = dict(_delete_=True, type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01, + paramwise_cfg=dict(custom_keys={'absolute_pos_embed': dict(decay_mult=0.), + 'relative_position_bias_table': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.)})) + +lr_config = dict(_delete_=True, policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, min_lr=0.0, by_epoch=False) + +data=dict(samples_per_gpu=2) \ No newline at end of file diff --git a/exp/upernet_global_base/run.sh b/exp/upernet_global_base/run.sh new file mode 100644 index 0000000000000000000000000000000000000000..ee49cf4006584c7f24203a15c7a9a11babacd49d --- /dev/null +++ b/exp/upernet_global_base/run.sh @@ -0,0 +1,10 @@ +#!/usr/bin/env bash + +work_path=$(dirname $0) +PYTHONPATH="$(dirname $0)/../../":$PYTHONPATH \ +python -m torch.distributed.launch --nproc_per_node=8 \ + tools/train.py ${work_path}/config.py \ + --launcher pytorch \ + --options model.backbone.pretrained_path='your_model_path/uniformer_base_in1k.pth' \ + --work-dir ${work_path}/ckpt \ + 2>&1 | tee -a ${work_path}/log.txt diff --git a/exp/upernet_global_base/test.sh b/exp/upernet_global_base/test.sh new file mode 100644 index 0000000000000000000000000000000000000000..d9a85e7a0d3b7c96b060f473d41254b37a382fcb --- /dev/null +++ b/exp/upernet_global_base/test.sh @@ -0,0 +1,10 @@ +#!/usr/bin/env bash + +work_path=$(dirname $0) +PYTHONPATH="$(dirname $0)/../../":$PYTHONPATH \ +python -m torch.distributed.launch --nproc_per_node=8 \ + tools/test.py ${work_path}/test_config_h32.py \ + ${work_path}/ckpt/latest.pth \ + --launcher pytorch \ + --eval mIoU \ + 2>&1 | tee -a ${work_path}/log.txt diff --git a/exp/upernet_global_base/test_config_g.py b/exp/upernet_global_base/test_config_g.py new file mode 100644 index 0000000000000000000000000000000000000000..a038afd00c3787832beb5cc60ae4c38866d7ad1d --- /dev/null +++ b/exp/upernet_global_base/test_config_g.py @@ -0,0 +1,38 @@ +_base_ = [ + '../../configs/_base_/models/upernet_uniformer.py', + '../../configs/_base_/datasets/ade20k.py', + '../../configs/_base_/default_runtime.py', + '../../configs/_base_/schedules/schedule_160k.py' +] +model = dict( + backbone=dict( + type='UniFormer', + embed_dim=[64, 128, 320, 512], + layers=[5, 8, 20, 7], + head_dim=64, + drop_path_rate=0.4, + windows=False, + hybrid=False, + ), + decode_head=dict( + in_channels=[64, 128, 320, 512], + num_classes=150 + ), + 
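The `--options model.backbone.pretrained_path=...` flag in these run.sh scripts injects the pretrained-weight path at launch time rather than hard-coding it in config.py. A hedged sketch of the mechanism, assuming the training tool forwards such key=value pairs to mmcv's `Config.merge_from_dict` (which accepts dotted keys):

```python
# Illustrative only; the path is the same placeholder used in run.sh.
from mmcv import Config

cfg = Config.fromfile('exp/upernet_global_base/config.py')
cfg.merge_from_dict(
    {'model.backbone.pretrained_path': 'your_model_path/uniformer_base_in1k.pth'})
print(cfg.model.backbone.pretrained_path)
```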
auxiliary_head=dict( + in_channels=320, + num_classes=150 + )) + +# AdamW optimizer, no weight decay for position embedding & layer norm in backbone +optimizer = dict(_delete_=True, type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01, + paramwise_cfg=dict(custom_keys={'absolute_pos_embed': dict(decay_mult=0.), + 'relative_position_bias_table': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.)})) + +lr_config = dict(_delete_=True, policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, min_lr=0.0, by_epoch=False) + +data=dict(samples_per_gpu=2) \ No newline at end of file diff --git a/exp/upernet_global_base/test_config_h32.py b/exp/upernet_global_base/test_config_h32.py new file mode 100644 index 0000000000000000000000000000000000000000..1047ba99c52de0745bc48e253ec8f965b6da10d0 --- /dev/null +++ b/exp/upernet_global_base/test_config_h32.py @@ -0,0 +1,39 @@ +_base_ = [ + '../../configs/_base_/models/upernet_uniformer.py', + '../../configs/_base_/datasets/ade20k.py', + '../../configs/_base_/default_runtime.py', + '../../configs/_base_/schedules/schedule_160k.py' +] +model = dict( + backbone=dict( + type='UniFormer', + embed_dim=[64, 128, 320, 512], + layers=[5, 8, 20, 7], + head_dim=64, + drop_path_rate=0.4, + windows=False, + hybrid=True, + window_size=32, + ), + decode_head=dict( + in_channels=[64, 128, 320, 512], + num_classes=150 + ), + auxiliary_head=dict( + in_channels=320, + num_classes=150 + )) + +# AdamW optimizer, no weight decay for position embedding & layer norm in backbone +optimizer = dict(_delete_=True, type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01, + paramwise_cfg=dict(custom_keys={'absolute_pos_embed': dict(decay_mult=0.), + 'relative_position_bias_table': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.)})) + +lr_config = dict(_delete_=True, policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, min_lr=0.0, by_epoch=False) + +data=dict(samples_per_gpu=2) \ No newline at end of file diff --git a/exp/upernet_global_base/test_config_w32.py b/exp/upernet_global_base/test_config_w32.py new file mode 100644 index 0000000000000000000000000000000000000000..56291ed20571a5d5ebcdf797c49372856a04276a --- /dev/null +++ b/exp/upernet_global_base/test_config_w32.py @@ -0,0 +1,39 @@ +_base_ = [ + '../../configs/_base_/models/upernet_uniformer.py', + '../../configs/_base_/datasets/ade20k.py', + '../../configs/_base_/default_runtime.py', + '../../configs/_base_/schedules/schedule_160k.py' +] +model = dict( + backbone=dict( + type='UniFormer', + embed_dim=[64, 128, 320, 512], + layers=[5, 8, 20, 7], + head_dim=64, + drop_path_rate=0.4, + windows=True, + hybrid=False, + window_size=32, + ), + decode_head=dict( + in_channels=[64, 128, 320, 512], + num_classes=150 + ), + auxiliary_head=dict( + in_channels=320, + num_classes=150 + )) + +# AdamW optimizer, no weight decay for position embedding & layer norm in backbone +optimizer = dict(_delete_=True, type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01, + paramwise_cfg=dict(custom_keys={'absolute_pos_embed': dict(decay_mult=0.), + 'relative_position_bias_table': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.)})) + +lr_config = dict(_delete_=True, policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, min_lr=0.0, by_epoch=False) + +data=dict(samples_per_gpu=2) \ No newline at end of file diff --git a/exp/upernet_global_small/config.py b/exp/upernet_global_small/config.py new file mode 100644 index 
0000000000000000000000000000000000000000..01db96bf9b0be531aa0eaf62fee51543712f8670 --- /dev/null +++ b/exp/upernet_global_small/config.py @@ -0,0 +1,38 @@ +_base_ = [ + '../../configs/_base_/models/upernet_uniformer.py', + '../../configs/_base_/datasets/ade20k.py', + '../../configs/_base_/default_runtime.py', + '../../configs/_base_/schedules/schedule_160k.py' +] +model = dict( + backbone=dict( + type='UniFormer', + embed_dim=[64, 128, 320, 512], + layers=[3, 4, 8, 3], + head_dim=64, + drop_path_rate=0.25, + windows=False, + hybrid=False + ), + decode_head=dict( + in_channels=[64, 128, 320, 512], + num_classes=150 + ), + auxiliary_head=dict( + in_channels=320, + num_classes=150 + )) + +# AdamW optimizer, no weight decay for position embedding & layer norm in backbone +optimizer = dict(_delete_=True, type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01, + paramwise_cfg=dict(custom_keys={'absolute_pos_embed': dict(decay_mult=0.), + 'relative_position_bias_table': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.)})) + +lr_config = dict(_delete_=True, policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, min_lr=0.0, by_epoch=False) + +data=dict(samples_per_gpu=2) \ No newline at end of file diff --git a/exp/upernet_global_small/run.sh b/exp/upernet_global_small/run.sh new file mode 100644 index 0000000000000000000000000000000000000000..9fb22edfa7a32624ea08a63fe7d720c40db3b696 --- /dev/null +++ b/exp/upernet_global_small/run.sh @@ -0,0 +1,10 @@ +#!/usr/bin/env bash + +work_path=$(dirname $0) +PYTHONPATH="$(dirname $0)/../../":$PYTHONPATH \ +python -m torch.distributed.launch --nproc_per_node=8 \ + tools/train.py ${work_path}/config.py \ + --launcher pytorch \ + --options model.backbone.pretrained_path='your_model_path/uniformer_small_in1k.pth' \ + --work-dir ${work_path}/ckpt \ + 2>&1 | tee -a ${work_path}/log.txt diff --git a/exp/upernet_global_small/test.sh b/exp/upernet_global_small/test.sh new file mode 100644 index 0000000000000000000000000000000000000000..d9a85e7a0d3b7c96b060f473d41254b37a382fcb --- /dev/null +++ b/exp/upernet_global_small/test.sh @@ -0,0 +1,10 @@ +#!/usr/bin/env bash + +work_path=$(dirname $0) +PYTHONPATH="$(dirname $0)/../../":$PYTHONPATH \ +python -m torch.distributed.launch --nproc_per_node=8 \ + tools/test.py ${work_path}/test_config_h32.py \ + ${work_path}/ckpt/latest.pth \ + --launcher pytorch \ + --eval mIoU \ + 2>&1 | tee -a ${work_path}/log.txt diff --git a/exp/upernet_global_small/test_config_g.py b/exp/upernet_global_small/test_config_g.py new file mode 100644 index 0000000000000000000000000000000000000000..e43737a98a3b174a9f2fe059c06d511144686459 --- /dev/null +++ b/exp/upernet_global_small/test_config_g.py @@ -0,0 +1,38 @@ +_base_ = [ + '../../configs/_base_/models/upernet_uniformer.py', + '../../configs/_base_/datasets/ade20k.py', + '../../configs/_base_/default_runtime.py', + '../../configs/_base_/schedules/schedule_160k.py' +] +model = dict( + backbone=dict( + type='UniFormer', + embed_dim=[64, 128, 320, 512], + layers=[3, 4, 8, 3], + head_dim=64, + drop_path_rate=0.25, + windows=False, + hybrid=False, + ), + decode_head=dict( + in_channels=[64, 128, 320, 512], + num_classes=150 + ), + auxiliary_head=dict( + in_channels=320, + num_classes=150 + )) + +# AdamW optimizer, no weight decay for position embedding & layer norm in backbone +optimizer = dict(_delete_=True, type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01, + paramwise_cfg=dict(custom_keys={'absolute_pos_embed': dict(decay_mult=0.), + 
'relative_position_bias_table': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.)})) + +lr_config = dict(_delete_=True, policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, min_lr=0.0, by_epoch=False) + +data=dict(samples_per_gpu=2) \ No newline at end of file diff --git a/exp/upernet_global_small/test_config_h32.py b/exp/upernet_global_small/test_config_h32.py new file mode 100644 index 0000000000000000000000000000000000000000..a31e3874f76f9f7b089ac8834d85df2441af9b0e --- /dev/null +++ b/exp/upernet_global_small/test_config_h32.py @@ -0,0 +1,39 @@ +_base_ = [ + '../../configs/_base_/models/upernet_uniformer.py', + '../../configs/_base_/datasets/ade20k.py', + '../../configs/_base_/default_runtime.py', + '../../configs/_base_/schedules/schedule_160k.py' +] +model = dict( + backbone=dict( + type='UniFormer', + embed_dim=[64, 128, 320, 512], + layers=[3, 4, 8, 3], + head_dim=64, + drop_path_rate=0.25, + windows=False, + hybrid=True, + window_size=32 + ), + decode_head=dict( + in_channels=[64, 128, 320, 512], + num_classes=150 + ), + auxiliary_head=dict( + in_channels=320, + num_classes=150 + )) + +# AdamW optimizer, no weight decay for position embedding & layer norm in backbone +optimizer = dict(_delete_=True, type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01, + paramwise_cfg=dict(custom_keys={'absolute_pos_embed': dict(decay_mult=0.), + 'relative_position_bias_table': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.)})) + +lr_config = dict(_delete_=True, policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, min_lr=0.0, by_epoch=False) + +data=dict(samples_per_gpu=2) \ No newline at end of file diff --git a/exp/upernet_global_small/test_config_w32.py b/exp/upernet_global_small/test_config_w32.py new file mode 100644 index 0000000000000000000000000000000000000000..3d9e06f029e46c14cb9ddb39319cabe86fef9b44 --- /dev/null +++ b/exp/upernet_global_small/test_config_w32.py @@ -0,0 +1,39 @@ +_base_ = [ + '../../configs/_base_/models/upernet_uniformer.py', + '../../configs/_base_/datasets/ade20k.py', + '../../configs/_base_/default_runtime.py', + '../../configs/_base_/schedules/schedule_160k.py' +] +model = dict( + backbone=dict( + type='UniFormer', + embed_dim=[64, 128, 320, 512], + layers=[3, 4, 8, 3], + head_dim=64, + drop_path_rate=0.25, + windows=True, + hybrid=False, + window_size=32 + ), + decode_head=dict( + in_channels=[64, 128, 320, 512], + num_classes=150 + ), + auxiliary_head=dict( + in_channels=320, + num_classes=150 + )) + +# AdamW optimizer, no weight decay for position embedding & layer norm in backbone +optimizer = dict(_delete_=True, type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01, + paramwise_cfg=dict(custom_keys={'absolute_pos_embed': dict(decay_mult=0.), + 'relative_position_bias_table': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.)})) + +lr_config = dict(_delete_=True, policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, min_lr=0.0, by_epoch=False) + +data=dict(samples_per_gpu=2) \ No newline at end of file diff --git a/exp/upernet_hybrid_base/config.py b/exp/upernet_hybrid_base/config.py new file mode 100644 index 0000000000000000000000000000000000000000..adff140b06486ef3f8fa0bdfdc6a6042a2d6cd7a --- /dev/null +++ b/exp/upernet_hybrid_base/config.py @@ -0,0 +1,40 @@ +_base_ = [ + '../../configs/_base_/models/upernet_uniformer.py', + '../../configs/_base_/datasets/ade20k.py', + '../../configs/_base_/default_runtime.py', + 
'../../configs/_base_/schedules/schedule_160k.py' +] +model = dict( + backbone=dict( + type='UniFormer', + embed_dim=[64, 128, 320, 512], + layers=[5, 8, 20, 7], + head_dim=64, + drop_path_rate=0.4, + use_checkpoint=True, + checkpoint_num=[0, 0, 2, 0], + windows=False, + hybrid=True, + ), + decode_head=dict( + in_channels=[64, 128, 320, 512], + num_classes=150 + ), + auxiliary_head=dict( + in_channels=320, + num_classes=150 + )) + +# AdamW optimizer, no weight decay for position embedding & layer norm in backbone +optimizer = dict(_delete_=True, type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01, + paramwise_cfg=dict(custom_keys={'absolute_pos_embed': dict(decay_mult=0.), + 'relative_position_bias_table': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.)})) + +lr_config = dict(_delete_=True, policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, min_lr=0.0, by_epoch=False) + +data=dict(samples_per_gpu=2) \ No newline at end of file diff --git a/exp/upernet_hybrid_base/run.sh b/exp/upernet_hybrid_base/run.sh new file mode 100644 index 0000000000000000000000000000000000000000..ee49cf4006584c7f24203a15c7a9a11babacd49d --- /dev/null +++ b/exp/upernet_hybrid_base/run.sh @@ -0,0 +1,10 @@ +#!/usr/bin/env bash + +work_path=$(dirname $0) +PYTHONPATH="$(dirname $0)/../../":$PYTHONPATH \ +python -m torch.distributed.launch --nproc_per_node=8 \ + tools/train.py ${work_path}/config.py \ + --launcher pytorch \ + --options model.backbone.pretrained_path='your_model_path/uniformer_base_in1k.pth' \ + --work-dir ${work_path}/ckpt \ + 2>&1 | tee -a ${work_path}/log.txt diff --git a/exp/upernet_hybrid_small/config.py b/exp/upernet_hybrid_small/config.py new file mode 100644 index 0000000000000000000000000000000000000000..6d4e01911b269d5cb579b1c0ab0189c361512733 --- /dev/null +++ b/exp/upernet_hybrid_small/config.py @@ -0,0 +1,38 @@ +_base_ = [ + '../../configs/_base_/models/upernet_uniformer.py', + '../../configs/_base_/datasets/ade20k.py', + '../../configs/_base_/default_runtime.py', + '../../configs/_base_/schedules/schedule_160k.py' +] +model = dict( + backbone=dict( + type='UniFormer', + embed_dim=[64, 128, 320, 512], + layers=[3, 4, 8, 3], + head_dim=64, + drop_path_rate=0.25, + windows=False, + hybrid=True + ), + decode_head=dict( + in_channels=[64, 128, 320, 512], + num_classes=150 + ), + auxiliary_head=dict( + in_channels=320, + num_classes=150 + )) + +# AdamW optimizer, no weight decay for position embedding & layer norm in backbone +optimizer = dict(_delete_=True, type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01, + paramwise_cfg=dict(custom_keys={'absolute_pos_embed': dict(decay_mult=0.), + 'relative_position_bias_table': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.)})) + +lr_config = dict(_delete_=True, policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, min_lr=0.0, by_epoch=False) + +data=dict(samples_per_gpu=2) \ No newline at end of file diff --git a/exp/upernet_hybrid_small/run.sh b/exp/upernet_hybrid_small/run.sh new file mode 100644 index 0000000000000000000000000000000000000000..9fb22edfa7a32624ea08a63fe7d720c40db3b696 --- /dev/null +++ b/exp/upernet_hybrid_small/run.sh @@ -0,0 +1,10 @@ +#!/usr/bin/env bash + +work_path=$(dirname $0) +PYTHONPATH="$(dirname $0)/../../":$PYTHONPATH \ +python -m torch.distributed.launch --nproc_per_node=8 \ + tools/train.py ${work_path}/config.py \ + --launcher pytorch \ + --options model.backbone.pretrained_path='your_model_path/uniformer_small_in1k.pth' \ 
+ --work-dir ${work_path}/ckpt \ + 2>&1 | tee -a ${work_path}/log.txt diff --git a/mmcv_custom/__init__.py b/mmcv_custom/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..4b958738b9fd93bfcec239c550df1d9a44b8c536 --- /dev/null +++ b/mmcv_custom/__init__.py @@ -0,0 +1,5 @@ +# -*- coding: utf-8 -*- + +from .checkpoint import load_checkpoint + +__all__ = ['load_checkpoint'] \ No newline at end of file diff --git a/mmcv_custom/checkpoint.py b/mmcv_custom/checkpoint.py new file mode 100644 index 0000000000000000000000000000000000000000..e2f3878beebfa36921f4e34d1b96840c2bfd5df1 --- /dev/null +++ b/mmcv_custom/checkpoint.py @@ -0,0 +1,500 @@ +# Copyright (c) Open-MMLab. All rights reserved. +import io +import os +import os.path as osp +import pkgutil +import time +import warnings +from collections import OrderedDict +from importlib import import_module +from tempfile import TemporaryDirectory + +import torch +import torchvision +from torch.optim import Optimizer +from torch.utils import model_zoo +from torch.nn import functional as F + +import mmcv +from mmcv.fileio import FileClient +from mmcv.fileio import load as load_file +from mmcv.parallel import is_module_wrapper +from mmcv.utils import mkdir_or_exist +from mmcv.runner import get_dist_info + +ENV_MMCV_HOME = 'MMCV_HOME' +ENV_XDG_CACHE_HOME = 'XDG_CACHE_HOME' +DEFAULT_CACHE_DIR = '~/.cache' + + +def _get_mmcv_home(): + mmcv_home = os.path.expanduser( + os.getenv( + ENV_MMCV_HOME, + os.path.join( + os.getenv(ENV_XDG_CACHE_HOME, DEFAULT_CACHE_DIR), 'mmcv'))) + + mkdir_or_exist(mmcv_home) + return mmcv_home + + +def load_state_dict(module, state_dict, strict=False, logger=None): + """Load state_dict to a module. + + This method is modified from :meth:`torch.nn.Module.load_state_dict`. + Default value for ``strict`` is set to ``False`` and the message for + param mismatch will be shown even if strict is False. + + Args: + module (Module): Module that receives the state_dict. + state_dict (OrderedDict): Weights. + strict (bool): whether to strictly enforce that the keys + in :attr:`state_dict` match the keys returned by this module's + :meth:`~torch.nn.Module.state_dict` function. Default: ``False``. + logger (:obj:`logging.Logger`, optional): Logger to log the error + message. If not specified, print function will be used. 
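    Example (an illustrative sketch; ``model`` and ``state_dict`` are assumed
    to already exist and to disagree on some keys)::

        >>> # with strict=False the mismatch is reported, not raised
        >>> load_state_dict(model, state_dict, strict=False)
        The model and loaded state dict do not match exactly
        ...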
+ """ + unexpected_keys = [] + all_missing_keys = [] + err_msg = [] + + metadata = getattr(state_dict, '_metadata', None) + state_dict = state_dict.copy() + if metadata is not None: + state_dict._metadata = metadata + + # use _load_from_state_dict to enable checkpoint version control + def load(module, prefix=''): + # recursively check parallel module in case that the model has a + # complicated structure, e.g., nn.Module(nn.Module(DDP)) + if is_module_wrapper(module): + module = module.module + local_metadata = {} if metadata is None else metadata.get( + prefix[:-1], {}) + module._load_from_state_dict(state_dict, prefix, local_metadata, True, + all_missing_keys, unexpected_keys, + err_msg) + for name, child in module._modules.items(): + if child is not None: + load(child, prefix + name + '.') + + load(module) + load = None # break load->load reference cycle + + # ignore "num_batches_tracked" of BN layers + missing_keys = [ + key for key in all_missing_keys if 'num_batches_tracked' not in key + ] + + if unexpected_keys: + err_msg.append('unexpected key in source ' + f'state_dict: {", ".join(unexpected_keys)}\n') + if missing_keys: + err_msg.append( + f'missing keys in source state_dict: {", ".join(missing_keys)}\n') + + rank, _ = get_dist_info() + if len(err_msg) > 0 and rank == 0: + err_msg.insert( + 0, 'The model and loaded state dict do not match exactly\n') + err_msg = '\n'.join(err_msg) + if strict: + raise RuntimeError(err_msg) + elif logger is not None: + logger.warning(err_msg) + else: + print(err_msg) + + +def load_url_dist(url, model_dir=None): + """In distributed setting, this function only download checkpoint at local + rank 0.""" + rank, world_size = get_dist_info() + rank = int(os.environ.get('LOCAL_RANK', rank)) + if rank == 0: + checkpoint = model_zoo.load_url(url, model_dir=model_dir) + if world_size > 1: + torch.distributed.barrier() + if rank > 0: + checkpoint = model_zoo.load_url(url, model_dir=model_dir) + return checkpoint + + +def load_pavimodel_dist(model_path, map_location=None): + """In distributed setting, this function only download checkpoint at local + rank 0.""" + try: + from pavi import modelcloud + except ImportError: + raise ImportError( + 'Please install pavi to load checkpoint from modelcloud.') + rank, world_size = get_dist_info() + rank = int(os.environ.get('LOCAL_RANK', rank)) + if rank == 0: + model = modelcloud.get(model_path) + with TemporaryDirectory() as tmp_dir: + downloaded_file = osp.join(tmp_dir, model.name) + model.download(downloaded_file) + checkpoint = torch.load(downloaded_file, map_location=map_location) + if world_size > 1: + torch.distributed.barrier() + if rank > 0: + model = modelcloud.get(model_path) + with TemporaryDirectory() as tmp_dir: + downloaded_file = osp.join(tmp_dir, model.name) + model.download(downloaded_file) + checkpoint = torch.load( + downloaded_file, map_location=map_location) + return checkpoint + + +def load_fileclient_dist(filename, backend, map_location): + """In distributed setting, this function only download checkpoint at local + rank 0.""" + rank, world_size = get_dist_info() + rank = int(os.environ.get('LOCAL_RANK', rank)) + allowed_backends = ['ceph'] + if backend not in allowed_backends: + raise ValueError(f'Load from Backend {backend} is not supported.') + if rank == 0: + fileclient = FileClient(backend=backend) + buffer = io.BytesIO(fileclient.get(filename)) + checkpoint = torch.load(buffer, map_location=map_location) + if world_size > 1: + torch.distributed.barrier() + if rank > 0: + fileclient = 
FileClient(backend=backend) + buffer = io.BytesIO(fileclient.get(filename)) + checkpoint = torch.load(buffer, map_location=map_location) + return checkpoint + + +def get_torchvision_models(): + model_urls = dict() + for _, name, ispkg in pkgutil.walk_packages(torchvision.models.__path__): + if ispkg: + continue + _zoo = import_module(f'torchvision.models.{name}') + if hasattr(_zoo, 'model_urls'): + _urls = getattr(_zoo, 'model_urls') + model_urls.update(_urls) + return model_urls + + +def get_external_models(): + mmcv_home = _get_mmcv_home() + default_json_path = osp.join(mmcv.__path__[0], 'model_zoo/open_mmlab.json') + default_urls = load_file(default_json_path) + assert isinstance(default_urls, dict) + external_json_path = osp.join(mmcv_home, 'open_mmlab.json') + if osp.exists(external_json_path): + external_urls = load_file(external_json_path) + assert isinstance(external_urls, dict) + default_urls.update(external_urls) + + return default_urls + + +def get_mmcls_models(): + mmcls_json_path = osp.join(mmcv.__path__[0], 'model_zoo/mmcls.json') + mmcls_urls = load_file(mmcls_json_path) + + return mmcls_urls + + +def get_deprecated_model_names(): + deprecate_json_path = osp.join(mmcv.__path__[0], + 'model_zoo/deprecated.json') + deprecate_urls = load_file(deprecate_json_path) + assert isinstance(deprecate_urls, dict) + + return deprecate_urls + + +def _process_mmcls_checkpoint(checkpoint): + state_dict = checkpoint['state_dict'] + new_state_dict = OrderedDict() + for k, v in state_dict.items(): + if k.startswith('backbone.'): + new_state_dict[k[9:]] = v + new_checkpoint = dict(state_dict=new_state_dict) + + return new_checkpoint + + +def _load_checkpoint(filename, map_location=None): + """Load checkpoint from somewhere (modelzoo, file, url). + + Args: + filename (str): Accept local filepath, URL, ``torchvision://xxx``, + ``open-mmlab://xxx``. Please refer to ``docs/model_zoo.md`` for + details. + map_location (str | None): Same as :func:`torch.load`. Default: None. + + Returns: + dict | OrderedDict: The loaded checkpoint. It can be either an + OrderedDict storing model weights or a dict containing other + information, which depends on the checkpoint. 
+ """ + if filename.startswith('modelzoo://'): + warnings.warn('The URL scheme of "modelzoo://" is deprecated, please ' + 'use "torchvision://" instead') + model_urls = get_torchvision_models() + model_name = filename[11:] + checkpoint = load_url_dist(model_urls[model_name]) + elif filename.startswith('torchvision://'): + model_urls = get_torchvision_models() + model_name = filename[14:] + checkpoint = load_url_dist(model_urls[model_name]) + elif filename.startswith('open-mmlab://'): + model_urls = get_external_models() + model_name = filename[13:] + deprecated_urls = get_deprecated_model_names() + if model_name in deprecated_urls: + warnings.warn(f'open-mmlab://{model_name} is deprecated in favor ' + f'of open-mmlab://{deprecated_urls[model_name]}') + model_name = deprecated_urls[model_name] + model_url = model_urls[model_name] + # check if is url + if model_url.startswith(('http://', 'https://')): + checkpoint = load_url_dist(model_url) + else: + filename = osp.join(_get_mmcv_home(), model_url) + if not osp.isfile(filename): + raise IOError(f'{filename} is not a checkpoint file') + checkpoint = torch.load(filename, map_location=map_location) + elif filename.startswith('mmcls://'): + model_urls = get_mmcls_models() + model_name = filename[8:] + checkpoint = load_url_dist(model_urls[model_name]) + checkpoint = _process_mmcls_checkpoint(checkpoint) + elif filename.startswith(('http://', 'https://')): + checkpoint = load_url_dist(filename) + elif filename.startswith('pavi://'): + model_path = filename[7:] + checkpoint = load_pavimodel_dist(model_path, map_location=map_location) + elif filename.startswith('s3://'): + checkpoint = load_fileclient_dist( + filename, backend='ceph', map_location=map_location) + else: + if not osp.isfile(filename): + raise IOError(f'{filename} is not a checkpoint file') + checkpoint = torch.load(filename, map_location=map_location) + return checkpoint + + +def load_checkpoint(model, + filename, + map_location='cpu', + strict=False, + logger=None): + """Load checkpoint from a file or URI. + + Args: + model (Module): Module to load checkpoint. + filename (str): Accept local filepath, URL, ``torchvision://xxx``, + ``open-mmlab://xxx``. Please refer to ``docs/model_zoo.md`` for + details. + map_location (str): Same as :func:`torch.load`. + strict (bool): Whether to allow different params for the model and + checkpoint. + logger (:mod:`logging.Logger` or None): The logger for error message. + + Returns: + dict or OrderedDict: The loaded checkpoint. 
+ """ + checkpoint = _load_checkpoint(filename, map_location) + # OrderedDict is a subclass of dict + if not isinstance(checkpoint, dict): + raise RuntimeError( + f'No state_dict found in checkpoint file {filename}') + # get state_dict from checkpoint + if 'state_dict' in checkpoint: + state_dict = checkpoint['state_dict'] + elif 'model' in checkpoint: + state_dict = checkpoint['model'] + else: + state_dict = checkpoint + # strip prefix of state_dict + if list(state_dict.keys())[0].startswith('module.'): + state_dict = {k[7:]: v for k, v in state_dict.items()} + + # for MoBY, load model of online branch + if sorted(list(state_dict.keys()))[0].startswith('encoder'): + state_dict = {k.replace('encoder.', ''): v for k, v in state_dict.items() if k.startswith('encoder.')} + + # reshape absolute position embedding + if state_dict.get('absolute_pos_embed') is not None: + absolute_pos_embed = state_dict['absolute_pos_embed'] + N1, L, C1 = absolute_pos_embed.size() + N2, C2, H, W = model.absolute_pos_embed.size() + if N1 != N2 or C1 != C2 or L != H*W: + logger.warning("Error in loading absolute_pos_embed, pass") + else: + state_dict['absolute_pos_embed'] = absolute_pos_embed.view(N2, H, W, C2).permute(0, 3, 1, 2) + + # interpolate position bias table if needed + relative_position_bias_table_keys = [k for k in state_dict.keys() if "relative_position_bias_table" in k] + for table_key in relative_position_bias_table_keys: + table_pretrained = state_dict[table_key] + table_current = model.state_dict()[table_key] + L1, nH1 = table_pretrained.size() + L2, nH2 = table_current.size() + if nH1 != nH2: + logger.warning(f"Error in loading {table_key}, pass") + else: + if L1 != L2: + S1 = int(L1 ** 0.5) + S2 = int(L2 ** 0.5) + table_pretrained_resized = F.interpolate( + table_pretrained.permute(1, 0).view(1, nH1, S1, S1), + size=(S2, S2), mode='bicubic') + state_dict[table_key] = table_pretrained_resized.view(nH2, L2).permute(1, 0) + + # load state_dict + load_state_dict(model, state_dict, strict, logger) + return checkpoint + + +def weights_to_cpu(state_dict): + """Copy a model state_dict to cpu. + + Args: + state_dict (OrderedDict): Model weights on GPU. + + Returns: + OrderedDict: Model weights on GPU. + """ + state_dict_cpu = OrderedDict() + for key, val in state_dict.items(): + state_dict_cpu[key] = val.cpu() + return state_dict_cpu + + +def _save_to_state_dict(module, destination, prefix, keep_vars): + """Saves module state to `destination` dictionary. + + This method is modified from :meth:`torch.nn.Module._save_to_state_dict`. + + Args: + module (nn.Module): The module to generate state_dict. + destination (dict): A dict where state will be stored. + prefix (str): The prefix for parameters and buffers used in this + module. + """ + for name, param in module._parameters.items(): + if param is not None: + destination[prefix + name] = param if keep_vars else param.detach() + for name, buf in module._buffers.items(): + # remove check of _non_persistent_buffers_set to allow nn.BatchNorm2d + if buf is not None: + destination[prefix + name] = buf if keep_vars else buf.detach() + + +def get_state_dict(module, destination=None, prefix='', keep_vars=False): + """Returns a dictionary containing a whole state of the module. + + Both parameters and persistent buffers (e.g. running averages) are + included. Keys are corresponding parameter and buffer names. 
+ + This method is modified from :meth:`torch.nn.Module.state_dict` to + recursively check parallel module in case that the model has a complicated + structure, e.g., nn.Module(nn.Module(DDP)). + + Args: + module (nn.Module): The module to generate state_dict. + destination (OrderedDict): Returned dict for the state of the + module. + prefix (str): Prefix of the key. + keep_vars (bool): Whether to keep the variable property of the + parameters. Default: False. + + Returns: + dict: A dictionary containing a whole state of the module. + """ + # recursively check parallel module in case that the model has a + # complicated structure, e.g., nn.Module(nn.Module(DDP)) + if is_module_wrapper(module): + module = module.module + + # below is the same as torch.nn.Module.state_dict() + if destination is None: + destination = OrderedDict() + destination._metadata = OrderedDict() + destination._metadata[prefix[:-1]] = local_metadata = dict( + version=module._version) + _save_to_state_dict(module, destination, prefix, keep_vars) + for name, child in module._modules.items(): + if child is not None: + get_state_dict( + child, destination, prefix + name + '.', keep_vars=keep_vars) + for hook in module._state_dict_hooks.values(): + hook_result = hook(module, destination, prefix, local_metadata) + if hook_result is not None: + destination = hook_result + return destination + + +def save_checkpoint(model, filename, optimizer=None, meta=None): + """Save checkpoint to file. + + The checkpoint will have 3 fields: ``meta``, ``state_dict`` and + ``optimizer``. By default ``meta`` will contain version and time info. + + Args: + model (Module): Module whose params are to be saved. + filename (str): Checkpoint filename. + optimizer (:obj:`Optimizer`, optional): Optimizer to be saved. + meta (dict, optional): Metadata to be saved in checkpoint. 
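    Example (an illustrative sketch; ``model`` and ``optimizer`` are assumed
    to be an existing module and its optimizer, the path is a placeholder)::

        >>> save_checkpoint(model, 'work_dir/iter_160000.pth',
        ...                 optimizer=optimizer, meta=dict(iter=160000))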
+ """ + if meta is None: + meta = {} + elif not isinstance(meta, dict): + raise TypeError(f'meta must be a dict or None, but got {type(meta)}') + meta.update(mmcv_version=mmcv.__version__, time=time.asctime()) + + if is_module_wrapper(model): + model = model.module + + if hasattr(model, 'CLASSES') and model.CLASSES is not None: + # save class name to the meta + meta.update(CLASSES=model.CLASSES) + + checkpoint = { + 'meta': meta, + 'state_dict': weights_to_cpu(get_state_dict(model)) + } + # save optimizer state dict in the checkpoint + if isinstance(optimizer, Optimizer): + checkpoint['optimizer'] = optimizer.state_dict() + elif isinstance(optimizer, dict): + checkpoint['optimizer'] = {} + for name, optim in optimizer.items(): + checkpoint['optimizer'][name] = optim.state_dict() + + if filename.startswith('pavi://'): + try: + from pavi import modelcloud + from pavi.exception import NodeNotFoundError + except ImportError: + raise ImportError( + 'Please install pavi to load checkpoint from modelcloud.') + model_path = filename[7:] + root = modelcloud.Folder() + model_dir, model_name = osp.split(model_path) + try: + model = modelcloud.get(model_dir) + except NodeNotFoundError: + model = root.create_training_model(model_dir) + with TemporaryDirectory() as tmp_dir: + checkpoint_file = osp.join(tmp_dir, model_name) + with open(checkpoint_file, 'wb') as f: + torch.save(checkpoint, f) + f.flush() + model.create_file(checkpoint_file, name=model_name) + else: + mmcv.mkdir_or_exist(osp.dirname(filename)) + # immediately flush buffer + with open(filename, 'wb') as f: + torch.save(checkpoint, f) + f.flush() \ No newline at end of file diff --git a/mmseg/__init__.py b/mmseg/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..96a8ca14fee923d613c1cd65c82b25545b6d1fc2 --- /dev/null +++ b/mmseg/__init__.py @@ -0,0 +1,30 @@ +import mmcv + +from .version import __version__, version_info + +MMCV_MIN = '1.3.1' +MMCV_MAX = '1.4.0' + + +def digit_version(version_str): + digit_version = [] + for x in version_str.split('.'): + if x.isdigit(): + digit_version.append(int(x)) + elif x.find('rc') != -1: + patch_version = x.split('rc') + digit_version.append(int(patch_version[0]) - 1) + digit_version.append(int(patch_version[1])) + return digit_version + + +mmcv_min_version = digit_version(MMCV_MIN) +mmcv_max_version = digit_version(MMCV_MAX) +mmcv_version = digit_version(mmcv.__version__) + + +assert (mmcv_min_version <= mmcv_version <= mmcv_max_version), \ + f'MMCV=={mmcv.__version__} is used but incompatible. ' \ + f'Please install mmcv>={mmcv_min_version}, <={mmcv_max_version}.' 
+
+__all__ = ['__version__', 'version_info']
diff --git a/mmseg/apis/__init__.py b/mmseg/apis/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..170724be38de42daf2bc1a1910e181d68818f165
--- /dev/null
+++ b/mmseg/apis/__init__.py
@@ -0,0 +1,9 @@
+from .inference import inference_segmentor, init_segmentor, show_result_pyplot
+from .test import multi_gpu_test, single_gpu_test
+from .train import get_root_logger, set_random_seed, train_segmentor
+
+__all__ = [
+    'get_root_logger', 'set_random_seed', 'train_segmentor', 'init_segmentor',
+    'inference_segmentor', 'multi_gpu_test', 'single_gpu_test',
+    'show_result_pyplot'
+]
diff --git a/mmseg/apis/inference.py b/mmseg/apis/inference.py
new file mode 100644
index 0000000000000000000000000000000000000000..262f363c3069940c5366e9cca7d7abee34795cf5
--- /dev/null
+++ b/mmseg/apis/inference.py
@@ -0,0 +1,136 @@
+import matplotlib.pyplot as plt
+import mmcv
+import torch
+from mmcv.parallel import collate, scatter
+from mmcv.runner import load_checkpoint
+
+from mmseg.datasets.pipelines import Compose
+from mmseg.models import build_segmentor
+
+
+def init_segmentor(config, checkpoint=None, device='cuda:0'):
+    """Initialize a segmentor from config file.
+
+    Args:
+        config (str or :obj:`mmcv.Config`): Config file path or the config
+            object.
+        checkpoint (str, optional): Checkpoint path. If left as None, the model
+            will not load any weights.
+        device (str, optional): CPU/CUDA device option. Default 'cuda:0'.
+            Use 'cpu' for loading model on CPU.
+    Returns:
+        nn.Module: The constructed segmentor.
+    """
+    if isinstance(config, str):
+        config = mmcv.Config.fromfile(config)
+    elif not isinstance(config, mmcv.Config):
+        raise TypeError('config must be a filename or Config object, '
+                        'but got {}'.format(type(config)))
+    config.model.pretrained = None
+    config.model.train_cfg = None
+    model = build_segmentor(config.model, test_cfg=config.get('test_cfg'))
+    if checkpoint is not None:
+        checkpoint = load_checkpoint(model, checkpoint, map_location='cpu')
+        model.CLASSES = checkpoint['meta']['CLASSES']
+        model.PALETTE = checkpoint['meta']['PALETTE']
+    model.cfg = config  # save the config in the model for convenience
+    model.to(device)
+    model.eval()
+    return model
+
+
+class LoadImage:
+    """A simple pipeline to load image."""
+
+    def __call__(self, results):
+        """Call function to load images into results.
+
+        Args:
+            results (dict): A result dict contains the file name
+                of the image to be read.
+
+        Returns:
+            dict: ``results`` will be returned containing loaded image.
+        """
+
+        if isinstance(results['img'], str):
+            results['filename'] = results['img']
+            results['ori_filename'] = results['img']
+        else:
+            results['filename'] = None
+            results['ori_filename'] = None
+        img = mmcv.imread(results['img'])
+        results['img'] = img
+        results['img_shape'] = img.shape
+        results['ori_shape'] = img.shape
+        return results
+
+
+def inference_segmentor(model, img):
+    """Inference image(s) with the segmentor.
+
+    Args:
+        model (nn.Module): The loaded segmentor.
+        img (str/ndarray or list[str/ndarray]): Either image files or loaded
+            images.
+
+    Returns:
+        (list[Tensor]): The segmentation result.
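    Example (an illustrative sketch; the config/checkpoint paths are
    placeholders)::

        >>> model = init_segmentor('config.py', 'ckpt.pth', device='cpu')
        >>> result = inference_segmentor(model, 'demo.jpg')
        >>> seg_map = result[0]  # one (H, W) map of class indices per image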
+ """ + cfg = model.cfg + device = next(model.parameters()).device # model device + # build the data pipeline + test_pipeline = [LoadImage()] + cfg.data.test.pipeline[1:] + test_pipeline = Compose(test_pipeline) + # prepare data + data = dict(img=img) + data = test_pipeline(data) + data = collate([data], samples_per_gpu=1) + if next(model.parameters()).is_cuda: + # scatter to specified GPU + data = scatter(data, [device])[0] + else: + data['img_metas'] = [i.data[0] for i in data['img_metas']] + + # forward the model + with torch.no_grad(): + result = model(return_loss=False, rescale=True, **data) + return result + + +def show_result_pyplot(model, + img, + result, + palette=None, + fig_size=(15, 10), + opacity=0.5, + title='', + block=True): + """Visualize the segmentation results on the image. + + Args: + model (nn.Module): The loaded segmentor. + img (str or np.ndarray): Image filename or loaded image. + result (list): The segmentation result. + palette (list[list[int]]] | None): The palette of segmentation + map. If None is given, random palette will be generated. + Default: None + fig_size (tuple): Figure size of the pyplot figure. + opacity(float): Opacity of painted segmentation map. + Default 0.5. + Must be in (0, 1] range. + title (str): The title of pyplot figure. + Default is ''. + block (bool): Whether to block the pyplot figure. + Default is True. + """ + if hasattr(model, 'module'): + model = model.module + img = model.show_result( + img, result, palette=palette, show=False, opacity=opacity) + # plt.figure(figsize=fig_size) + # plt.imshow(mmcv.bgr2rgb(img)) + # plt.title(title) + # plt.tight_layout() + # plt.show(block=block) + return mmcv.bgr2rgb(img) diff --git a/mmseg/apis/test.py b/mmseg/apis/test.py new file mode 100644 index 0000000000000000000000000000000000000000..1597df6aa3a203df8c1cdad8dc7054e54a063f2d --- /dev/null +++ b/mmseg/apis/test.py @@ -0,0 +1,238 @@ +import os.path as osp +import pickle +import shutil +import tempfile + +import mmcv +import numpy as np +import torch +import torch.distributed as dist +from mmcv.image import tensor2imgs +from mmcv.runner import get_dist_info + + +def np2tmp(array, temp_file_name=None): + """Save ndarray to local numpy file. + + Args: + array (ndarray): Ndarray to save. + temp_file_name (str): Numpy file name. If 'temp_file_name=None', this + function will generate a file name with tempfile.NamedTemporaryFile + to save ndarray. Default: None. + + Returns: + str: The numpy file name. + """ + + if temp_file_name is None: + temp_file_name = tempfile.NamedTemporaryFile( + suffix='.npy', delete=False).name + np.save(temp_file_name, array) + return temp_file_name + + +def single_gpu_test(model, + data_loader, + show=False, + out_dir=None, + efficient_test=False, + opacity=0.5): + """Test with single GPU. + + Args: + model (nn.Module): Model to be tested. + data_loader (utils.data.Dataloader): Pytorch data loader. + show (bool): Whether show results during inference. Default: False. + out_dir (str, optional): If specified, the results will be dumped into + the directory to save output results. + efficient_test (bool): Whether save the results as local numpy files to + save CPU memory during evaluation. Default: False. + opacity(float): Opacity of painted segmentation map. + Default 0.5. + Must be in (0, 1] range. + Returns: + list: The prediction results. 
+ """ + + model.eval() + results = [] + dataset = data_loader.dataset + prog_bar = mmcv.ProgressBar(len(dataset)) + for i, data in enumerate(data_loader): + with torch.no_grad(): + result = model(return_loss=False, **data) + + if show or out_dir: + img_tensor = data['img'][0] + img_metas = data['img_metas'][0].data[0] + imgs = tensor2imgs(img_tensor, **img_metas[0]['img_norm_cfg']) + assert len(imgs) == len(img_metas) + + for img, img_meta in zip(imgs, img_metas): + h, w, _ = img_meta['img_shape'] + img_show = img[:h, :w, :] + + ori_h, ori_w = img_meta['ori_shape'][:-1] + img_show = mmcv.imresize(img_show, (ori_w, ori_h)) + + if out_dir: + out_file = osp.join(out_dir, img_meta['ori_filename']) + else: + out_file = None + + model.module.show_result( + img_show, + result, + palette=dataset.PALETTE, + show=show, + out_file=out_file, + opacity=opacity) + + if isinstance(result, list): + if efficient_test: + result = [np2tmp(_) for _ in result] + results.extend(result) + else: + if efficient_test: + result = np2tmp(result) + results.append(result) + + batch_size = len(result) + for _ in range(batch_size): + prog_bar.update() + return results + + +def multi_gpu_test(model, + data_loader, + tmpdir=None, + gpu_collect=False, + efficient_test=False): + """Test model with multiple gpus. + + This method tests model with multiple gpus and collects the results + under two different modes: gpu and cpu modes. By setting 'gpu_collect=True' + it encodes results to gpu tensors and use gpu communication for results + collection. On cpu mode it saves the results on different gpus to 'tmpdir' + and collects them by the rank 0 worker. + + Args: + model (nn.Module): Model to be tested. + data_loader (utils.data.Dataloader): Pytorch data loader. + tmpdir (str): Path of directory to save the temporary results from + different gpus under cpu mode. + gpu_collect (bool): Option to use either gpu or cpu to collect results. + efficient_test (bool): Whether save the results as local numpy files to + save CPU memory during evaluation. Default: False. + + Returns: + list: The prediction results. 
+ """ + + model.eval() + results = [] + dataset = data_loader.dataset + rank, world_size = get_dist_info() + if rank == 0: + prog_bar = mmcv.ProgressBar(len(dataset)) + for i, data in enumerate(data_loader): + with torch.no_grad(): + result = model(return_loss=False, rescale=True, **data) + + if isinstance(result, list): + if efficient_test: + result = [np2tmp(_) for _ in result] + results.extend(result) + else: + if efficient_test: + result = np2tmp(result) + results.append(result) + + if rank == 0: + batch_size = data['img'][0].size(0) + for _ in range(batch_size * world_size): + prog_bar.update() + + # collect results from all ranks + if gpu_collect: + results = collect_results_gpu(results, len(dataset)) + else: + results = collect_results_cpu(results, len(dataset), tmpdir) + return results + + +def collect_results_cpu(result_part, size, tmpdir=None): + """Collect results with CPU.""" + rank, world_size = get_dist_info() + # create a tmp dir if it is not specified + if tmpdir is None: + MAX_LEN = 512 + # 32 is whitespace + dir_tensor = torch.full((MAX_LEN, ), + 32, + dtype=torch.uint8, + device='cuda') + if rank == 0: + tmpdir = tempfile.mkdtemp() + tmpdir = torch.tensor( + bytearray(tmpdir.encode()), dtype=torch.uint8, device='cuda') + dir_tensor[:len(tmpdir)] = tmpdir + dist.broadcast(dir_tensor, 0) + tmpdir = dir_tensor.cpu().numpy().tobytes().decode().rstrip() + else: + mmcv.mkdir_or_exist(tmpdir) + # dump the part result to the dir + mmcv.dump(result_part, osp.join(tmpdir, 'part_{}.pkl'.format(rank))) + dist.barrier() + # collect all parts + if rank != 0: + return None + else: + # load results of all parts from tmp dir + part_list = [] + for i in range(world_size): + part_file = osp.join(tmpdir, 'part_{}.pkl'.format(i)) + part_list.append(mmcv.load(part_file)) + # sort the results + ordered_results = [] + for res in zip(*part_list): + ordered_results.extend(list(res)) + # the dataloader may pad some samples + ordered_results = ordered_results[:size] + # remove tmp dir + shutil.rmtree(tmpdir) + return ordered_results + + +def collect_results_gpu(result_part, size): + """Collect results with GPU.""" + rank, world_size = get_dist_info() + # dump result part to tensor with pickle + part_tensor = torch.tensor( + bytearray(pickle.dumps(result_part)), dtype=torch.uint8, device='cuda') + # gather all result part tensor shape + shape_tensor = torch.tensor(part_tensor.shape, device='cuda') + shape_list = [shape_tensor.clone() for _ in range(world_size)] + dist.all_gather(shape_list, shape_tensor) + # padding result part tensor to max length + shape_max = torch.tensor(shape_list).max() + part_send = torch.zeros(shape_max, dtype=torch.uint8, device='cuda') + part_send[:shape_tensor[0]] = part_tensor + part_recv_list = [ + part_tensor.new_zeros(shape_max) for _ in range(world_size) + ] + # gather all result part + dist.all_gather(part_recv_list, part_send) + + if rank == 0: + part_list = [] + for recv, shape in zip(part_recv_list, shape_list): + part_list.append( + pickle.loads(recv[:shape[0]].cpu().numpy().tobytes())) + # sort the results + ordered_results = [] + for res in zip(*part_list): + ordered_results.extend(list(res)) + # the dataloader may pad some samples + ordered_results = ordered_results[:size] + return ordered_results diff --git a/mmseg/apis/train.py b/mmseg/apis/train.py new file mode 100644 index 0000000000000000000000000000000000000000..b15abc6f6f806537e1d335f58615a0b4749ac5ab --- /dev/null +++ b/mmseg/apis/train.py @@ -0,0 +1,116 @@ +import random +import warnings + +import 
numpy as np
+import torch
+from mmcv.parallel import MMDataParallel, MMDistributedDataParallel
+from mmcv.runner import build_optimizer, build_runner
+
+from mmseg.core import DistEvalHook, EvalHook
+from mmseg.datasets import build_dataloader, build_dataset
+from mmseg.utils import get_root_logger
+
+
+def set_random_seed(seed, deterministic=False):
+    """Set random seed.
+
+    Args:
+        seed (int): Seed to be used.
+        deterministic (bool): Whether to set the deterministic option for
+            CUDNN backend, i.e., set `torch.backends.cudnn.deterministic`
+            to True and `torch.backends.cudnn.benchmark` to False.
+            Default: False.
+    """
+    random.seed(seed)
+    np.random.seed(seed)
+    torch.manual_seed(seed)
+    torch.cuda.manual_seed_all(seed)
+    if deterministic:
+        torch.backends.cudnn.deterministic = True
+        torch.backends.cudnn.benchmark = False
+
+
+def train_segmentor(model,
+                    dataset,
+                    cfg,
+                    distributed=False,
+                    validate=False,
+                    timestamp=None,
+                    meta=None):
+    """Launch segmentor training."""
+    logger = get_root_logger(cfg.log_level)
+
+    # prepare data loaders
+    dataset = dataset if isinstance(dataset, (list, tuple)) else [dataset]
+    data_loaders = [
+        build_dataloader(
+            ds,
+            cfg.data.samples_per_gpu,
+            cfg.data.workers_per_gpu,
+            # cfg.gpus will be ignored if distributed
+            len(cfg.gpu_ids),
+            dist=distributed,
+            seed=cfg.seed,
+            drop_last=True) for ds in dataset
+    ]
+
+    # put model on gpus
+    if distributed:
+        find_unused_parameters = cfg.get('find_unused_parameters', False)
+        # Sets the `find_unused_parameters` parameter in
+        # torch.nn.parallel.DistributedDataParallel
+        model = MMDistributedDataParallel(
+            model.cuda(),
+            device_ids=[torch.cuda.current_device()],
+            broadcast_buffers=False,
+            find_unused_parameters=find_unused_parameters)
+    else:
+        model = MMDataParallel(
+            model.cuda(cfg.gpu_ids[0]), device_ids=cfg.gpu_ids)
+
+    # build runner
+    optimizer = build_optimizer(model, cfg.optimizer)
+
+    if cfg.get('runner') is None:
+        cfg.runner = {'type': 'IterBasedRunner', 'max_iters': cfg.total_iters}
+        warnings.warn(
+            'config is now expected to have a `runner` section, '
+            'please set `runner` in your config.', UserWarning)
+
+    runner = build_runner(
+        cfg.runner,
+        default_args=dict(
+            model=model,
+            batch_processor=None,
+            optimizer=optimizer,
+            work_dir=cfg.work_dir,
+            logger=logger,
+            meta=meta))
+
+    # register hooks
+    runner.register_training_hooks(cfg.lr_config, cfg.optimizer_config,
+                                   cfg.checkpoint_config, cfg.log_config,
+                                   cfg.get('momentum_config', None))
+
+    # an ugly workaround to make the .log and .log.json filenames the same
+    runner.timestamp = timestamp
+
+    # register eval hooks
+    if validate:
+        val_dataset = build_dataset(cfg.data.val, dict(test_mode=True))
+        val_dataloader = build_dataloader(
+            val_dataset,
+            samples_per_gpu=1,
+            workers_per_gpu=cfg.data.workers_per_gpu,
+            dist=distributed,
+            shuffle=False)
+        eval_cfg = cfg.get('evaluation', {})
+        eval_cfg['by_epoch'] = cfg.runner['type'] != 'IterBasedRunner'
+        eval_hook = DistEvalHook if distributed else EvalHook
+        runner.register_hook(eval_hook(val_dataloader, **eval_cfg), priority='LOW')
+
+    if cfg.resume_from:
+        runner.resume(cfg.resume_from)
+    elif cfg.load_from:
+        runner.load_checkpoint(cfg.load_from)
+    runner.run(data_loaders, cfg.workflow)
diff --git a/mmseg/core/__init__.py b/mmseg/core/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..965605587211b7bf0bd6bc3acdbb33dd49cab023
--- /dev/null
+++ b/mmseg/core/__init__.py
@@ -0,0 +1,3 @@
+from .evaluation import *  # noqa: F401, F403
+from .seg
import * # noqa: F401, F403 +from .utils import * # noqa: F401, F403 diff --git a/mmseg/core/evaluation/__init__.py b/mmseg/core/evaluation/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..f7cc4b23413a0639e9de00eeb0bf600632d2c6cd --- /dev/null +++ b/mmseg/core/evaluation/__init__.py @@ -0,0 +1,8 @@ +from .class_names import get_classes, get_palette +from .eval_hooks import DistEvalHook, EvalHook +from .metrics import eval_metrics, mean_dice, mean_fscore, mean_iou + +__all__ = [ + 'EvalHook', 'DistEvalHook', 'mean_dice', 'mean_iou', 'mean_fscore', + 'eval_metrics', 'get_classes', 'get_palette' +] diff --git a/mmseg/core/evaluation/class_names.py b/mmseg/core/evaluation/class_names.py new file mode 100644 index 0000000000000000000000000000000000000000..0d8e66d54b47c200d969ec9fb0bbb642be5d12c3 --- /dev/null +++ b/mmseg/core/evaluation/class_names.py @@ -0,0 +1,152 @@ +import mmcv + + +def cityscapes_classes(): + """Cityscapes class names for external use.""" + return [ + 'road', 'sidewalk', 'building', 'wall', 'fence', 'pole', + 'traffic light', 'traffic sign', 'vegetation', 'terrain', 'sky', + 'person', 'rider', 'car', 'truck', 'bus', 'train', 'motorcycle', + 'bicycle' + ] + + +def ade_classes(): + """ADE20K class names for external use.""" + return [ + 'wall', 'building', 'sky', 'floor', 'tree', 'ceiling', 'road', 'bed ', + 'windowpane', 'grass', 'cabinet', 'sidewalk', 'person', 'earth', + 'door', 'table', 'mountain', 'plant', 'curtain', 'chair', 'car', + 'water', 'painting', 'sofa', 'shelf', 'house', 'sea', 'mirror', 'rug', + 'field', 'armchair', 'seat', 'fence', 'desk', 'rock', 'wardrobe', + 'lamp', 'bathtub', 'railing', 'cushion', 'base', 'box', 'column', + 'signboard', 'chest of drawers', 'counter', 'sand', 'sink', + 'skyscraper', 'fireplace', 'refrigerator', 'grandstand', 'path', + 'stairs', 'runway', 'case', 'pool table', 'pillow', 'screen door', + 'stairway', 'river', 'bridge', 'bookcase', 'blind', 'coffee table', + 'toilet', 'flower', 'book', 'hill', 'bench', 'countertop', 'stove', + 'palm', 'kitchen island', 'computer', 'swivel chair', 'boat', 'bar', + 'arcade machine', 'hovel', 'bus', 'towel', 'light', 'truck', 'tower', + 'chandelier', 'awning', 'streetlight', 'booth', 'television receiver', + 'airplane', 'dirt track', 'apparel', 'pole', 'land', 'bannister', + 'escalator', 'ottoman', 'bottle', 'buffet', 'poster', 'stage', 'van', + 'ship', 'fountain', 'conveyer belt', 'canopy', 'washer', 'plaything', + 'swimming pool', 'stool', 'barrel', 'basket', 'waterfall', 'tent', + 'bag', 'minibike', 'cradle', 'oven', 'ball', 'food', 'step', 'tank', + 'trade name', 'microwave', 'pot', 'animal', 'bicycle', 'lake', + 'dishwasher', 'screen', 'blanket', 'sculpture', 'hood', 'sconce', + 'vase', 'traffic light', 'tray', 'ashcan', 'fan', 'pier', 'crt screen', + 'plate', 'monitor', 'bulletin board', 'shower', 'radiator', 'glass', + 'clock', 'flag' + ] + + +def voc_classes(): + """Pascal VOC class names for external use.""" + return [ + 'background', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', + 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', + 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', + 'tvmonitor' + ] + + +def cityscapes_palette(): + """Cityscapes palette for external use.""" + return [[128, 64, 128], [244, 35, 232], [70, 70, 70], [102, 102, 156], + [190, 153, 153], [153, 153, 153], [250, 170, 30], [220, 220, 0], + [107, 142, 35], [152, 251, 152], [70, 130, 180], [220, 20, 60], + [255, 0, 0], [0, 0, 142], [0, 0, 70], [0, 
60, 100], [0, 80, 100], + [0, 0, 230], [119, 11, 32]] + + +def ade_palette(): + """ADE20K palette for external use.""" + return [[120, 120, 120], [180, 120, 120], [6, 230, 230], [80, 50, 50], + [4, 200, 3], [120, 120, 80], [140, 140, 140], [204, 5, 255], + [230, 230, 230], [4, 250, 7], [224, 5, 255], [235, 255, 7], + [150, 5, 61], [120, 120, 70], [8, 255, 51], [255, 6, 82], + [143, 255, 140], [204, 255, 4], [255, 51, 7], [204, 70, 3], + [0, 102, 200], [61, 230, 250], [255, 6, 51], [11, 102, 255], + [255, 7, 71], [255, 9, 224], [9, 7, 230], [220, 220, 220], + [255, 9, 92], [112, 9, 255], [8, 255, 214], [7, 255, 224], + [255, 184, 6], [10, 255, 71], [255, 41, 10], [7, 255, 255], + [224, 255, 8], [102, 8, 255], [255, 61, 6], [255, 194, 7], + [255, 122, 8], [0, 255, 20], [255, 8, 41], [255, 5, 153], + [6, 51, 255], [235, 12, 255], [160, 150, 20], [0, 163, 255], + [140, 140, 140], [250, 10, 15], [20, 255, 0], [31, 255, 0], + [255, 31, 0], [255, 224, 0], [153, 255, 0], [0, 0, 255], + [255, 71, 0], [0, 235, 255], [0, 173, 255], [31, 0, 255], + [11, 200, 200], [255, 82, 0], [0, 255, 245], [0, 61, 255], + [0, 255, 112], [0, 255, 133], [255, 0, 0], [255, 163, 0], + [255, 102, 0], [194, 255, 0], [0, 143, 255], [51, 255, 0], + [0, 82, 255], [0, 255, 41], [0, 255, 173], [10, 0, 255], + [173, 255, 0], [0, 255, 153], [255, 92, 0], [255, 0, 255], + [255, 0, 245], [255, 0, 102], [255, 173, 0], [255, 0, 20], + [255, 184, 184], [0, 31, 255], [0, 255, 61], [0, 71, 255], + [255, 0, 204], [0, 255, 194], [0, 255, 82], [0, 10, 255], + [0, 112, 255], [51, 0, 255], [0, 194, 255], [0, 122, 255], + [0, 255, 163], [255, 153, 0], [0, 255, 10], [255, 112, 0], + [143, 255, 0], [82, 0, 255], [163, 255, 0], [255, 235, 0], + [8, 184, 170], [133, 0, 255], [0, 255, 92], [184, 0, 255], + [255, 0, 31], [0, 184, 255], [0, 214, 255], [255, 0, 112], + [92, 255, 0], [0, 224, 255], [112, 224, 255], [70, 184, 160], + [163, 0, 255], [153, 0, 255], [71, 255, 0], [255, 0, 163], + [255, 204, 0], [255, 0, 143], [0, 255, 235], [133, 255, 0], + [255, 0, 235], [245, 0, 255], [255, 0, 122], [255, 245, 0], + [10, 190, 212], [214, 255, 0], [0, 204, 255], [20, 0, 255], + [255, 255, 0], [0, 153, 255], [0, 41, 255], [0, 255, 204], + [41, 0, 255], [41, 255, 0], [173, 0, 255], [0, 245, 255], + [71, 0, 255], [122, 0, 255], [0, 255, 184], [0, 92, 255], + [184, 255, 0], [0, 133, 255], [255, 214, 0], [25, 194, 194], + [102, 255, 0], [92, 0, 255]] + + +def voc_palette(): + """Pascal VOC palette for external use.""" + return [[0, 0, 0], [128, 0, 0], [0, 128, 0], [128, 128, 0], [0, 0, 128], + [128, 0, 128], [0, 128, 128], [128, 128, 128], [64, 0, 0], + [192, 0, 0], [64, 128, 0], [192, 128, 0], [64, 0, 128], + [192, 0, 128], [64, 128, 128], [192, 128, 128], [0, 64, 0], + [128, 64, 0], [0, 192, 0], [128, 192, 0], [0, 64, 128]] + + +dataset_aliases = { + 'cityscapes': ['cityscapes'], + 'ade': ['ade', 'ade20k'], + 'voc': ['voc', 'pascal_voc', 'voc12', 'voc12aug'] +} + + +def get_classes(dataset): + """Get class names of a dataset.""" + alias2name = {} + for name, aliases in dataset_aliases.items(): + for alias in aliases: + alias2name[alias] = name + + if mmcv.is_str(dataset): + if dataset in alias2name: + labels = eval(alias2name[dataset] + '_classes()') + else: + raise ValueError(f'Unrecognized dataset: {dataset}') + else: + raise TypeError(f'dataset must a str, but got {type(dataset)}') + return labels + + +def get_palette(dataset): + """Get class palette (RGB) of a dataset.""" + alias2name = {} + for name, aliases in dataset_aliases.items(): + for alias in 
aliases:
+            alias2name[alias] = name
+
+    if mmcv.is_str(dataset):
+        if dataset in alias2name:
+            labels = eval(alias2name[dataset] + '_palette()')
+        else:
+            raise ValueError(f'Unrecognized dataset: {dataset}')
+    else:
+        raise TypeError(f'dataset must be a str, but got {type(dataset)}')
+    return labels
diff --git a/mmseg/core/evaluation/eval_hooks.py b/mmseg/core/evaluation/eval_hooks.py
new file mode 100644
index 0000000000000000000000000000000000000000..34c44c7fe30ad00c0b0ce62ab2a32f696d827474
--- /dev/null
+++ b/mmseg/core/evaluation/eval_hooks.py
@@ -0,0 +1,109 @@
+import os.path as osp
+
+from mmcv.runner import DistEvalHook as _DistEvalHook
+from mmcv.runner import EvalHook as _EvalHook
+
+
+class EvalHook(_EvalHook):
+    """Single GPU EvalHook, with efficient test support.
+
+    Args:
+        by_epoch (bool): Determine whether to perform evaluation by epoch or
+            by iteration. If set to True, it will perform by epoch.
+            Otherwise, by iteration. Default: False.
+        efficient_test (bool): Whether to save the results as local numpy
+            files to save CPU memory during evaluation. Default: False.
+    """
+
+    greater_keys = ['mIoU', 'mAcc', 'aAcc']
+
+    def __init__(self, *args, by_epoch=False, efficient_test=False, **kwargs):
+        super().__init__(*args, by_epoch=by_epoch, **kwargs)
+        self.efficient_test = efficient_test
+
+    def after_train_iter(self, runner):
+        """After train iteration hook.
+
+        Override default ``single_gpu_test``.
+        """
+        if self.by_epoch or not self.every_n_iters(runner, self.interval):
+            return
+        from mmseg.apis import single_gpu_test
+        runner.log_buffer.clear()
+        results = single_gpu_test(
+            runner.model,
+            self.dataloader,
+            show=False,
+            efficient_test=self.efficient_test)
+        self.evaluate(runner, results)
+
+    def after_train_epoch(self, runner):
+        """After train epoch hook.
+
+        Override default ``single_gpu_test``.
+        """
+        if not self.by_epoch or not self.every_n_epochs(runner, self.interval):
+            return
+        from mmseg.apis import single_gpu_test
+        runner.log_buffer.clear()
+        results = single_gpu_test(runner.model, self.dataloader, show=False)
+        self.evaluate(runner, results)
+
+
+class DistEvalHook(_DistEvalHook):
+    """Distributed EvalHook, with efficient test support.
+
+    Args:
+        by_epoch (bool): Determine whether to perform evaluation by epoch or
+            by iteration. If set to True, it will perform by epoch.
+            Otherwise, by iteration. Default: False.
+        efficient_test (bool): Whether to save the results as local numpy
+            files to save CPU memory during evaluation. Default: False.
+    """
+
+    greater_keys = ['mIoU', 'mAcc', 'aAcc']
+
+    def __init__(self, *args, by_epoch=False, efficient_test=False, **kwargs):
+        super().__init__(*args, by_epoch=by_epoch, **kwargs)
+        self.efficient_test = efficient_test
+
+    def after_train_iter(self, runner):
+        """After train iteration hook.
+
+        Override default ``multi_gpu_test``.
+        """
+        if self.by_epoch or not self.every_n_iters(runner, self.interval):
+            return
+        from mmseg.apis import multi_gpu_test
+        runner.log_buffer.clear()
+        results = multi_gpu_test(
+            runner.model,
+            self.dataloader,
+            tmpdir=osp.join(runner.work_dir, '.eval_hook'),
+            gpu_collect=self.gpu_collect,
+            efficient_test=self.efficient_test)
+        if runner.rank == 0:
+            print('\n')
+            self.evaluate(runner, results)
+
+    def after_train_epoch(self, runner):
+        """After train epoch hook.
+
+        Override default ``multi_gpu_test``.
+ """ + if not self.by_epoch or not self.every_n_epochs(runner, self.interval): + return + from mmseg.apis import multi_gpu_test + runner.log_buffer.clear() + results = multi_gpu_test( + runner.model, + self.dataloader, + tmpdir=osp.join(runner.work_dir, '.eval_hook'), + gpu_collect=self.gpu_collect) + if runner.rank == 0: + print('\n') + self.evaluate(runner, results) diff --git a/mmseg/core/evaluation/metrics.py b/mmseg/core/evaluation/metrics.py new file mode 100644 index 0000000000000000000000000000000000000000..a216afefe6ccb80fd11173060ccc9cef5a6e44e2 --- /dev/null +++ b/mmseg/core/evaluation/metrics.py @@ -0,0 +1,326 @@ +from collections import OrderedDict + +import mmcv +import numpy as np +import torch + + +def f_score(precision, recall, beta=1): + """calcuate the f-score value. + + Args: + precision (float | torch.Tensor): The precision value. + recall (float | torch.Tensor): The recall value. + beta (int): Determines the weight of recall in the combined score. + Default: False. + + Returns: + [torch.tensor]: The f-score value. + """ + score = (1 + beta**2) * (precision * recall) / ( + (beta**2 * precision) + recall) + return score + + +def intersect_and_union(pred_label, + label, + num_classes, + ignore_index, + label_map=dict(), + reduce_zero_label=False): + """Calculate intersection and Union. + + Args: + pred_label (ndarray | str): Prediction segmentation map + or predict result filename. + label (ndarray | str): Ground truth segmentation map + or label filename. + num_classes (int): Number of categories. + ignore_index (int): Index that will be ignored in evaluation. + label_map (dict): Mapping old labels to new labels. The parameter will + work only when label is str. Default: dict(). + reduce_zero_label (bool): Wether ignore zero label. The parameter will + work only when label is str. Default: False. + + Returns: + torch.Tensor: The intersection of prediction and ground truth + histogram on all classes. + torch.Tensor: The union of prediction and ground truth histogram on + all classes. + torch.Tensor: The prediction histogram on all classes. + torch.Tensor: The ground truth histogram on all classes. + """ + + if isinstance(pred_label, str): + pred_label = torch.from_numpy(np.load(pred_label)) + else: + pred_label = torch.from_numpy((pred_label)) + + if isinstance(label, str): + label = torch.from_numpy( + mmcv.imread(label, flag='unchanged', backend='pillow')) + else: + label = torch.from_numpy(label) + + if label_map is not None: + for old_id, new_id in label_map.items(): + label[label == old_id] = new_id + if reduce_zero_label: + label[label == 0] = 255 + label = label - 1 + label[label == 254] = 255 + + mask = (label != ignore_index) + pred_label = pred_label[mask] + label = label[mask] + + intersect = pred_label[pred_label == label] + area_intersect = torch.histc( + intersect.float(), bins=(num_classes), min=0, max=num_classes - 1) + area_pred_label = torch.histc( + pred_label.float(), bins=(num_classes), min=0, max=num_classes - 1) + area_label = torch.histc( + label.float(), bins=(num_classes), min=0, max=num_classes - 1) + area_union = area_pred_label + area_label - area_intersect + return area_intersect, area_union, area_pred_label, area_label + + +def total_intersect_and_union(results, + gt_seg_maps, + num_classes, + ignore_index, + label_map=dict(), + reduce_zero_label=False): + """Calculate Total Intersection and Union. + + Args: + results (list[ndarray] | list[str]): List of prediction segmentation + maps or list of prediction result filenames. 
+ gt_seg_maps (list[ndarray] | list[str]): list of ground truth + segmentation maps or list of label filenames. + num_classes (int): Number of categories. + ignore_index (int): Index that will be ignored in evaluation. + label_map (dict): Mapping old labels to new labels. Default: dict(). + reduce_zero_label (bool): Whether to ignore the zero label. Default: False. + + Returns: + torch.Tensor: The intersection of prediction and ground truth histogram + on all classes. + torch.Tensor: The union of prediction and ground truth histogram on all + classes. + torch.Tensor: The prediction histogram on all classes. + torch.Tensor: The ground truth histogram on all classes. + """ + num_imgs = len(results) + assert len(gt_seg_maps) == num_imgs + total_area_intersect = torch.zeros((num_classes, ), dtype=torch.float64) + total_area_union = torch.zeros((num_classes, ), dtype=torch.float64) + total_area_pred_label = torch.zeros((num_classes, ), dtype=torch.float64) + total_area_label = torch.zeros((num_classes, ), dtype=torch.float64) + for i in range(num_imgs): + area_intersect, area_union, area_pred_label, area_label = \ + intersect_and_union( + results[i], gt_seg_maps[i], num_classes, ignore_index, + label_map, reduce_zero_label) + total_area_intersect += area_intersect + total_area_union += area_union + total_area_pred_label += area_pred_label + total_area_label += area_label + return total_area_intersect, total_area_union, total_area_pred_label, \ + total_area_label + + +def mean_iou(results, + gt_seg_maps, + num_classes, + ignore_index, + nan_to_num=None, + label_map=dict(), + reduce_zero_label=False): + """Calculate Mean Intersection and Union (mIoU). + + Args: + results (list[ndarray] | list[str]): List of prediction segmentation + maps or list of prediction result filenames. + gt_seg_maps (list[ndarray] | list[str]): list of ground truth + segmentation maps or list of label filenames. + num_classes (int): Number of categories. + ignore_index (int): Index that will be ignored in evaluation. + nan_to_num (int, optional): If specified, NaN values will be replaced + by the numbers defined by the user. Default: None. + label_map (dict): Mapping old labels to new labels. Default: dict(). + reduce_zero_label (bool): Whether to ignore the zero label. Default: False. + + Returns: + dict[str, float | ndarray]: + float: Overall accuracy on all images. + ndarray: Per category accuracy, shape (num_classes, ). + ndarray: Per category IoU, shape (num_classes, ). + """ + iou_result = eval_metrics( + results=results, + gt_seg_maps=gt_seg_maps, + num_classes=num_classes, + ignore_index=ignore_index, + metrics=['mIoU'], + nan_to_num=nan_to_num, + label_map=label_map, + reduce_zero_label=reduce_zero_label) + return iou_result + + +def mean_dice(results, + gt_seg_maps, + num_classes, + ignore_index, + nan_to_num=None, + label_map=dict(), + reduce_zero_label=False): + """Calculate Mean Dice (mDice). + + Args: + results (list[ndarray] | list[str]): List of prediction segmentation + maps or list of prediction result filenames. + gt_seg_maps (list[ndarray] | list[str]): list of ground truth + segmentation maps or list of label filenames. + num_classes (int): Number of categories. + ignore_index (int): Index that will be ignored in evaluation. + nan_to_num (int, optional): If specified, NaN values will be replaced + by the numbers defined by the user. Default: None. + label_map (dict): Mapping old labels to new labels. Default: dict(). + reduce_zero_label (bool): Whether to ignore the zero label. Default: False.
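To make the histogram bookkeeping above concrete, here is a toy run of mean_iou on a single 2x2 prediction; the import path follows this patch's layout:

import numpy as np
from mmseg.core.evaluation.metrics import mean_iou

pred = np.array([[0, 1], [1, 2]])
gt = np.array([[0, 1], [1, 1]])
res = mean_iou([pred], [gt], num_classes=3, ignore_index=255)
# res['aAcc'] == 0.75; res['IoU'] is approximately [1.0, 0.667, 0.0]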
+ + Returns: + dict[str, float | ndarray]: Default metrics. + float: Overall accuracy on all images. + ndarray: Per category accuracy, shape (num_classes, ). + ndarray: Per category dice, shape (num_classes, ). + """ + + dice_result = eval_metrics( + results=results, + gt_seg_maps=gt_seg_maps, + num_classes=num_classes, + ignore_index=ignore_index, + metrics=['mDice'], + nan_to_num=nan_to_num, + label_map=label_map, + reduce_zero_label=reduce_zero_label) + return dice_result + + +def mean_fscore(results, + gt_seg_maps, + num_classes, + ignore_index, + nan_to_num=None, + label_map=dict(), + reduce_zero_label=False, + beta=1): + """Calculate Mean F-Score (mFscore). + + Args: + results (list[ndarray] | list[str]): List of prediction segmentation + maps or list of prediction result filenames. + gt_seg_maps (list[ndarray] | list[str]): list of ground truth + segmentation maps or list of label filenames. + num_classes (int): Number of categories. + ignore_index (int): Index that will be ignored in evaluation. + nan_to_num (int, optional): If specified, NaN values will be replaced + by the numbers defined by the user. Default: None. + label_map (dict): Mapping old labels to new labels. Default: dict(). + reduce_zero_label (bool): Whether to ignore the zero label. Default: False. + beta (int): Determines the weight of recall in the combined score. + Default: 1. + + Returns: + dict[str, float | ndarray]: Default metrics. + float: Overall accuracy on all images. + ndarray: Per category recall, shape (num_classes, ). + ndarray: Per category precision, shape (num_classes, ). + ndarray: Per category f-score, shape (num_classes, ). + """ + fscore_result = eval_metrics( + results=results, + gt_seg_maps=gt_seg_maps, + num_classes=num_classes, + ignore_index=ignore_index, + metrics=['mFscore'], + nan_to_num=nan_to_num, + label_map=label_map, + reduce_zero_label=reduce_zero_label, + beta=beta) + return fscore_result + + +def eval_metrics(results, + gt_seg_maps, + num_classes, + ignore_index, + metrics=['mIoU'], + nan_to_num=None, + label_map=dict(), + reduce_zero_label=False, + beta=1): + """Calculate evaluation metrics. + + Args: + results (list[ndarray] | list[str]): List of prediction segmentation + maps or list of prediction result filenames. + gt_seg_maps (list[ndarray] | list[str]): list of ground truth + segmentation maps or list of label filenames. + num_classes (int): Number of categories. + ignore_index (int): Index that will be ignored in evaluation. + metrics (list[str] | str): Metrics to be evaluated. Options are + 'mIoU', 'mDice' and 'mFscore'. + nan_to_num (int, optional): If specified, NaN values will be replaced + by the numbers defined by the user. Default: None. + label_map (dict): Mapping old labels to new labels. Default: dict(). + reduce_zero_label (bool): Whether to ignore the zero label. Default: False. + + Returns: + float: Overall accuracy on all images. + ndarray: Per category accuracy, shape (num_classes, ). + ndarray: Per category evaluation metrics, shape (num_classes, ).
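Continuing the toy example above, eval_metrics computes several metrics in one pass over the same accumulated histograms; nan_to_num is useful for classes that never occur in the ground truth, where recall divides by a zero count:

from mmseg.core.evaluation.metrics import eval_metrics

res = eval_metrics(
    [pred], [gt],
    num_classes=3,
    ignore_index=255,
    metrics=['mIoU', 'mFscore'],
    nan_to_num=0,
    beta=1)
# keys: 'aAcc', 'IoU', 'Acc', 'Fscore', 'Precision', 'Recall'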
+ """ + if isinstance(metrics, str): + metrics = [metrics] + allowed_metrics = ['mIoU', 'mDice', 'mFscore'] + if not set(metrics).issubset(set(allowed_metrics)): + raise KeyError('metrics {} is not supported'.format(metrics)) + + total_area_intersect, total_area_union, total_area_pred_label, \ + total_area_label = total_intersect_and_union( + results, gt_seg_maps, num_classes, ignore_index, label_map, + reduce_zero_label) + all_acc = total_area_intersect.sum() / total_area_label.sum() + ret_metrics = OrderedDict({'aAcc': all_acc}) + for metric in metrics: + if metric == 'mIoU': + iou = total_area_intersect / total_area_union + acc = total_area_intersect / total_area_label + ret_metrics['IoU'] = iou + ret_metrics['Acc'] = acc + elif metric == 'mDice': + dice = 2 * total_area_intersect / ( + total_area_pred_label + total_area_label) + acc = total_area_intersect / total_area_label + ret_metrics['Dice'] = dice + ret_metrics['Acc'] = acc + elif metric == 'mFscore': + precision = total_area_intersect / total_area_pred_label + recall = total_area_intersect / total_area_label + f_value = torch.tensor( + [f_score(x[0], x[1], beta) for x in zip(precision, recall)]) + ret_metrics['Fscore'] = f_value + ret_metrics['Precision'] = precision + ret_metrics['Recall'] = recall + + ret_metrics = { + metric: value.numpy() + for metric, value in ret_metrics.items() + } + if nan_to_num is not None: + ret_metrics = OrderedDict({ + metric: np.nan_to_num(metric_value, nan=nan_to_num) + for metric, metric_value in ret_metrics.items() + }) + return ret_metrics diff --git a/mmseg/core/seg/__init__.py b/mmseg/core/seg/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..93bc129b685e4a3efca2cc891729981b2865900d --- /dev/null +++ b/mmseg/core/seg/__init__.py @@ -0,0 +1,4 @@ +from .builder import build_pixel_sampler +from .sampler import BasePixelSampler, OHEMPixelSampler + +__all__ = ['build_pixel_sampler', 'BasePixelSampler', 'OHEMPixelSampler'] diff --git a/mmseg/core/seg/builder.py b/mmseg/core/seg/builder.py new file mode 100644 index 0000000000000000000000000000000000000000..f5a117ce7b221f749d9def4b8ed97c07fe90b6e3 --- /dev/null +++ b/mmseg/core/seg/builder.py @@ -0,0 +1,8 @@ +from mmcv.utils import Registry, build_from_cfg + +PIXEL_SAMPLERS = Registry('pixel sampler') + + +def build_pixel_sampler(cfg, **default_args): + """Build pixel sampler for segmentation map.""" + return build_from_cfg(cfg, PIXEL_SAMPLERS, default_args) diff --git a/mmseg/core/seg/sampler/__init__.py b/mmseg/core/seg/sampler/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..332b242c03d1c5e80d4577df442a9a037b1816e1 --- /dev/null +++ b/mmseg/core/seg/sampler/__init__.py @@ -0,0 +1,4 @@ +from .base_pixel_sampler import BasePixelSampler +from .ohem_pixel_sampler import OHEMPixelSampler + +__all__ = ['BasePixelSampler', 'OHEMPixelSampler'] diff --git a/mmseg/core/seg/sampler/base_pixel_sampler.py b/mmseg/core/seg/sampler/base_pixel_sampler.py new file mode 100644 index 0000000000000000000000000000000000000000..b75b1566c9f18169cee51d4b55d75e0357b69c57 --- /dev/null +++ b/mmseg/core/seg/sampler/base_pixel_sampler.py @@ -0,0 +1,12 @@ +from abc import ABCMeta, abstractmethod + + +class BasePixelSampler(metaclass=ABCMeta): + """Base class of pixel sampler.""" + + def __init__(self, **kwargs): + pass + + @abstractmethod + def sample(self, seg_logit, seg_label): + """Placeholder for sample function.""" diff --git a/mmseg/core/seg/sampler/ohem_pixel_sampler.py 
b/mmseg/core/seg/sampler/ohem_pixel_sampler.py new file mode 100644 index 0000000000000000000000000000000000000000..88bb10d44026ba9f21756eaea9e550841cd59b9f --- /dev/null +++ b/mmseg/core/seg/sampler/ohem_pixel_sampler.py @@ -0,0 +1,76 @@ +import torch +import torch.nn.functional as F + +from ..builder import PIXEL_SAMPLERS +from .base_pixel_sampler import BasePixelSampler + + +@PIXEL_SAMPLERS.register_module() +class OHEMPixelSampler(BasePixelSampler): + """Online Hard Example Mining Sampler for segmentation. + + Args: + context (nn.Module): The context of sampler, subclass of + :obj:`BaseDecodeHead`. + thresh (float, optional): The threshold for hard example selection. + Predictions with confidence below this threshold are treated as + hard examples. If not specified, the hard examples will be the + pixels with the top ``min_kept`` loss values. Default: None. + min_kept (int, optional): The minimum number of predictions to keep. + Default: 100000. + """ + + def __init__(self, context, thresh=None, min_kept=100000): + super(OHEMPixelSampler, self).__init__() + self.context = context + assert min_kept > 1 + self.thresh = thresh + self.min_kept = min_kept + + def sample(self, seg_logit, seg_label): + """Sample pixels that have a high loss or a low prediction confidence. + + Args: + seg_logit (torch.Tensor): segmentation logits, shape (N, C, H, W) + seg_label (torch.Tensor): segmentation label, shape (N, 1, H, W) + + Returns: + torch.Tensor: segmentation weight, shape (N, H, W) + """ + with torch.no_grad(): + assert seg_logit.shape[2:] == seg_label.shape[2:] + assert seg_label.shape[1] == 1 + seg_label = seg_label.squeeze(1).long() + batch_kept = self.min_kept * seg_label.size(0) + valid_mask = seg_label != self.context.ignore_index + seg_weight = seg_logit.new_zeros(size=seg_label.size()) + valid_seg_weight = seg_weight[valid_mask] + if self.thresh is not None: + seg_prob = F.softmax(seg_logit, dim=1) + + tmp_seg_label = seg_label.clone().unsqueeze(1) + tmp_seg_label[tmp_seg_label == self.context.ignore_index] = 0 + seg_prob = seg_prob.gather(1, tmp_seg_label).squeeze(1) + sort_prob, sort_indices = seg_prob[valid_mask].sort() + + if sort_prob.numel() > 0: + min_threshold = sort_prob[min(batch_kept, + sort_prob.numel() - 1)] + else: + min_threshold = 0.0 + threshold = max(min_threshold, self.thresh) + valid_seg_weight[seg_prob[valid_mask] < threshold] = 1. + else: + losses = self.context.loss_decode( + seg_logit, + seg_label, + weight=None, + ignore_index=self.context.ignore_index, + reduction_override='none') + # faster than topk according to https://github.com/pytorch/pytorch/issues/22812 # noqa + _, sort_indices = losses[valid_mask].sort(descending=True) + valid_seg_weight[sort_indices[:batch_kept]] = 1. + + seg_weight[valid_mask] = valid_seg_weight + + return seg_weight diff --git a/mmseg/core/utils/__init__.py b/mmseg/core/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..f2678b321c295bcceaef945111ac3524be19d6e4 --- /dev/null +++ b/mmseg/core/utils/__init__.py @@ -0,0 +1,3 @@ +from .misc import add_prefix + +__all__ = ['add_prefix'] diff --git a/mmseg/core/utils/misc.py b/mmseg/core/utils/misc.py new file mode 100644 index 0000000000000000000000000000000000000000..eb862a82bd47c8624db3dd5c6fb6ad8a03b62466 --- /dev/null +++ b/mmseg/core/utils/misc.py @@ -0,0 +1,17 @@ +def add_prefix(inputs, prefix): + """Add prefix for dict. + + Args: + inputs (dict): The input dict with str keys. + prefix (str): The prefix to add. + + Returns: + dict: The dict with keys updated with ``prefix``.
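The sampler above is normally enabled from a config rather than constructed by hand; a sketch of the usual MMSegmentation decode-head snippet, assuming the decode head passes itself as ``context`` through build_pixel_sampler (field values are illustrative):

model = dict(
    decode_head=dict(
        # keep at most the 100k hardest pixels per image, or every pixel
        # predicted with confidence below 0.7, when weighting the loss
        sampler=dict(type='OHEMPixelSampler', thresh=0.7, min_kept=100000)))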
+ """ + + outputs = dict() + for name, value in inputs.items(): + outputs[f'{prefix}.{name}'] = value + + return outputs diff --git a/mmseg/datasets/__init__.py b/mmseg/datasets/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..ebeaef4a28ef655e43578552a8aef6b77f13a636 --- /dev/null +++ b/mmseg/datasets/__init__.py @@ -0,0 +1,19 @@ +from .ade import ADE20KDataset +from .builder import DATASETS, PIPELINES, build_dataloader, build_dataset +from .chase_db1 import ChaseDB1Dataset +from .cityscapes import CityscapesDataset +from .custom import CustomDataset +from .dataset_wrappers import ConcatDataset, RepeatDataset +from .drive import DRIVEDataset +from .hrf import HRFDataset +from .pascal_context import PascalContextDataset, PascalContextDataset59 +from .stare import STAREDataset +from .voc import PascalVOCDataset + +__all__ = [ + 'CustomDataset', 'build_dataloader', 'ConcatDataset', 'RepeatDataset', + 'DATASETS', 'build_dataset', 'PIPELINES', 'CityscapesDataset', + 'PascalVOCDataset', 'ADE20KDataset', 'PascalContextDataset', + 'PascalContextDataset59', 'ChaseDB1Dataset', 'DRIVEDataset', 'HRFDataset', + 'STAREDataset' +] diff --git a/mmseg/datasets/ade.py b/mmseg/datasets/ade.py new file mode 100644 index 0000000000000000000000000000000000000000..5913e43775ed4920b6934c855eb5a37c54218ebf --- /dev/null +++ b/mmseg/datasets/ade.py @@ -0,0 +1,84 @@ +from .builder import DATASETS +from .custom import CustomDataset + + +@DATASETS.register_module() +class ADE20KDataset(CustomDataset): + """ADE20K dataset. + + In segmentation map annotation for ADE20K, 0 stands for background, which + is not included in 150 categories. ``reduce_zero_label`` is fixed to True. + The ``img_suffix`` is fixed to '.jpg' and ``seg_map_suffix`` is fixed to + '.png'. 
+ """ + CLASSES = ( + 'wall', 'building', 'sky', 'floor', 'tree', 'ceiling', 'road', 'bed ', + 'windowpane', 'grass', 'cabinet', 'sidewalk', 'person', 'earth', + 'door', 'table', 'mountain', 'plant', 'curtain', 'chair', 'car', + 'water', 'painting', 'sofa', 'shelf', 'house', 'sea', 'mirror', 'rug', + 'field', 'armchair', 'seat', 'fence', 'desk', 'rock', 'wardrobe', + 'lamp', 'bathtub', 'railing', 'cushion', 'base', 'box', 'column', + 'signboard', 'chest of drawers', 'counter', 'sand', 'sink', + 'skyscraper', 'fireplace', 'refrigerator', 'grandstand', 'path', + 'stairs', 'runway', 'case', 'pool table', 'pillow', 'screen door', + 'stairway', 'river', 'bridge', 'bookcase', 'blind', 'coffee table', + 'toilet', 'flower', 'book', 'hill', 'bench', 'countertop', 'stove', + 'palm', 'kitchen island', 'computer', 'swivel chair', 'boat', 'bar', + 'arcade machine', 'hovel', 'bus', 'towel', 'light', 'truck', 'tower', + 'chandelier', 'awning', 'streetlight', 'booth', 'television receiver', + 'airplane', 'dirt track', 'apparel', 'pole', 'land', 'bannister', + 'escalator', 'ottoman', 'bottle', 'buffet', 'poster', 'stage', 'van', + 'ship', 'fountain', 'conveyer belt', 'canopy', 'washer', 'plaything', + 'swimming pool', 'stool', 'barrel', 'basket', 'waterfall', 'tent', + 'bag', 'minibike', 'cradle', 'oven', 'ball', 'food', 'step', 'tank', + 'trade name', 'microwave', 'pot', 'animal', 'bicycle', 'lake', + 'dishwasher', 'screen', 'blanket', 'sculpture', 'hood', 'sconce', + 'vase', 'traffic light', 'tray', 'ashcan', 'fan', 'pier', 'crt screen', + 'plate', 'monitor', 'bulletin board', 'shower', 'radiator', 'glass', + 'clock', 'flag') + + PALETTE = [[120, 120, 120], [180, 120, 120], [6, 230, 230], [80, 50, 50], + [4, 200, 3], [120, 120, 80], [140, 140, 140], [204, 5, 255], + [230, 230, 230], [4, 250, 7], [224, 5, 255], [235, 255, 7], + [150, 5, 61], [120, 120, 70], [8, 255, 51], [255, 6, 82], + [143, 255, 140], [204, 255, 4], [255, 51, 7], [204, 70, 3], + [0, 102, 200], [61, 230, 250], [255, 6, 51], [11, 102, 255], + [255, 7, 71], [255, 9, 224], [9, 7, 230], [220, 220, 220], + [255, 9, 92], [112, 9, 255], [8, 255, 214], [7, 255, 224], + [255, 184, 6], [10, 255, 71], [255, 41, 10], [7, 255, 255], + [224, 255, 8], [102, 8, 255], [255, 61, 6], [255, 194, 7], + [255, 122, 8], [0, 255, 20], [255, 8, 41], [255, 5, 153], + [6, 51, 255], [235, 12, 255], [160, 150, 20], [0, 163, 255], + [140, 140, 140], [250, 10, 15], [20, 255, 0], [31, 255, 0], + [255, 31, 0], [255, 224, 0], [153, 255, 0], [0, 0, 255], + [255, 71, 0], [0, 235, 255], [0, 173, 255], [31, 0, 255], + [11, 200, 200], [255, 82, 0], [0, 255, 245], [0, 61, 255], + [0, 255, 112], [0, 255, 133], [255, 0, 0], [255, 163, 0], + [255, 102, 0], [194, 255, 0], [0, 143, 255], [51, 255, 0], + [0, 82, 255], [0, 255, 41], [0, 255, 173], [10, 0, 255], + [173, 255, 0], [0, 255, 153], [255, 92, 0], [255, 0, 255], + [255, 0, 245], [255, 0, 102], [255, 173, 0], [255, 0, 20], + [255, 184, 184], [0, 31, 255], [0, 255, 61], [0, 71, 255], + [255, 0, 204], [0, 255, 194], [0, 255, 82], [0, 10, 255], + [0, 112, 255], [51, 0, 255], [0, 194, 255], [0, 122, 255], + [0, 255, 163], [255, 153, 0], [0, 255, 10], [255, 112, 0], + [143, 255, 0], [82, 0, 255], [163, 255, 0], [255, 235, 0], + [8, 184, 170], [133, 0, 255], [0, 255, 92], [184, 0, 255], + [255, 0, 31], [0, 184, 255], [0, 214, 255], [255, 0, 112], + [92, 255, 0], [0, 224, 255], [112, 224, 255], [70, 184, 160], + [163, 0, 255], [153, 0, 255], [71, 255, 0], [255, 0, 163], + [255, 204, 0], [255, 0, 143], [0, 255, 235], [133, 255, 0], + 
[255, 0, 235], [245, 0, 255], [255, 0, 122], [255, 245, 0], + [10, 190, 212], [214, 255, 0], [0, 204, 255], [20, 0, 255], + [255, 255, 0], [0, 153, 255], [0, 41, 255], [0, 255, 204], + [41, 0, 255], [41, 255, 0], [173, 0, 255], [0, 245, 255], + [71, 0, 255], [122, 0, 255], [0, 255, 184], [0, 92, 255], + [184, 255, 0], [0, 133, 255], [255, 214, 0], [25, 194, 194], + [102, 255, 0], [92, 0, 255]] + + def __init__(self, **kwargs): + super(ADE20KDataset, self).__init__( + img_suffix='.jpg', + seg_map_suffix='.png', + reduce_zero_label=True, + **kwargs) diff --git a/mmseg/datasets/builder.py b/mmseg/datasets/builder.py new file mode 100644 index 0000000000000000000000000000000000000000..f7a9926111cad3c8ab140ab8d289dbc66053517a --- /dev/null +++ b/mmseg/datasets/builder.py @@ -0,0 +1,169 @@ +import copy +import platform +import random +from functools import partial + +import numpy as np +from mmcv.parallel import collate +from mmcv.runner import get_dist_info +from mmcv.utils import Registry, build_from_cfg +from mmcv.utils.parrots_wrapper import DataLoader, PoolDataLoader +from torch.utils.data import DistributedSampler + +if platform.system() != 'Windows': + # https://github.com/pytorch/pytorch/issues/973 + import resource + rlimit = resource.getrlimit(resource.RLIMIT_NOFILE) + hard_limit = rlimit[1] + soft_limit = min(4096, hard_limit) + resource.setrlimit(resource.RLIMIT_NOFILE, (soft_limit, hard_limit)) + +DATASETS = Registry('dataset') +PIPELINES = Registry('pipeline') + + +def _concat_dataset(cfg, default_args=None): + """Build :obj:`ConcatDataset by.""" + from .dataset_wrappers import ConcatDataset + img_dir = cfg['img_dir'] + ann_dir = cfg.get('ann_dir', None) + split = cfg.get('split', None) + num_img_dir = len(img_dir) if isinstance(img_dir, (list, tuple)) else 1 + if ann_dir is not None: + num_ann_dir = len(ann_dir) if isinstance(ann_dir, (list, tuple)) else 1 + else: + num_ann_dir = 0 + if split is not None: + num_split = len(split) if isinstance(split, (list, tuple)) else 1 + else: + num_split = 0 + if num_img_dir > 1: + assert num_img_dir == num_ann_dir or num_ann_dir == 0 + assert num_img_dir == num_split or num_split == 0 + else: + assert num_split == num_ann_dir or num_ann_dir <= 1 + num_dset = max(num_split, num_img_dir) + + datasets = [] + for i in range(num_dset): + data_cfg = copy.deepcopy(cfg) + if isinstance(img_dir, (list, tuple)): + data_cfg['img_dir'] = img_dir[i] + if isinstance(ann_dir, (list, tuple)): + data_cfg['ann_dir'] = ann_dir[i] + if isinstance(split, (list, tuple)): + data_cfg['split'] = split[i] + datasets.append(build_dataset(data_cfg, default_args)) + + return ConcatDataset(datasets) + + +def build_dataset(cfg, default_args=None): + """Build datasets.""" + from .dataset_wrappers import ConcatDataset, RepeatDataset + if isinstance(cfg, (list, tuple)): + dataset = ConcatDataset([build_dataset(c, default_args) for c in cfg]) + elif cfg['type'] == 'RepeatDataset': + dataset = RepeatDataset( + build_dataset(cfg['dataset'], default_args), cfg['times']) + elif isinstance(cfg.get('img_dir'), (list, tuple)) or isinstance( + cfg.get('split', None), (list, tuple)): + dataset = _concat_dataset(cfg, default_args) + else: + dataset = build_from_cfg(cfg, DATASETS, default_args) + + return dataset + + +def build_dataloader(dataset, + samples_per_gpu, + workers_per_gpu, + num_gpus=1, + dist=True, + shuffle=True, + seed=None, + drop_last=False, + pin_memory=True, + dataloader_type='PoolDataLoader', + **kwargs): + """Build PyTorch DataLoader. 
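build_dataset above resolves nested wrapper configs recursively; in this assumed sketch, an ADE20K-style training set is wrapped in a RepeatDataset (paths, times, and the minimal pipeline are illustrative):

from mmseg.datasets import build_dataset

train_set = build_dataset(
    dict(
        type='RepeatDataset',
        times=50,
        dataset=dict(
            type='ADE20KDataset',
            data_root='data/my_ade',
            img_dir='images/training',
            ann_dir='annotations/training',
            pipeline=[dict(type='LoadImageFromFile')])))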
+ + In distributed training, each GPU/process has a dataloader. + In non-distributed training, there is only one dataloader for all GPUs. + + Args: + dataset (Dataset): A PyTorch dataset. + samples_per_gpu (int): Number of training samples on each GPU, i.e., + batch size of each GPU. + workers_per_gpu (int): How many subprocesses to use for data loading + for each GPU. + num_gpus (int): Number of GPUs. Only used in non-distributed training. + dist (bool): Distributed training/test or not. Default: True. + shuffle (bool): Whether to shuffle the data at every epoch. + Default: True. + seed (int | None): Seed to be used. Default: None. + drop_last (bool): Whether to drop the last incomplete batch in epoch. + Default: False + pin_memory (bool): Whether to use pin_memory in DataLoader. + Default: True + dataloader_type (str): Type of dataloader. Default: 'PoolDataLoader' + kwargs: any keyword argument to be used to initialize DataLoader + + Returns: + DataLoader: A PyTorch dataloader. + """ + rank, world_size = get_dist_info() + if dist: + sampler = DistributedSampler( + dataset, world_size, rank, shuffle=shuffle) + shuffle = False + batch_size = samples_per_gpu + num_workers = workers_per_gpu + else: + sampler = None + batch_size = num_gpus * samples_per_gpu + num_workers = num_gpus * workers_per_gpu + + init_fn = partial( + worker_init_fn, num_workers=num_workers, rank=rank, + seed=seed) if seed is not None else None + + assert dataloader_type in ( + 'DataLoader', + 'PoolDataLoader'), f'unsupported dataloader {dataloader_type}' + + if dataloader_type == 'PoolDataLoader': + dataloader = PoolDataLoader + elif dataloader_type == 'DataLoader': + dataloader = DataLoader + + data_loader = dataloader( + dataset, + batch_size=batch_size, + sampler=sampler, + num_workers=num_workers, + collate_fn=partial(collate, samples_per_gpu=samples_per_gpu), + pin_memory=pin_memory, + shuffle=shuffle, + worker_init_fn=init_fn, + drop_last=drop_last, + **kwargs) + + return data_loader + + +def worker_init_fn(worker_id, num_workers, rank, seed): + """Worker init func for dataloader. + + The seed of each worker equals to num_worker * rank + worker_id + user_seed + + Args: + worker_id (int): Worker id. + num_workers (int): Number of workers. + rank (int): The rank of current process. + seed (int): The random seed to use. + """ + + worker_seed = num_workers * rank + worker_id + seed + np.random.seed(worker_seed) + random.seed(worker_seed) diff --git a/mmseg/datasets/chase_db1.py b/mmseg/datasets/chase_db1.py new file mode 100644 index 0000000000000000000000000000000000000000..8bc29bea14704a4407f83474610cbc3bef32c708 --- /dev/null +++ b/mmseg/datasets/chase_db1.py @@ -0,0 +1,27 @@ +import os.path as osp + +from .builder import DATASETS +from .custom import CustomDataset + + +@DATASETS.register_module() +class ChaseDB1Dataset(CustomDataset): + """Chase_db1 dataset. + + In segmentation map annotation for Chase_db1, 0 stands for background, + which is included in 2 categories. ``reduce_zero_label`` is fixed to False. + The ``img_suffix`` is fixed to '.png' and ``seg_map_suffix`` is fixed to + '_1stHO.png'. 
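A short non-distributed usage sketch for build_dataloader above, reusing the train_set from the previous sketch (values illustrative):

loader = build_dataloader(
    train_set,
    samples_per_gpu=4,
    workers_per_gpu=2,
    num_gpus=1,
    dist=False,
    seed=42,
    dataloader_type='DataLoader')
# each batch is collated with mmcv's collate (images stacked per GPU chunk)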
+ """ + + CLASSES = ('background', 'vessel') + + PALETTE = [[120, 120, 120], [6, 230, 230]] + + def __init__(self, **kwargs): + super(ChaseDB1Dataset, self).__init__( + img_suffix='.png', + seg_map_suffix='_1stHO.png', + reduce_zero_label=False, + **kwargs) + assert osp.exists(self.img_dir) diff --git a/mmseg/datasets/cityscapes.py b/mmseg/datasets/cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..fa9958ac1401644420d264c48cf8d807a44d7cf9 --- /dev/null +++ b/mmseg/datasets/cityscapes.py @@ -0,0 +1,217 @@ +import os.path as osp +import tempfile + +import mmcv +import numpy as np +from mmcv.utils import print_log +from PIL import Image + +from .builder import DATASETS +from .custom import CustomDataset + + +@DATASETS.register_module() +class CityscapesDataset(CustomDataset): + """Cityscapes dataset. + + The ``img_suffix`` is fixed to '_leftImg8bit.png' and ``seg_map_suffix`` is + fixed to '_gtFine_labelTrainIds.png' for Cityscapes dataset. + """ + + CLASSES = ('road', 'sidewalk', 'building', 'wall', 'fence', 'pole', + 'traffic light', 'traffic sign', 'vegetation', 'terrain', 'sky', + 'person', 'rider', 'car', 'truck', 'bus', 'train', 'motorcycle', + 'bicycle') + + PALETTE = [[128, 64, 128], [244, 35, 232], [70, 70, 70], [102, 102, 156], + [190, 153, 153], [153, 153, 153], [250, 170, 30], [220, 220, 0], + [107, 142, 35], [152, 251, 152], [70, 130, 180], [220, 20, 60], + [255, 0, 0], [0, 0, 142], [0, 0, 70], [0, 60, 100], + [0, 80, 100], [0, 0, 230], [119, 11, 32]] + + def __init__(self, **kwargs): + super(CityscapesDataset, self).__init__( + img_suffix='_leftImg8bit.png', + seg_map_suffix='_gtFine_labelTrainIds.png', + **kwargs) + + @staticmethod + def _convert_to_label_id(result): + """Convert trainId to id for cityscapes.""" + if isinstance(result, str): + result = np.load(result) + import cityscapesscripts.helpers.labels as CSLabels + result_copy = result.copy() + for trainId, label in CSLabels.trainId2label.items(): + result_copy[result == trainId] = label.id + + return result_copy + + def results2img(self, results, imgfile_prefix, to_label_id): + """Write the segmentation results to images. + + Args: + results (list[list | tuple | ndarray]): Testing results of the + dataset. + imgfile_prefix (str): The filename prefix of the png files. + If the prefix is "somepath/xxx", + the png files will be named "somepath/xxx.png". + to_label_id (bool): whether convert output to label_id for + submission + + Returns: + list[str: str]: result txt files which contains corresponding + semantic segmentation images. + """ + mmcv.mkdir_or_exist(imgfile_prefix) + result_files = [] + prog_bar = mmcv.ProgressBar(len(self)) + for idx in range(len(self)): + result = results[idx] + if to_label_id: + result = self._convert_to_label_id(result) + filename = self.img_infos[idx]['filename'] + basename = osp.splitext(osp.basename(filename))[0] + + png_filename = osp.join(imgfile_prefix, f'{basename}.png') + + output = Image.fromarray(result.astype(np.uint8)).convert('P') + import cityscapesscripts.helpers.labels as CSLabels + palette = np.zeros((len(CSLabels.id2label), 3), dtype=np.uint8) + for label_id, label in CSLabels.id2label.items(): + palette[label_id] = label.color + + output.putpalette(palette) + output.save(png_filename) + result_files.append(png_filename) + prog_bar.update() + + return result_files + + def format_results(self, results, imgfile_prefix=None, to_label_id=True): + """Format the results into dir (standard format for Cityscapes + evaluation). 
+ + Args: + results (list): Testing results of the dataset. + imgfile_prefix (str | None): The prefix of image files. It + includes the file path and the prefix of filename, e.g., + "a/b/prefix". If not specified, a temp file will be created. + Default: None. + to_label_id (bool): Whether to convert output to label_id for + submission. Default: True. + + Returns: + tuple: (result_files, tmp_dir), result_files is a list containing + the image paths, tmp_dir is the temporary directory created + for saving json/png files when imgfile_prefix is not specified. + """ + + assert isinstance(results, list), 'results must be a list' + assert len(results) == len(self), ( + 'The length of results is not equal to the dataset len: ' + f'{len(results)} != {len(self)}') + + if imgfile_prefix is None: + tmp_dir = tempfile.TemporaryDirectory() + imgfile_prefix = tmp_dir.name + else: + tmp_dir = None + result_files = self.results2img(results, imgfile_prefix, to_label_id) + + return result_files, tmp_dir + + def evaluate(self, + results, + metric='mIoU', + logger=None, + imgfile_prefix=None, + efficient_test=False): + """Evaluation in Cityscapes/default protocol. + + Args: + results (list): Testing results of the dataset. + metric (str | list[str]): Metrics to be evaluated. + logger (logging.Logger | None | str): Logger used for printing + related information during evaluation. Default: None. + imgfile_prefix (str | None): The prefix of output image file, + for cityscapes evaluation only. It includes the file path and + the prefix of filename, e.g., "a/b/prefix". + If results are evaluated with cityscapes protocol, it would be + the prefix of output png files. The output files would be + png images under folder "a/b/prefix/xxx.png", where "xxx" is + the image name of cityscapes. If not specified, a temp file + will be created for evaluation. + Default: None. + efficient_test (bool): Whether the results are filenames of numpy + arrays saved during efficient testing. Default: False. + + Returns: + dict[str, float]: Cityscapes/default metrics. + """ + + eval_results = dict() + metrics = metric.copy() if isinstance(metric, list) else [metric] + if 'cityscapes' in metrics: + eval_results.update( + self._evaluate_cityscapes(results, logger, imgfile_prefix)) + metrics.remove('cityscapes') + if len(metrics) > 0: + eval_results.update( + super(CityscapesDataset, + self).evaluate(results, metrics, logger, efficient_test)) + + return eval_results + + def _evaluate_cityscapes(self, results, logger, imgfile_prefix): + """Evaluation in Cityscapes protocol. + + Args: + results (list): Testing results of the dataset. + logger (logging.Logger | str | None): Logger used for printing + related information during evaluation. Default: None. + imgfile_prefix (str | None): The prefix of output image files. + + Returns: + dict[str: float]: Cityscapes evaluation results.
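Assuming `dataset` is a CityscapesDataset instance and `results` came from single_gpu_test (both assumptions here), the official protocol and the default one can be mixed in a single call (output path illustrative):

# 'cityscapes' is routed to _evaluate_cityscapes; 'mIoU' falls through to
# CustomDataset.evaluate
metrics = dataset.evaluate(
    results,
    metric=['mIoU', 'cityscapes'],
    imgfile_prefix='work_dirs/cityscapes_submit')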
+ """ + try: + import cityscapesscripts.evaluation.evalPixelLevelSemanticLabeling as CSEval # noqa + except ImportError: + raise ImportError('Please run "pip install cityscapesscripts" to ' + 'install cityscapesscripts first.') + msg = 'Evaluating in Cityscapes style' + if logger is None: + msg = '\n' + msg + print_log(msg, logger=logger) + + result_files, tmp_dir = self.format_results(results, imgfile_prefix) + + if tmp_dir is None: + result_dir = imgfile_prefix + else: + result_dir = tmp_dir.name + + eval_results = dict() + print_log(f'Evaluating results under {result_dir} ...', logger=logger) + + CSEval.args.evalInstLevelScore = True + CSEval.args.predictionPath = osp.abspath(result_dir) + CSEval.args.evalPixelAccuracy = True + CSEval.args.JSONOutput = False + + seg_map_list = [] + pred_list = [] + + # when evaluating with official cityscapesscripts, + # **_gtFine_labelIds.png is used + for seg_map in mmcv.scandir( + self.ann_dir, 'gtFine_labelIds.png', recursive=True): + seg_map_list.append(osp.join(self.ann_dir, seg_map)) + pred_list.append(CSEval.getPrediction(CSEval.args, seg_map)) + + eval_results.update( + CSEval.evaluateImgLists(pred_list, seg_map_list, CSEval.args)) + + if tmp_dir is not None: + tmp_dir.cleanup() + + return eval_results diff --git a/mmseg/datasets/custom.py b/mmseg/datasets/custom.py new file mode 100644 index 0000000000000000000000000000000000000000..9c88235e39b845fbacfb72c8b8d5ae6412619437 --- /dev/null +++ b/mmseg/datasets/custom.py @@ -0,0 +1,400 @@ +import os +import os.path as osp +from collections import OrderedDict +from functools import reduce + +import mmcv +import numpy as np +from mmcv.utils import print_log +from prettytable import PrettyTable +from torch.utils.data import Dataset + +from mmseg.core import eval_metrics +from mmseg.utils import get_root_logger +from .builder import DATASETS +from .pipelines import Compose + + +@DATASETS.register_module() +class CustomDataset(Dataset): + """Custom dataset for semantic segmentation. An example of file structure + is as followed. + + .. code-block:: none + + ├── data + │ ├── my_dataset + │ │ ├── img_dir + │ │ │ ├── train + │ │ │ │ ├── xxx{img_suffix} + │ │ │ │ ├── yyy{img_suffix} + │ │ │ │ ├── zzz{img_suffix} + │ │ │ ├── val + │ │ ├── ann_dir + │ │ │ ├── train + │ │ │ │ ├── xxx{seg_map_suffix} + │ │ │ │ ├── yyy{seg_map_suffix} + │ │ │ │ ├── zzz{seg_map_suffix} + │ │ │ ├── val + + The img/gt_semantic_seg pair of CustomDataset should be of the same + except suffix. A valid img/gt_semantic_seg filename pair should be like + ``xxx{img_suffix}`` and ``xxx{seg_map_suffix}`` (extension is also included + in the suffix). If split is given, then ``xxx`` is specified in txt file. + Otherwise, all files in ``img_dir/``and ``ann_dir`` will be loaded. + Please refer to ``docs/tutorials/new_dataset.md`` for more details. + + + Args: + pipeline (list[dict]): Processing pipeline + img_dir (str): Path to image directory + img_suffix (str): Suffix of images. Default: '.jpg' + ann_dir (str, optional): Path to annotation directory. Default: None + seg_map_suffix (str): Suffix of segmentation maps. Default: '.png' + split (str, optional): Split txt file. If split is specified, only + file with suffix in the splits will be loaded. Otherwise, all + images in img_dir/ann_dir will be loaded. Default: None + data_root (str, optional): Data root for img_dir/ann_dir. Default: + None. + test_mode (bool): If test_mode=True, gt wouldn't be loaded. + ignore_index (int): The label index to be ignored. 
Default: 255 + reduce_zero_label (bool): Whether to mark label zero as ignored. + Default: False + classes (str | Sequence[str], optional): Specify classes to load. + If is None, ``cls.CLASSES`` will be used. Default: None. + palette (Sequence[Sequence[int]]] | np.ndarray | None): + The palette of segmentation map. If None is given, and + self.PALETTE is None, random palette will be generated. + Default: None + """ + + CLASSES = None + + PALETTE = None + + def __init__(self, + pipeline, + img_dir, + img_suffix='.jpg', + ann_dir=None, + seg_map_suffix='.png', + split=None, + data_root=None, + test_mode=False, + ignore_index=255, + reduce_zero_label=False, + classes=None, + palette=None): + self.pipeline = Compose(pipeline) + self.img_dir = img_dir + self.img_suffix = img_suffix + self.ann_dir = ann_dir + self.seg_map_suffix = seg_map_suffix + self.split = split + self.data_root = data_root + self.test_mode = test_mode + self.ignore_index = ignore_index + self.reduce_zero_label = reduce_zero_label + self.label_map = None + self.CLASSES, self.PALETTE = self.get_classes_and_palette( + classes, palette) + + # join paths if data_root is specified + if self.data_root is not None: + if not osp.isabs(self.img_dir): + self.img_dir = osp.join(self.data_root, self.img_dir) + if not (self.ann_dir is None or osp.isabs(self.ann_dir)): + self.ann_dir = osp.join(self.data_root, self.ann_dir) + if not (self.split is None or osp.isabs(self.split)): + self.split = osp.join(self.data_root, self.split) + + # load annotations + self.img_infos = self.load_annotations(self.img_dir, self.img_suffix, + self.ann_dir, + self.seg_map_suffix, self.split) + + def __len__(self): + """Total number of samples of data.""" + return len(self.img_infos) + + def load_annotations(self, img_dir, img_suffix, ann_dir, seg_map_suffix, + split): + """Load annotation from directory. + + Args: + img_dir (str): Path to image directory + img_suffix (str): Suffix of images. + ann_dir (str|None): Path to annotation directory. + seg_map_suffix (str|None): Suffix of segmentation maps. + split (str|None): Split txt file. If split is specified, only file + with suffix in the splits will be loaded. Otherwise, all images + in img_dir/ann_dir will be loaded. Default: None + + Returns: + list[dict]: All image info of dataset. + """ + + img_infos = [] + if split is not None: + with open(split) as f: + for line in f: + img_name = line.strip() + img_info = dict(filename=img_name + img_suffix) + if ann_dir is not None: + seg_map = img_name + seg_map_suffix + img_info['ann'] = dict(seg_map=seg_map) + img_infos.append(img_info) + else: + for img in mmcv.scandir(img_dir, img_suffix, recursive=True): + img_info = dict(filename=img) + if ann_dir is not None: + seg_map = img.replace(img_suffix, seg_map_suffix) + img_info['ann'] = dict(seg_map=seg_map) + img_infos.append(img_info) + + print_log(f'Loaded {len(img_infos)} images', logger=get_root_logger()) + return img_infos + + def get_ann_info(self, idx): + """Get annotation by index. + + Args: + idx (int): Index of data. + + Returns: + dict: Annotation info of specified index. + """ + + return self.img_infos[idx]['ann'] + + def pre_pipeline(self, results): + """Prepare results dict for pipeline.""" + results['seg_fields'] = [] + results['img_prefix'] = self.img_dir + results['seg_prefix'] = self.ann_dir + if self.custom_classes: + results['label_map'] = self.label_map + + def __getitem__(self, idx): + """Get training/test data after pipeline. + + Args: + idx (int): Index of data. 
+ + Returns: + dict: Training/test data (with annotation if `test_mode` is set + False). + """ + + if self.test_mode: + return self.prepare_test_img(idx) + else: + return self.prepare_train_img(idx) + + def prepare_train_img(self, idx): + """Get training data and annotations after pipeline. + + Args: + idx (int): Index of data. + + Returns: + dict: Training data and annotation after pipeline with new keys + introduced by pipeline. + """ + + img_info = self.img_infos[idx] + ann_info = self.get_ann_info(idx) + results = dict(img_info=img_info, ann_info=ann_info) + self.pre_pipeline(results) + return self.pipeline(results) + + def prepare_test_img(self, idx): + """Get testing data after pipeline. + + Args: + idx (int): Index of data. + + Returns: + dict: Testing data after pipeline with new keys introduced by + pipeline. + """ + + img_info = self.img_infos[idx] + results = dict(img_info=img_info) + self.pre_pipeline(results) + return self.pipeline(results) + + def format_results(self, results, **kwargs): + """Place holder to format result to dataset specific output.""" + + def get_gt_seg_maps(self, efficient_test=False): + """Get ground truth segmentation maps for evaluation.""" + gt_seg_maps = [] + for img_info in self.img_infos: + seg_map = osp.join(self.ann_dir, img_info['ann']['seg_map']) + if efficient_test: + gt_seg_map = seg_map + else: + gt_seg_map = mmcv.imread( + seg_map, flag='unchanged', backend='pillow') + gt_seg_maps.append(gt_seg_map) + return gt_seg_maps + + def get_classes_and_palette(self, classes=None, palette=None): + """Get class names of current dataset. + + Args: + classes (Sequence[str] | str | None): If classes is None, use + default CLASSES defined by builtin dataset. If classes is a + string, take it as a file name. The file contains the name of + classes where each line contains one class name. If classes is + a tuple or list, override the CLASSES defined by the dataset. + palette (Sequence[Sequence[int]]] | np.ndarray | None): + The palette of segmentation map. If None is given, random + palette will be generated. Default: None + """ + if classes is None: + self.custom_classes = False + return self.CLASSES, self.PALETTE + + self.custom_classes = True + if isinstance(classes, str): + # take it as a file path + class_names = mmcv.list_from_file(classes) + elif isinstance(classes, (tuple, list)): + class_names = classes + else: + raise ValueError(f'Unsupported type {type(classes)} of classes.') + + if self.CLASSES: + if not set(classes).issubset(self.CLASSES): + raise ValueError('classes is not a subset of CLASSES.') + + # dictionary, its keys are the old label ids and its values + # are the new label ids. + # used for changing pixel labels in load_annotations. 
+ self.label_map = {} + for i, c in enumerate(self.CLASSES): + if c not in class_names: + self.label_map[i] = -1 + else: + self.label_map[i] = classes.index(c) + + palette = self.get_palette_for_custom_classes(class_names, palette) + + return class_names, palette + + def get_palette_for_custom_classes(self, class_names, palette=None): + + if self.label_map is not None: + # return subset of palette + palette = [] + for old_id, new_id in sorted( + self.label_map.items(), key=lambda x: x[1]): + if new_id != -1: + palette.append(self.PALETTE[old_id]) + palette = type(self.PALETTE)(palette) + + elif palette is None: + if self.PALETTE is None: + palette = np.random.randint(0, 255, size=(len(class_names), 3)) + else: + palette = self.PALETTE + + return palette + + def evaluate(self, + results, + metric='mIoU', + logger=None, + efficient_test=False, + **kwargs): + """Evaluate the dataset. + + Args: + results (list): Testing results of the dataset. + metric (str | list[str]): Metrics to be evaluated. 'mIoU', + 'mDice' and 'mFscore' are supported. + logger (logging.Logger | None | str): Logger used for printing + related information during evaluation. Default: None. + + Returns: + dict[str, float]: Default metrics. + """ + + if isinstance(metric, str): + metric = [metric] + allowed_metrics = ['mIoU', 'mDice', 'mFscore'] + if not set(metric).issubset(set(allowed_metrics)): + raise KeyError('metric {} is not supported'.format(metric)) + eval_results = {} + gt_seg_maps = self.get_gt_seg_maps(efficient_test) + if self.CLASSES is None: + num_classes = len( + reduce(np.union1d, [np.unique(_) for _ in gt_seg_maps])) + else: + num_classes = len(self.CLASSES) + ret_metrics = eval_metrics( + results, + gt_seg_maps, + num_classes, + self.ignore_index, + metric, + label_map=self.label_map, + reduce_zero_label=self.reduce_zero_label) + + if self.CLASSES is None: + class_names = tuple(range(num_classes)) + else: + class_names = self.CLASSES + + # summary table + ret_metrics_summary = OrderedDict({ + ret_metric: np.round(np.nanmean(ret_metric_value) * 100, 2) + for ret_metric, ret_metric_value in ret_metrics.items() + }) + + # each class table + ret_metrics.pop('aAcc', None) + ret_metrics_class = OrderedDict({ + ret_metric: np.round(ret_metric_value * 100, 2) + for ret_metric, ret_metric_value in ret_metrics.items() + }) + ret_metrics_class.update({'Class': class_names}) + ret_metrics_class.move_to_end('Class', last=False) + + # for logger + class_table_data = PrettyTable() + for key, val in ret_metrics_class.items(): + class_table_data.add_column(key, val) + + summary_table_data = PrettyTable() + for key, val in ret_metrics_summary.items(): + if key == 'aAcc': + summary_table_data.add_column(key, [val]) + else: + summary_table_data.add_column('m' + key, [val]) + + print_log('per class results:', logger) + print_log('\n' + class_table_data.get_string(), logger=logger) + print_log('Summary:', logger) + print_log('\n' + summary_table_data.get_string(), logger=logger) + + # each metric dict + for key, value in ret_metrics_summary.items(): + if key == 'aAcc': + eval_results[key] = value / 100.0 + else: + eval_results['m' + key] = value / 100.0 + + ret_metrics_class.pop('Class', None) + for key, value in ret_metrics_class.items(): + eval_results.update({ + key + '.' 
+ str(name): value[idx] / 100.0 + for idx, name in enumerate(class_names) + }) + + if mmcv.is_list_of(results, str): + for file_name in results: + os.remove(file_name) + return eval_results diff --git a/mmseg/datasets/dataset_wrappers.py b/mmseg/datasets/dataset_wrappers.py new file mode 100644 index 0000000000000000000000000000000000000000..d6a5e957ec3b44465432617cf6e8f0b86a8a5efa --- /dev/null +++ b/mmseg/datasets/dataset_wrappers.py @@ -0,0 +1,50 @@ +from torch.utils.data.dataset import ConcatDataset as _ConcatDataset + +from .builder import DATASETS + + +@DATASETS.register_module() +class ConcatDataset(_ConcatDataset): + """A wrapper of concatenated dataset. + + Same as :obj:`torch.utils.data.dataset.ConcatDataset`, but + concat the group flag for image aspect ratio. + + Args: + datasets (list[:obj:`Dataset`]): A list of datasets. + """ + + def __init__(self, datasets): + super(ConcatDataset, self).__init__(datasets) + self.CLASSES = datasets[0].CLASSES + self.PALETTE = datasets[0].PALETTE + + +@DATASETS.register_module() +class RepeatDataset(object): + """A wrapper of repeated dataset. + + The length of repeated dataset will be `times` larger than the original + dataset. This is useful when the data loading time is long but the dataset + is small. Using RepeatDataset can reduce the data loading time between + epochs. + + Args: + dataset (:obj:`Dataset`): The dataset to be repeated. + times (int): Repeat times. + """ + + def __init__(self, dataset, times): + self.dataset = dataset + self.times = times + self.CLASSES = dataset.CLASSES + self.PALETTE = dataset.PALETTE + self._ori_len = len(self.dataset) + + def __getitem__(self, idx): + """Get item from original dataset.""" + return self.dataset[idx % self._ori_len] + + def __len__(self): + """The length is multiplied by ``times``""" + return self.times * self._ori_len diff --git a/mmseg/datasets/drive.py b/mmseg/datasets/drive.py new file mode 100644 index 0000000000000000000000000000000000000000..3cbfda8ae74bdf26c5aef197ff2866a7c7ad0cfd --- /dev/null +++ b/mmseg/datasets/drive.py @@ -0,0 +1,27 @@ +import os.path as osp + +from .builder import DATASETS +from .custom import CustomDataset + + +@DATASETS.register_module() +class DRIVEDataset(CustomDataset): + """DRIVE dataset. + + In segmentation map annotation for DRIVE, 0 stands for background, which is + included in 2 categories. ``reduce_zero_label`` is fixed to False. The + ``img_suffix`` is fixed to '.png' and ``seg_map_suffix`` is fixed to + '_manual1.png'. + """ + + CLASSES = ('background', 'vessel') + + PALETTE = [[120, 120, 120], [6, 230, 230]] + + def __init__(self, **kwargs): + super(DRIVEDataset, self).__init__( + img_suffix='.png', + seg_map_suffix='_manual1.png', + reduce_zero_label=False, + **kwargs) + assert osp.exists(self.img_dir) diff --git a/mmseg/datasets/hrf.py b/mmseg/datasets/hrf.py new file mode 100644 index 0000000000000000000000000000000000000000..923203b51377f9344277fc561803d7a78bd2c684 --- /dev/null +++ b/mmseg/datasets/hrf.py @@ -0,0 +1,27 @@ +import os.path as osp + +from .builder import DATASETS +from .custom import CustomDataset + + +@DATASETS.register_module() +class HRFDataset(CustomDataset): + """HRF dataset. + + In segmentation map annotation for HRF, 0 stands for background, which is + included in 2 categories. ``reduce_zero_label`` is fixed to False. The + ``img_suffix`` is fixed to '.png' and ``seg_map_suffix`` is fixed to + '.png'. 
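Because build_dataset accepts a list of configs (see builder.py above), two annotation splits can be concatenated into a single ConcatDataset; the paths and the train_pipeline variable here are illustrative assumptions:

from mmseg.datasets import build_dataset

train_set = build_dataset([
    dict(
        type='DRIVEDataset',
        data_root='data/my_drive',
        img_dir='images/split_a',
        ann_dir='annotations/split_a',
        pipeline=train_pipeline),
    dict(
        type='DRIVEDataset',
        data_root='data/my_drive',
        img_dir='images/split_b',
        ann_dir='annotations/split_b',
        pipeline=train_pipeline),
])
# CLASSES and PALETTE are taken from the first dataset in the list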
+ """ + + CLASSES = ('background', 'vessel') + + PALETTE = [[120, 120, 120], [6, 230, 230]] + + def __init__(self, **kwargs): + super(HRFDataset, self).__init__( + img_suffix='.png', + seg_map_suffix='.png', + reduce_zero_label=False, + **kwargs) + assert osp.exists(self.img_dir) diff --git a/mmseg/datasets/pascal_context.py b/mmseg/datasets/pascal_context.py new file mode 100644 index 0000000000000000000000000000000000000000..541a63c66a13fb16fd52921e755715ad8d078fdd --- /dev/null +++ b/mmseg/datasets/pascal_context.py @@ -0,0 +1,103 @@ +import os.path as osp + +from .builder import DATASETS +from .custom import CustomDataset + + +@DATASETS.register_module() +class PascalContextDataset(CustomDataset): + """PascalContext dataset. + + In segmentation map annotation for PascalContext, 0 stands for background, + which is included in 60 categories. ``reduce_zero_label`` is fixed to + False. The ``img_suffix`` is fixed to '.jpg' and ``seg_map_suffix`` is + fixed to '.png'. + + Args: + split (str): Split txt file for PascalContext. + """ + + CLASSES = ('background', 'aeroplane', 'bag', 'bed', 'bedclothes', 'bench', + 'bicycle', 'bird', 'boat', 'book', 'bottle', 'building', 'bus', + 'cabinet', 'car', 'cat', 'ceiling', 'chair', 'cloth', + 'computer', 'cow', 'cup', 'curtain', 'dog', 'door', 'fence', + 'floor', 'flower', 'food', 'grass', 'ground', 'horse', + 'keyboard', 'light', 'motorbike', 'mountain', 'mouse', 'person', + 'plate', 'platform', 'pottedplant', 'road', 'rock', 'sheep', + 'shelves', 'sidewalk', 'sign', 'sky', 'snow', 'sofa', 'table', + 'track', 'train', 'tree', 'truck', 'tvmonitor', 'wall', 'water', + 'window', 'wood') + + PALETTE = [[120, 120, 120], [180, 120, 120], [6, 230, 230], [80, 50, 50], + [4, 200, 3], [120, 120, 80], [140, 140, 140], [204, 5, 255], + [230, 230, 230], [4, 250, 7], [224, 5, 255], [235, 255, 7], + [150, 5, 61], [120, 120, 70], [8, 255, 51], [255, 6, 82], + [143, 255, 140], [204, 255, 4], [255, 51, 7], [204, 70, 3], + [0, 102, 200], [61, 230, 250], [255, 6, 51], [11, 102, 255], + [255, 7, 71], [255, 9, 224], [9, 7, 230], [220, 220, 220], + [255, 9, 92], [112, 9, 255], [8, 255, 214], [7, 255, 224], + [255, 184, 6], [10, 255, 71], [255, 41, 10], [7, 255, 255], + [224, 255, 8], [102, 8, 255], [255, 61, 6], [255, 194, 7], + [255, 122, 8], [0, 255, 20], [255, 8, 41], [255, 5, 153], + [6, 51, 255], [235, 12, 255], [160, 150, 20], [0, 163, 255], + [140, 140, 140], [250, 10, 15], [20, 255, 0], [31, 255, 0], + [255, 31, 0], [255, 224, 0], [153, 255, 0], [0, 0, 255], + [255, 71, 0], [0, 235, 255], [0, 173, 255], [31, 0, 255]] + + def __init__(self, split, **kwargs): + super(PascalContextDataset, self).__init__( + img_suffix='.jpg', + seg_map_suffix='.png', + split=split, + reduce_zero_label=False, + **kwargs) + assert osp.exists(self.img_dir) and self.split is not None + + +@DATASETS.register_module() +class PascalContextDataset59(CustomDataset): + """PascalContext dataset. + + In segmentation map annotation for PascalContext, 0 stands for background, + which is included in 60 categories. ``reduce_zero_label`` is fixed to + False. The ``img_suffix`` is fixed to '.jpg' and ``seg_map_suffix`` is + fixed to '.png'. + + Args: + split (str): Split txt file for PascalContext. 
+ """ + + CLASSES = ('aeroplane', 'bag', 'bed', 'bedclothes', 'bench', 'bicycle', + 'bird', 'boat', 'book', 'bottle', 'building', 'bus', 'cabinet', + 'car', 'cat', 'ceiling', 'chair', 'cloth', 'computer', 'cow', + 'cup', 'curtain', 'dog', 'door', 'fence', 'floor', 'flower', + 'food', 'grass', 'ground', 'horse', 'keyboard', 'light', + 'motorbike', 'mountain', 'mouse', 'person', 'plate', 'platform', + 'pottedplant', 'road', 'rock', 'sheep', 'shelves', 'sidewalk', + 'sign', 'sky', 'snow', 'sofa', 'table', 'track', 'train', + 'tree', 'truck', 'tvmonitor', 'wall', 'water', 'window', 'wood') + + PALETTE = [[180, 120, 120], [6, 230, 230], [80, 50, 50], [4, 200, 3], + [120, 120, 80], [140, 140, 140], [204, 5, 255], [230, 230, 230], + [4, 250, 7], [224, 5, 255], [235, 255, 7], [150, 5, 61], + [120, 120, 70], [8, 255, 51], [255, 6, 82], [143, 255, 140], + [204, 255, 4], [255, 51, 7], [204, 70, 3], [0, 102, 200], + [61, 230, 250], [255, 6, 51], [11, 102, 255], [255, 7, 71], + [255, 9, 224], [9, 7, 230], [220, 220, 220], [255, 9, 92], + [112, 9, 255], [8, 255, 214], [7, 255, 224], [255, 184, 6], + [10, 255, 71], [255, 41, 10], [7, 255, 255], [224, 255, 8], + [102, 8, 255], [255, 61, 6], [255, 194, 7], [255, 122, 8], + [0, 255, 20], [255, 8, 41], [255, 5, 153], [6, 51, 255], + [235, 12, 255], [160, 150, 20], [0, 163, 255], [140, 140, 140], + [250, 10, 15], [20, 255, 0], [31, 255, 0], [255, 31, 0], + [255, 224, 0], [153, 255, 0], [0, 0, 255], [255, 71, 0], + [0, 235, 255], [0, 173, 255], [31, 0, 255]] + + def __init__(self, split, **kwargs): + super(PascalContextDataset59, self).__init__( + img_suffix='.jpg', + seg_map_suffix='.png', + split=split, + reduce_zero_label=True, + **kwargs) + assert osp.exists(self.img_dir) and self.split is not None diff --git a/mmseg/datasets/pipelines/__init__.py b/mmseg/datasets/pipelines/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..8b9046b07bb4ddea7a707a392b42e72db7c9df67 --- /dev/null +++ b/mmseg/datasets/pipelines/__init__.py @@ -0,0 +1,16 @@ +from .compose import Compose +from .formating import (Collect, ImageToTensor, ToDataContainer, ToTensor, + Transpose, to_tensor) +from .loading import LoadAnnotations, LoadImageFromFile +from .test_time_aug import MultiScaleFlipAug +from .transforms import (CLAHE, AdjustGamma, Normalize, Pad, + PhotoMetricDistortion, RandomCrop, RandomFlip, + RandomRotate, Rerange, Resize, RGB2Gray, SegRescale) + +__all__ = [ + 'Compose', 'to_tensor', 'ToTensor', 'ImageToTensor', 'ToDataContainer', + 'Transpose', 'Collect', 'LoadAnnotations', 'LoadImageFromFile', + 'MultiScaleFlipAug', 'Resize', 'RandomFlip', 'Pad', 'RandomCrop', + 'Normalize', 'SegRescale', 'PhotoMetricDistortion', 'RandomRotate', + 'AdjustGamma', 'CLAHE', 'Rerange', 'RGB2Gray' +] diff --git a/mmseg/datasets/pipelines/compose.py b/mmseg/datasets/pipelines/compose.py new file mode 100644 index 0000000000000000000000000000000000000000..ca48f1c935755c486edc2744e1713e2b5ba3cdc8 --- /dev/null +++ b/mmseg/datasets/pipelines/compose.py @@ -0,0 +1,51 @@ +import collections + +from mmcv.utils import build_from_cfg + +from ..builder import PIPELINES + + +@PIPELINES.register_module() +class Compose(object): + """Compose multiple transforms sequentially. + + Args: + transforms (Sequence[dict | callable]): Sequence of transform object or + config dict to be composed. 
+ """ + + def __init__(self, transforms): + assert isinstance(transforms, collections.abc.Sequence) + self.transforms = [] + for transform in transforms: + if isinstance(transform, dict): + transform = build_from_cfg(transform, PIPELINES) + self.transforms.append(transform) + elif callable(transform): + self.transforms.append(transform) + else: + raise TypeError('transform must be callable or a dict') + + def __call__(self, data): + """Call function to apply transforms sequentially. + + Args: + data (dict): A result dict contains the data to transform. + + Returns: + dict: Transformed data. + """ + + for t in self.transforms: + data = t(data) + if data is None: + return None + return data + + def __repr__(self): + format_string = self.__class__.__name__ + '(' + for t in self.transforms: + format_string += '\n' + format_string += f' {t}' + format_string += '\n)' + return format_string diff --git a/mmseg/datasets/pipelines/formating.py b/mmseg/datasets/pipelines/formating.py new file mode 100644 index 0000000000000000000000000000000000000000..34061c1dd160d4b00aac8dbdc82dccf5c3883ce8 --- /dev/null +++ b/mmseg/datasets/pipelines/formating.py @@ -0,0 +1,288 @@ +from collections.abc import Sequence + +import mmcv +import numpy as np +import torch +from mmcv.parallel import DataContainer as DC + +from ..builder import PIPELINES + + +def to_tensor(data): + """Convert objects of various python types to :obj:`torch.Tensor`. + + Supported types are: :class:`numpy.ndarray`, :class:`torch.Tensor`, + :class:`Sequence`, :class:`int` and :class:`float`. + + Args: + data (torch.Tensor | numpy.ndarray | Sequence | int | float): Data to + be converted. + """ + + if isinstance(data, torch.Tensor): + return data + elif isinstance(data, np.ndarray): + return torch.from_numpy(data) + elif isinstance(data, Sequence) and not mmcv.is_str(data): + return torch.tensor(data) + elif isinstance(data, int): + return torch.LongTensor([data]) + elif isinstance(data, float): + return torch.FloatTensor([data]) + else: + raise TypeError(f'type {type(data)} cannot be converted to tensor.') + + +@PIPELINES.register_module() +class ToTensor(object): + """Convert some results to :obj:`torch.Tensor` by given keys. + + Args: + keys (Sequence[str]): Keys that need to be converted to Tensor. + """ + + def __init__(self, keys): + self.keys = keys + + def __call__(self, results): + """Call function to convert data in results to :obj:`torch.Tensor`. + + Args: + results (dict): Result dict contains the data to convert. + + Returns: + dict: The result dict contains the data converted + to :obj:`torch.Tensor`. + """ + + for key in self.keys: + results[key] = to_tensor(results[key]) + return results + + def __repr__(self): + return self.__class__.__name__ + f'(keys={self.keys})' + + +@PIPELINES.register_module() +class ImageToTensor(object): + """Convert image to :obj:`torch.Tensor` by given keys. + + The dimension order of input image is (H, W, C). The pipeline will convert + it to (C, H, W). If only 2 dimension (H, W) is given, the output would be + (1, H, W). + + Args: + keys (Sequence[str]): Key of images to be converted to Tensor. + """ + + def __init__(self, keys): + self.keys = keys + + def __call__(self, results): + """Call function to convert image in results to :obj:`torch.Tensor` and + transpose the channel order. + + Args: + results (dict): Result dict contains the image data to convert. + + Returns: + dict: The result dict contains the image converted + to :obj:`torch.Tensor` and transposed to (C, H, W) order. 
+ """ + + for key in self.keys: + img = results[key] + if len(img.shape) < 3: + img = np.expand_dims(img, -1) + results[key] = to_tensor(img.transpose(2, 0, 1)) + return results + + def __repr__(self): + return self.__class__.__name__ + f'(keys={self.keys})' + + +@PIPELINES.register_module() +class Transpose(object): + """Transpose some results by given keys. + + Args: + keys (Sequence[str]): Keys of results to be transposed. + order (Sequence[int]): Order of transpose. + """ + + def __init__(self, keys, order): + self.keys = keys + self.order = order + + def __call__(self, results): + """Call function to convert image in results to :obj:`torch.Tensor` and + transpose the channel order. + + Args: + results (dict): Result dict contains the image data to convert. + + Returns: + dict: The result dict contains the image converted + to :obj:`torch.Tensor` and transposed to (C, H, W) order. + """ + + for key in self.keys: + results[key] = results[key].transpose(self.order) + return results + + def __repr__(self): + return self.__class__.__name__ + \ + f'(keys={self.keys}, order={self.order})' + + +@PIPELINES.register_module() +class ToDataContainer(object): + """Convert results to :obj:`mmcv.DataContainer` by given fields. + + Args: + fields (Sequence[dict]): Each field is a dict like + ``dict(key='xxx', **kwargs)``. The ``key`` in result will + be converted to :obj:`mmcv.DataContainer` with ``**kwargs``. + Default: ``(dict(key='img', stack=True), + dict(key='gt_semantic_seg'))``. + """ + + def __init__(self, + fields=(dict(key='img', + stack=True), dict(key='gt_semantic_seg'))): + self.fields = fields + + def __call__(self, results): + """Call function to convert data in results to + :obj:`mmcv.DataContainer`. + + Args: + results (dict): Result dict contains the data to convert. + + Returns: + dict: The result dict contains the data converted to + :obj:`mmcv.DataContainer`. + """ + + for field in self.fields: + field = field.copy() + key = field.pop('key') + results[key] = DC(results[key], **field) + return results + + def __repr__(self): + return self.__class__.__name__ + f'(fields={self.fields})' + + +@PIPELINES.register_module() +class DefaultFormatBundle(object): + """Default formatting bundle. + + It simplifies the pipeline of formatting common fields, including "img" + and "gt_semantic_seg". These fields are formatted as follows. + + - img: (1)transpose, (2)to tensor, (3)to DataContainer (stack=True) + - gt_semantic_seg: (1)unsqueeze dim-0 (2)to tensor, + (3)to DataContainer (stack=True) + """ + + def __call__(self, results): + """Call function to transform and format common fields in results. + + Args: + results (dict): Result dict contains the data to convert. + + Returns: + dict: The result dict contains the data that is formatted with + default bundle. + """ + + if 'img' in results: + img = results['img'] + if len(img.shape) < 3: + img = np.expand_dims(img, -1) + img = np.ascontiguousarray(img.transpose(2, 0, 1)) + results['img'] = DC(to_tensor(img), stack=True) + if 'gt_semantic_seg' in results: + # convert to long + results['gt_semantic_seg'] = DC( + to_tensor(results['gt_semantic_seg'][None, + ...].astype(np.int64)), + stack=True) + return results + + def __repr__(self): + return self.__class__.__name__ + + +@PIPELINES.register_module() +class Collect(object): + """Collect data from the loader relevant to the specific task. + + This is usually the last stage of the data loader pipeline. Typically keys + is set to some subset of "img", "gt_semantic_seg". 
+ + The "img_meta" item is always populated. The contents of the "img_meta" + dictionary depends on "meta_keys". By default this includes: + + - "img_shape": shape of the image input to the network as a tuple + (h, w, c). Note that images may be zero padded on the bottom/right + if the batch tensor is larger than this shape. + + - "scale_factor": a float indicating the preprocessing scale + + - "flip": a boolean indicating if image flip transform was used + + - "filename": path to the image file + + - "ori_shape": original shape of the image as a tuple (h, w, c) + + - "pad_shape": image shape after padding + + - "img_norm_cfg": a dict of normalization information: + - mean - per channel mean subtraction + - std - per channel std divisor + - to_rgb - bool indicating if bgr was converted to rgb + + Args: + keys (Sequence[str]): Keys of results to be collected in ``data``. + meta_keys (Sequence[str], optional): Meta keys to be converted to + ``mmcv.DataContainer`` and collected in ``data[img_metas]``. + Default: ``('filename', 'ori_filename', 'ori_shape', 'img_shape', + 'pad_shape', 'scale_factor', 'flip', 'flip_direction', + 'img_norm_cfg')`` + """ + + def __init__(self, + keys, + meta_keys=('filename', 'ori_filename', 'ori_shape', + 'img_shape', 'pad_shape', 'scale_factor', 'flip', + 'flip_direction', 'img_norm_cfg')): + self.keys = keys + self.meta_keys = meta_keys + + def __call__(self, results): + """Call function to collect keys in results. The keys in ``meta_keys`` + will be converted to :obj:mmcv.DataContainer. + + Args: + results (dict): Result dict contains the data to collect. + + Returns: + dict: The result dict contains the following keys + - keys in``self.keys`` + - ``img_metas`` + """ + + data = {} + img_meta = {} + for key in self.meta_keys: + img_meta[key] = results[key] + data['img_metas'] = DC(img_meta, cpu_only=True) + for key in self.keys: + data[key] = results[key] + return data + + def __repr__(self): + return self.__class__.__name__ + \ + f'(keys={self.keys}, meta_keys={self.meta_keys})' diff --git a/mmseg/datasets/pipelines/loading.py b/mmseg/datasets/pipelines/loading.py new file mode 100644 index 0000000000000000000000000000000000000000..fdfc496ba96828a435febbef958fdae499d034f7 --- /dev/null +++ b/mmseg/datasets/pipelines/loading.py @@ -0,0 +1,153 @@ +import os.path as osp + +import mmcv +import numpy as np + +from ..builder import PIPELINES + + +@PIPELINES.register_module() +class LoadImageFromFile(object): + """Load an image from file. + + Required keys are "img_prefix" and "img_info" (a dict that must contain the + key "filename"). Added or updated keys are "filename", "img", "img_shape", + "ori_shape" (same as `img_shape`), "pad_shape" (same as `img_shape`), + "scale_factor" (1.0) and "img_norm_cfg" (means=0 and stds=1). + + Args: + to_float32 (bool): Whether to convert the loaded image to a float32 + numpy array. If set to False, the loaded image is an uint8 array. + Defaults to False. + color_type (str): The flag argument for :func:`mmcv.imfrombytes`. + Defaults to 'color'. + file_client_args (dict): Arguments to instantiate a FileClient. + See :class:`mmcv.fileio.FileClient` for details. + Defaults to ``dict(backend='disk')``. + imdecode_backend (str): Backend for :func:`mmcv.imdecode`. 
Default: + 'cv2' + """ + + def __init__(self, + to_float32=False, + color_type='color', + file_client_args=dict(backend='disk'), + imdecode_backend='cv2'): + self.to_float32 = to_float32 + self.color_type = color_type + self.file_client_args = file_client_args.copy() + self.file_client = None + self.imdecode_backend = imdecode_backend + + def __call__(self, results): + """Call functions to load image and get image meta information. + + Args: + results (dict): Result dict from :obj:`mmseg.CustomDataset`. + + Returns: + dict: The dict contains loaded image and meta information. + """ + + if self.file_client is None: + self.file_client = mmcv.FileClient(**self.file_client_args) + + if results.get('img_prefix') is not None: + filename = osp.join(results['img_prefix'], + results['img_info']['filename']) + else: + filename = results['img_info']['filename'] + img_bytes = self.file_client.get(filename) + img = mmcv.imfrombytes( + img_bytes, flag=self.color_type, backend=self.imdecode_backend) + if self.to_float32: + img = img.astype(np.float32) + + results['filename'] = filename + results['ori_filename'] = results['img_info']['filename'] + results['img'] = img + results['img_shape'] = img.shape + results['ori_shape'] = img.shape + # Set initial values for default meta_keys + results['pad_shape'] = img.shape + results['scale_factor'] = 1.0 + num_channels = 1 if len(img.shape) < 3 else img.shape[2] + results['img_norm_cfg'] = dict( + mean=np.zeros(num_channels, dtype=np.float32), + std=np.ones(num_channels, dtype=np.float32), + to_rgb=False) + return results + + def __repr__(self): + repr_str = self.__class__.__name__ + repr_str += f'(to_float32={self.to_float32},' + repr_str += f"color_type='{self.color_type}'," + repr_str += f"imdecode_backend='{self.imdecode_backend}')" + return repr_str + + +@PIPELINES.register_module() +class LoadAnnotations(object): + """Load annotations for semantic segmentation. + + Args: + reduce_zero_label (bool): Whether reduce all label value by 1. + Usually used for datasets where 0 is background label. + Default: False. + file_client_args (dict): Arguments to instantiate a FileClient. + See :class:`mmcv.fileio.FileClient` for details. + Defaults to ``dict(backend='disk')``. + imdecode_backend (str): Backend for :func:`mmcv.imdecode`. Default: + 'pillow' + """ + + def __init__(self, + reduce_zero_label=False, + file_client_args=dict(backend='disk'), + imdecode_backend='pillow'): + self.reduce_zero_label = reduce_zero_label + self.file_client_args = file_client_args.copy() + self.file_client = None + self.imdecode_backend = imdecode_backend + + def __call__(self, results): + """Call function to load multiple types annotations. + + Args: + results (dict): Result dict from :obj:`mmseg.CustomDataset`. + + Returns: + dict: The dict contains loaded semantic segmentation annotations. 
+ """ + + if self.file_client is None: + self.file_client = mmcv.FileClient(**self.file_client_args) + + if results.get('seg_prefix', None) is not None: + filename = osp.join(results['seg_prefix'], + results['ann_info']['seg_map']) + else: + filename = results['ann_info']['seg_map'] + img_bytes = self.file_client.get(filename) + gt_semantic_seg = mmcv.imfrombytes( + img_bytes, flag='unchanged', + backend=self.imdecode_backend).squeeze().astype(np.uint8) + # modify if custom classes + if results.get('label_map', None) is not None: + for old_id, new_id in results['label_map'].items(): + gt_semantic_seg[gt_semantic_seg == old_id] = new_id + # reduce zero_label + if self.reduce_zero_label: + # avoid using underflow conversion + gt_semantic_seg[gt_semantic_seg == 0] = 255 + gt_semantic_seg = gt_semantic_seg - 1 + gt_semantic_seg[gt_semantic_seg == 254] = 255 + results['gt_semantic_seg'] = gt_semantic_seg + results['seg_fields'].append('gt_semantic_seg') + return results + + def __repr__(self): + repr_str = self.__class__.__name__ + repr_str += f'(reduce_zero_label={self.reduce_zero_label},' + repr_str += f"imdecode_backend='{self.imdecode_backend}')" + return repr_str diff --git a/mmseg/datasets/pipelines/test_time_aug.py b/mmseg/datasets/pipelines/test_time_aug.py new file mode 100644 index 0000000000000000000000000000000000000000..473a12bc86b57e564c415ff8bdb1e431425370db --- /dev/null +++ b/mmseg/datasets/pipelines/test_time_aug.py @@ -0,0 +1,133 @@ +import warnings + +import mmcv + +from ..builder import PIPELINES +from .compose import Compose + + +@PIPELINES.register_module() +class MultiScaleFlipAug(object): + """Test-time augmentation with multiple scales and flipping. + + An example configuration is as followed: + + .. code-block:: + + img_scale=(2048, 1024), + img_ratios=[0.5, 1.0], + flip=True, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ] + + After MultiScaleFLipAug with above configuration, the results are wrapped + into lists of the same length as followed: + + .. code-block:: + + dict( + img=[...], + img_shape=[...], + scale=[(1024, 512), (1024, 512), (2048, 1024), (2048, 1024)] + flip=[False, True, False, True] + ... + ) + + Args: + transforms (list[dict]): Transforms to apply in each augmentation. + img_scale (None | tuple | list[tuple]): Images scales for resizing. + img_ratios (float | list[float]): Image ratios for resizing + flip (bool): Whether apply flip augmentation. Default: False. + flip_direction (str | list[str]): Flip augmentation directions, + options are "horizontal" and "vertical". If flip_direction is list, + multiple flip augmentations will be applied. + It has no effect when flip == False. Default: "horizontal". 
+ """ + + def __init__(self, + transforms, + img_scale, + img_ratios=None, + flip=False, + flip_direction='horizontal'): + self.transforms = Compose(transforms) + if img_ratios is not None: + img_ratios = img_ratios if isinstance(img_ratios, + list) else [img_ratios] + assert mmcv.is_list_of(img_ratios, float) + if img_scale is None: + # mode 1: given img_scale=None and a range of image ratio + self.img_scale = None + assert mmcv.is_list_of(img_ratios, float) + elif isinstance(img_scale, tuple) and mmcv.is_list_of( + img_ratios, float): + assert len(img_scale) == 2 + # mode 2: given a scale and a range of image ratio + self.img_scale = [(int(img_scale[0] * ratio), + int(img_scale[1] * ratio)) + for ratio in img_ratios] + else: + # mode 3: given multiple scales + self.img_scale = img_scale if isinstance(img_scale, + list) else [img_scale] + assert mmcv.is_list_of(self.img_scale, tuple) or self.img_scale is None + self.flip = flip + self.img_ratios = img_ratios + self.flip_direction = flip_direction if isinstance( + flip_direction, list) else [flip_direction] + assert mmcv.is_list_of(self.flip_direction, str) + if not self.flip and self.flip_direction != ['horizontal']: + warnings.warn( + 'flip_direction has no effect when flip is set to False') + if (self.flip + and not any([t['type'] == 'RandomFlip' for t in transforms])): + warnings.warn( + 'flip has no effect when RandomFlip is not in transforms') + + def __call__(self, results): + """Call function to apply test time augment transforms on results. + + Args: + results (dict): Result dict contains the data to transform. + + Returns: + dict[str: list]: The augmented data, where each value is wrapped + into a list. + """ + + aug_data = [] + if self.img_scale is None and mmcv.is_list_of(self.img_ratios, float): + h, w = results['img'].shape[:2] + img_scale = [(int(w * ratio), int(h * ratio)) + for ratio in self.img_ratios] + else: + img_scale = self.img_scale + flip_aug = [False, True] if self.flip else [False] + for scale in img_scale: + for flip in flip_aug: + for direction in self.flip_direction: + _results = results.copy() + _results['scale'] = scale + _results['flip'] = flip + _results['flip_direction'] = direction + data = self.transforms(_results) + aug_data.append(data) + # list of dict to dict of list + aug_data_dict = {key: [] for key in aug_data[0]} + for data in aug_data: + for key, val in data.items(): + aug_data_dict[key].append(val) + return aug_data_dict + + def __repr__(self): + repr_str = self.__class__.__name__ + repr_str += f'(transforms={self.transforms}, ' + repr_str += f'img_scale={self.img_scale}, flip={self.flip})' + repr_str += f'flip_direction={self.flip_direction}' + return repr_str diff --git a/mmseg/datasets/pipelines/transforms.py b/mmseg/datasets/pipelines/transforms.py new file mode 100644 index 0000000000000000000000000000000000000000..20753bb0fa80a332403fd8981c92da73ef345e8f --- /dev/null +++ b/mmseg/datasets/pipelines/transforms.py @@ -0,0 +1,889 @@ +import mmcv +import numpy as np +from mmcv.utils import deprecated_api_warning, is_tuple_of +from numpy import random + +from ..builder import PIPELINES + + +@PIPELINES.register_module() +class Resize(object): + """Resize images & seg. + + This transform resizes the input image to some scale. If the input dict + contains the key "scale", then the scale in the input dict is used, + otherwise the specified scale in the init method is used. + + ``img_scale`` can be None, a tuple (single-scale) or a list of tuple + (multi-scale). 
There are 4 multiscale modes:
+
+    - ``ratio_range is not None``:
+    1. When img_scale is None, img_scale is the shape of image in results
+        (img_scale = results['img'].shape[:2]) and the image is resized based
+        on the original size. (mode 1)
+    2. When img_scale is a tuple (single-scale), randomly sample a ratio from
+        the ratio range and multiply it with the image scale. (mode 2)
+
+    - ``ratio_range is None and multiscale_mode == "range"``: randomly sample a
+    scale from a range. (mode 3)
+
+    - ``ratio_range is None and multiscale_mode == "value"``: randomly sample a
+    scale from multiple scales. (mode 4)
+
+    Args:
+        img_scale (tuple or list[tuple]): Image scales for resizing.
+        multiscale_mode (str): Either "range" or "value".
+        ratio_range (tuple[float]): (min_ratio, max_ratio)
+        keep_ratio (bool): Whether to keep the aspect ratio when resizing the
+            image.
+    """
+
+    def __init__(self,
+                 img_scale=None,
+                 multiscale_mode='range',
+                 ratio_range=None,
+                 keep_ratio=True):
+        if img_scale is None:
+            self.img_scale = None
+        else:
+            if isinstance(img_scale, list):
+                self.img_scale = img_scale
+            else:
+                self.img_scale = [img_scale]
+            assert mmcv.is_list_of(self.img_scale, tuple)
+
+        if ratio_range is not None:
+            # mode 1: given img_scale=None and a range of image ratio
+            # mode 2: given a scale and a range of image ratio
+            assert self.img_scale is None or len(self.img_scale) == 1
+        else:
+            # mode 3 and 4: given multiple scales or a range of scales
+            assert multiscale_mode in ['value', 'range']
+
+        self.multiscale_mode = multiscale_mode
+        self.ratio_range = ratio_range
+        self.keep_ratio = keep_ratio
+
+    @staticmethod
+    def random_select(img_scales):
+        """Randomly select an img_scale from given candidates.
+
+        Args:
+            img_scales (list[tuple]): Image scales for selection.
+
+        Returns:
+            (tuple, int): Returns a tuple ``(img_scale, scale_idx)``,
+                where ``img_scale`` is the selected image scale and
+                ``scale_idx`` is the selected index in the given candidates.
+        """
+
+        assert mmcv.is_list_of(img_scales, tuple)
+        scale_idx = np.random.randint(len(img_scales))
+        img_scale = img_scales[scale_idx]
+        return img_scale, scale_idx
+
+    @staticmethod
+    def random_sample(img_scales):
+        """Randomly sample an img_scale when ``multiscale_mode=='range'``.
+
+        Args:
+            img_scales (list[tuple]): Image scale range for sampling.
+                There must be two tuples in img_scales, which specify the lower
+                and upper bound of image scales.
+
+        Returns:
+            (tuple, None): Returns a tuple ``(img_scale, None)``, where
+                ``img_scale`` is the sampled scale and None is just a
+                placeholder to be consistent with :func:`random_select`.
+        """
+
+        assert mmcv.is_list_of(img_scales, tuple) and len(img_scales) == 2
+        img_scale_long = [max(s) for s in img_scales]
+        img_scale_short = [min(s) for s in img_scales]
+        long_edge = np.random.randint(
+            min(img_scale_long),
+            max(img_scale_long) + 1)
+        short_edge = np.random.randint(
+            min(img_scale_short),
+            max(img_scale_short) + 1)
+        img_scale = (long_edge, short_edge)
+        return img_scale, None
+
+    @staticmethod
+    def random_sample_ratio(img_scale, ratio_range):
+        """Randomly sample an img_scale when ``ratio_range`` is specified.
+
+        A ratio will be randomly sampled from the range specified by
+        ``ratio_range``. Then it is multiplied with ``img_scale`` to
+        generate the sampled scale.
+
+        Args:
+            img_scale (tuple): Image scale base to multiply with ratio.
+            ratio_range (tuple[float]): The minimum and maximum ratio to scale
+                the ``img_scale``.
+ + Returns: + (tuple, None): Returns a tuple ``(scale, None)``, where + ``scale`` is sampled ratio multiplied with ``img_scale`` and + None is just a placeholder to be consistent with + :func:`random_select`. + """ + + assert isinstance(img_scale, tuple) and len(img_scale) == 2 + min_ratio, max_ratio = ratio_range + assert min_ratio <= max_ratio + ratio = np.random.random_sample() * (max_ratio - min_ratio) + min_ratio + scale = int(img_scale[0] * ratio), int(img_scale[1] * ratio) + return scale, None + + def _random_scale(self, results): + """Randomly sample an img_scale according to ``ratio_range`` and + ``multiscale_mode``. + + If ``ratio_range`` is specified, a ratio will be sampled and be + multiplied with ``img_scale``. + If multiple scales are specified by ``img_scale``, a scale will be + sampled according to ``multiscale_mode``. + Otherwise, single scale will be used. + + Args: + results (dict): Result dict from :obj:`dataset`. + + Returns: + dict: Two new keys 'scale` and 'scale_idx` are added into + ``results``, which would be used by subsequent pipelines. + """ + + if self.ratio_range is not None: + if self.img_scale is None: + h, w = results['img'].shape[:2] + scale, scale_idx = self.random_sample_ratio((w, h), + self.ratio_range) + else: + scale, scale_idx = self.random_sample_ratio( + self.img_scale[0], self.ratio_range) + elif len(self.img_scale) == 1: + scale, scale_idx = self.img_scale[0], 0 + elif self.multiscale_mode == 'range': + scale, scale_idx = self.random_sample(self.img_scale) + elif self.multiscale_mode == 'value': + scale, scale_idx = self.random_select(self.img_scale) + else: + raise NotImplementedError + + results['scale'] = scale + results['scale_idx'] = scale_idx + + def _resize_img(self, results): + """Resize images with ``results['scale']``.""" + if self.keep_ratio: + img, scale_factor = mmcv.imrescale( + results['img'], results['scale'], return_scale=True) + # the w_scale and h_scale has minor difference + # a real fix should be done in the mmcv.imrescale in the future + new_h, new_w = img.shape[:2] + h, w = results['img'].shape[:2] + w_scale = new_w / w + h_scale = new_h / h + else: + img, w_scale, h_scale = mmcv.imresize( + results['img'], results['scale'], return_scale=True) + scale_factor = np.array([w_scale, h_scale, w_scale, h_scale], + dtype=np.float32) + results['img'] = img + results['img_shape'] = img.shape + results['pad_shape'] = img.shape # in case that there is no padding + results['scale_factor'] = scale_factor + results['keep_ratio'] = self.keep_ratio + + def _resize_seg(self, results): + """Resize semantic segmentation map with ``results['scale']``.""" + for key in results.get('seg_fields', []): + if self.keep_ratio: + gt_seg = mmcv.imrescale( + results[key], results['scale'], interpolation='nearest') + else: + gt_seg = mmcv.imresize( + results[key], results['scale'], interpolation='nearest') + results[key] = gt_seg + + def __call__(self, results): + """Call function to resize images, bounding boxes, masks, semantic + segmentation map. + + Args: + results (dict): Result dict from loading pipeline. + + Returns: + dict: Resized results, 'img_shape', 'pad_shape', 'scale_factor', + 'keep_ratio' keys are added into result dict. 
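+
+        Example (an illustrative sketch):
+
+        .. code-block:: python
+
+            import numpy as np
+
+            resize = Resize(img_scale=(1024, 512), ratio_range=(0.5, 2.0))
+            results = dict(
+                img=np.zeros((512, 1024, 3), dtype=np.uint8), seg_fields=[])
+            results = resize(results)
+            # 'scale' was sampled between (512, 256) and (2048, 1024) and
+            # the image was rescaled with its aspect ratio preserved.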
+ """ + + if 'scale' not in results: + self._random_scale(results) + self._resize_img(results) + self._resize_seg(results) + return results + + def __repr__(self): + repr_str = self.__class__.__name__ + repr_str += (f'(img_scale={self.img_scale}, ' + f'multiscale_mode={self.multiscale_mode}, ' + f'ratio_range={self.ratio_range}, ' + f'keep_ratio={self.keep_ratio})') + return repr_str + + +@PIPELINES.register_module() +class RandomFlip(object): + """Flip the image & seg. + + If the input dict contains the key "flip", then the flag will be used, + otherwise it will be randomly decided by a ratio specified in the init + method. + + Args: + prob (float, optional): The flipping probability. Default: None. + direction(str, optional): The flipping direction. Options are + 'horizontal' and 'vertical'. Default: 'horizontal'. + """ + + @deprecated_api_warning({'flip_ratio': 'prob'}, cls_name='RandomFlip') + def __init__(self, prob=None, direction='horizontal'): + self.prob = prob + self.direction = direction + if prob is not None: + assert prob >= 0 and prob <= 1 + assert direction in ['horizontal', 'vertical'] + + def __call__(self, results): + """Call function to flip bounding boxes, masks, semantic segmentation + maps. + + Args: + results (dict): Result dict from loading pipeline. + + Returns: + dict: Flipped results, 'flip', 'flip_direction' keys are added into + result dict. + """ + + if 'flip' not in results: + flip = True if np.random.rand() < self.prob else False + results['flip'] = flip + if 'flip_direction' not in results: + results['flip_direction'] = self.direction + if results['flip']: + # flip image + results['img'] = mmcv.imflip( + results['img'], direction=results['flip_direction']) + + # flip segs + for key in results.get('seg_fields', []): + # use copy() to make numpy stride positive + results[key] = mmcv.imflip( + results[key], direction=results['flip_direction']).copy() + return results + + def __repr__(self): + return self.__class__.__name__ + f'(prob={self.prob})' + + +@PIPELINES.register_module() +class Pad(object): + """Pad the image & mask. + + There are two padding modes: (1) pad to a fixed size and (2) pad to the + minimum size that is divisible by some number. + Added keys are "pad_shape", "pad_fixed_size", "pad_size_divisor", + + Args: + size (tuple, optional): Fixed padding size. + size_divisor (int, optional): The divisor of padded size. + pad_val (float, optional): Padding value. Default: 0. + seg_pad_val (float, optional): Padding value of segmentation map. + Default: 255. 
+ """ + + def __init__(self, + size=None, + size_divisor=None, + pad_val=0, + seg_pad_val=255): + self.size = size + self.size_divisor = size_divisor + self.pad_val = pad_val + self.seg_pad_val = seg_pad_val + # only one of size and size_divisor should be valid + assert size is not None or size_divisor is not None + assert size is None or size_divisor is None + + def _pad_img(self, results): + """Pad images according to ``self.size``.""" + if self.size is not None: + padded_img = mmcv.impad( + results['img'], shape=self.size, pad_val=self.pad_val) + elif self.size_divisor is not None: + padded_img = mmcv.impad_to_multiple( + results['img'], self.size_divisor, pad_val=self.pad_val) + results['img'] = padded_img + results['pad_shape'] = padded_img.shape + results['pad_fixed_size'] = self.size + results['pad_size_divisor'] = self.size_divisor + + def _pad_seg(self, results): + """Pad masks according to ``results['pad_shape']``.""" + for key in results.get('seg_fields', []): + results[key] = mmcv.impad( + results[key], + shape=results['pad_shape'][:2], + pad_val=self.seg_pad_val) + + def __call__(self, results): + """Call function to pad images, masks, semantic segmentation maps. + + Args: + results (dict): Result dict from loading pipeline. + + Returns: + dict: Updated result dict. + """ + + self._pad_img(results) + self._pad_seg(results) + return results + + def __repr__(self): + repr_str = self.__class__.__name__ + repr_str += f'(size={self.size}, size_divisor={self.size_divisor}, ' \ + f'pad_val={self.pad_val})' + return repr_str + + +@PIPELINES.register_module() +class Normalize(object): + """Normalize the image. + + Added key is "img_norm_cfg". + + Args: + mean (sequence): Mean values of 3 channels. + std (sequence): Std values of 3 channels. + to_rgb (bool): Whether to convert the image from BGR to RGB, + default is true. + """ + + def __init__(self, mean, std, to_rgb=True): + self.mean = np.array(mean, dtype=np.float32) + self.std = np.array(std, dtype=np.float32) + self.to_rgb = to_rgb + + def __call__(self, results): + """Call function to normalize images. + + Args: + results (dict): Result dict from loading pipeline. + + Returns: + dict: Normalized results, 'img_norm_cfg' key is added into + result dict. + """ + + results['img'] = mmcv.imnormalize(results['img'], self.mean, self.std, + self.to_rgb) + results['img_norm_cfg'] = dict( + mean=self.mean, std=self.std, to_rgb=self.to_rgb) + return results + + def __repr__(self): + repr_str = self.__class__.__name__ + repr_str += f'(mean={self.mean}, std={self.std}, to_rgb=' \ + f'{self.to_rgb})' + return repr_str + + +@PIPELINES.register_module() +class Rerange(object): + """Rerange the image pixel value. + + Args: + min_value (float or int): Minimum value of the reranged image. + Default: 0. + max_value (float or int): Maximum value of the reranged image. + Default: 255. + """ + + def __init__(self, min_value=0, max_value=255): + assert isinstance(min_value, float) or isinstance(min_value, int) + assert isinstance(max_value, float) or isinstance(max_value, int) + assert min_value < max_value + self.min_value = min_value + self.max_value = max_value + + def __call__(self, results): + """Call function to rerange images. + + Args: + results (dict): Result dict from loading pipeline. + Returns: + dict: Reranged results. 
+ """ + + img = results['img'] + img_min_value = np.min(img) + img_max_value = np.max(img) + + assert img_min_value < img_max_value + # rerange to [0, 1] + img = (img - img_min_value) / (img_max_value - img_min_value) + # rerange to [min_value, max_value] + img = img * (self.max_value - self.min_value) + self.min_value + results['img'] = img + + return results + + def __repr__(self): + repr_str = self.__class__.__name__ + repr_str += f'(min_value={self.min_value}, max_value={self.max_value})' + return repr_str + + +@PIPELINES.register_module() +class CLAHE(object): + """Use CLAHE method to process the image. + + See `ZUIDERVELD,K. Contrast Limited Adaptive Histogram Equalization[J]. + Graphics Gems, 1994:474-485.` for more information. + + Args: + clip_limit (float): Threshold for contrast limiting. Default: 40.0. + tile_grid_size (tuple[int]): Size of grid for histogram equalization. + Input image will be divided into equally sized rectangular tiles. + It defines the number of tiles in row and column. Default: (8, 8). + """ + + def __init__(self, clip_limit=40.0, tile_grid_size=(8, 8)): + assert isinstance(clip_limit, (float, int)) + self.clip_limit = clip_limit + assert is_tuple_of(tile_grid_size, int) + assert len(tile_grid_size) == 2 + self.tile_grid_size = tile_grid_size + + def __call__(self, results): + """Call function to Use CLAHE method process images. + + Args: + results (dict): Result dict from loading pipeline. + + Returns: + dict: Processed results. + """ + + for i in range(results['img'].shape[2]): + results['img'][:, :, i] = mmcv.clahe( + np.array(results['img'][:, :, i], dtype=np.uint8), + self.clip_limit, self.tile_grid_size) + + return results + + def __repr__(self): + repr_str = self.__class__.__name__ + repr_str += f'(clip_limit={self.clip_limit}, '\ + f'tile_grid_size={self.tile_grid_size})' + return repr_str + + +@PIPELINES.register_module() +class RandomCrop(object): + """Random crop the image & seg. + + Args: + crop_size (tuple): Expected size after cropping, (h, w). + cat_max_ratio (float): The maximum ratio that single category could + occupy. + """ + + def __init__(self, crop_size, cat_max_ratio=1., ignore_index=255): + assert crop_size[0] > 0 and crop_size[1] > 0 + self.crop_size = crop_size + self.cat_max_ratio = cat_max_ratio + self.ignore_index = ignore_index + + def get_crop_bbox(self, img): + """Randomly get a crop bounding box.""" + margin_h = max(img.shape[0] - self.crop_size[0], 0) + margin_w = max(img.shape[1] - self.crop_size[1], 0) + offset_h = np.random.randint(0, margin_h + 1) + offset_w = np.random.randint(0, margin_w + 1) + crop_y1, crop_y2 = offset_h, offset_h + self.crop_size[0] + crop_x1, crop_x2 = offset_w, offset_w + self.crop_size[1] + + return crop_y1, crop_y2, crop_x1, crop_x2 + + def crop(self, img, crop_bbox): + """Crop from ``img``""" + crop_y1, crop_y2, crop_x1, crop_x2 = crop_bbox + img = img[crop_y1:crop_y2, crop_x1:crop_x2, ...] + return img + + def __call__(self, results): + """Call function to randomly crop images, semantic segmentation maps. + + Args: + results (dict): Result dict from loading pipeline. + + Returns: + dict: Randomly cropped results, 'img_shape' key in result dict is + updated according to crop size. 
+ """ + + img = results['img'] + crop_bbox = self.get_crop_bbox(img) + if self.cat_max_ratio < 1.: + # Repeat 10 times + for _ in range(10): + seg_temp = self.crop(results['gt_semantic_seg'], crop_bbox) + labels, cnt = np.unique(seg_temp, return_counts=True) + cnt = cnt[labels != self.ignore_index] + if len(cnt) > 1 and np.max(cnt) / np.sum( + cnt) < self.cat_max_ratio: + break + crop_bbox = self.get_crop_bbox(img) + + # crop the image + img = self.crop(img, crop_bbox) + img_shape = img.shape + results['img'] = img + results['img_shape'] = img_shape + + # crop semantic seg + for key in results.get('seg_fields', []): + results[key] = self.crop(results[key], crop_bbox) + + return results + + def __repr__(self): + return self.__class__.__name__ + f'(crop_size={self.crop_size})' + + +@PIPELINES.register_module() +class RandomRotate(object): + """Rotate the image & seg. + + Args: + prob (float): The rotation probability. + degree (float, tuple[float]): Range of degrees to select from. If + degree is a number instead of tuple like (min, max), + the range of degree will be (``-degree``, ``+degree``) + pad_val (float, optional): Padding value of image. Default: 0. + seg_pad_val (float, optional): Padding value of segmentation map. + Default: 255. + center (tuple[float], optional): Center point (w, h) of the rotation in + the source image. If not specified, the center of the image will be + used. Default: None. + auto_bound (bool): Whether to adjust the image size to cover the whole + rotated image. Default: False + """ + + def __init__(self, + prob, + degree, + pad_val=0, + seg_pad_val=255, + center=None, + auto_bound=False): + self.prob = prob + assert prob >= 0 and prob <= 1 + if isinstance(degree, (float, int)): + assert degree > 0, f'degree {degree} should be positive' + self.degree = (-degree, degree) + else: + self.degree = degree + assert len(self.degree) == 2, f'degree {self.degree} should be a ' \ + f'tuple of (min, max)' + self.pal_val = pad_val + self.seg_pad_val = seg_pad_val + self.center = center + self.auto_bound = auto_bound + + def __call__(self, results): + """Call function to rotate image, semantic segmentation maps. + + Args: + results (dict): Result dict from loading pipeline. + + Returns: + dict: Rotated results. + """ + + rotate = True if np.random.rand() < self.prob else False + degree = np.random.uniform(min(*self.degree), max(*self.degree)) + if rotate: + # rotate image + results['img'] = mmcv.imrotate( + results['img'], + angle=degree, + border_value=self.pal_val, + center=self.center, + auto_bound=self.auto_bound) + + # rotate segs + for key in results.get('seg_fields', []): + results[key] = mmcv.imrotate( + results[key], + angle=degree, + border_value=self.seg_pad_val, + center=self.center, + auto_bound=self.auto_bound, + interpolation='nearest') + return results + + def __repr__(self): + repr_str = self.__class__.__name__ + repr_str += f'(prob={self.prob}, ' \ + f'degree={self.degree}, ' \ + f'pad_val={self.pal_val}, ' \ + f'seg_pad_val={self.seg_pad_val}, ' \ + f'center={self.center}, ' \ + f'auto_bound={self.auto_bound})' + return repr_str + + +@PIPELINES.register_module() +class RGB2Gray(object): + """Convert RGB image to grayscale image. + + This transform calculate the weighted mean of input image channels with + ``weights`` and then expand the channels to ``out_channels``. When + ``out_channels`` is None, the number of output channels is the same as + input channels. + + Args: + out_channels (int): Expected number of output channels after + transforming. 
Default: None. + weights (tuple[float]): The weights to calculate the weighted mean. + Default: (0.299, 0.587, 0.114). + """ + + def __init__(self, out_channels=None, weights=(0.299, 0.587, 0.114)): + assert out_channels is None or out_channels > 0 + self.out_channels = out_channels + assert isinstance(weights, tuple) + for item in weights: + assert isinstance(item, (float, int)) + self.weights = weights + + def __call__(self, results): + """Call function to convert RGB image to grayscale image. + + Args: + results (dict): Result dict from loading pipeline. + + Returns: + dict: Result dict with grayscale image. + """ + img = results['img'] + assert len(img.shape) == 3 + assert img.shape[2] == len(self.weights) + weights = np.array(self.weights).reshape((1, 1, -1)) + img = (img * weights).sum(2, keepdims=True) + if self.out_channels is None: + img = img.repeat(weights.shape[2], axis=2) + else: + img = img.repeat(self.out_channels, axis=2) + + results['img'] = img + results['img_shape'] = img.shape + + return results + + def __repr__(self): + repr_str = self.__class__.__name__ + repr_str += f'(out_channels={self.out_channels}, ' \ + f'weights={self.weights})' + return repr_str + + +@PIPELINES.register_module() +class AdjustGamma(object): + """Using gamma correction to process the image. + + Args: + gamma (float or int): Gamma value used in gamma correction. + Default: 1.0. + """ + + def __init__(self, gamma=1.0): + assert isinstance(gamma, float) or isinstance(gamma, int) + assert gamma > 0 + self.gamma = gamma + inv_gamma = 1.0 / gamma + self.table = np.array([(i / 255.0)**inv_gamma * 255 + for i in np.arange(256)]).astype('uint8') + + def __call__(self, results): + """Call function to process the image with gamma correction. + + Args: + results (dict): Result dict from loading pipeline. + + Returns: + dict: Processed results. + """ + + results['img'] = mmcv.lut_transform( + np.array(results['img'], dtype=np.uint8), self.table) + + return results + + def __repr__(self): + return self.__class__.__name__ + f'(gamma={self.gamma})' + + +@PIPELINES.register_module() +class SegRescale(object): + """Rescale semantic segmentation maps. + + Args: + scale_factor (float): The scale factor of the final output. + """ + + def __init__(self, scale_factor=1): + self.scale_factor = scale_factor + + def __call__(self, results): + """Call function to scale the semantic segmentation map. + + Args: + results (dict): Result dict from loading pipeline. + + Returns: + dict: Result dict with semantic segmentation map scaled. + """ + for key in results.get('seg_fields', []): + if self.scale_factor != 1: + results[key] = mmcv.imrescale( + results[key], self.scale_factor, interpolation='nearest') + return results + + def __repr__(self): + return self.__class__.__name__ + f'(scale_factor={self.scale_factor})' + + +@PIPELINES.register_module() +class PhotoMetricDistortion(object): + """Apply photometric distortion to image sequentially, every transformation + is applied with a probability of 0.5. The position of random contrast is in + second or second to last. + + 1. random brightness + 2. random contrast (mode 0) + 3. convert color from BGR to HSV + 4. random saturation + 5. random hue + 6. convert color from HSV to BGR + 7. random contrast (mode 1) + + Args: + brightness_delta (int): delta of brightness. + contrast_range (tuple): range of contrast. + saturation_range (tuple): range of saturation. + hue_delta (int): delta of hue. 
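+
+    Example (an illustrative sketch; assumes a BGR uint8 image, as
+    produced by ``LoadImageFromFile``):
+
+    .. code-block:: python
+
+        import numpy as np
+
+        distort = PhotoMetricDistortion(brightness_delta=32,
+                                        contrast_range=(0.5, 1.5))
+        results = distort(dict(img=np.full((64, 64, 3), 128, np.uint8)))
+        # each sub-transform fired independently with probability 0.5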
+ """ + + def __init__(self, + brightness_delta=32, + contrast_range=(0.5, 1.5), + saturation_range=(0.5, 1.5), + hue_delta=18): + self.brightness_delta = brightness_delta + self.contrast_lower, self.contrast_upper = contrast_range + self.saturation_lower, self.saturation_upper = saturation_range + self.hue_delta = hue_delta + + def convert(self, img, alpha=1, beta=0): + """Multiple with alpha and add beat with clip.""" + img = img.astype(np.float32) * alpha + beta + img = np.clip(img, 0, 255) + return img.astype(np.uint8) + + def brightness(self, img): + """Brightness distortion.""" + if random.randint(2): + return self.convert( + img, + beta=random.uniform(-self.brightness_delta, + self.brightness_delta)) + return img + + def contrast(self, img): + """Contrast distortion.""" + if random.randint(2): + return self.convert( + img, + alpha=random.uniform(self.contrast_lower, self.contrast_upper)) + return img + + def saturation(self, img): + """Saturation distortion.""" + if random.randint(2): + img = mmcv.bgr2hsv(img) + img[:, :, 1] = self.convert( + img[:, :, 1], + alpha=random.uniform(self.saturation_lower, + self.saturation_upper)) + img = mmcv.hsv2bgr(img) + return img + + def hue(self, img): + """Hue distortion.""" + if random.randint(2): + img = mmcv.bgr2hsv(img) + img[:, :, + 0] = (img[:, :, 0].astype(int) + + random.randint(-self.hue_delta, self.hue_delta)) % 180 + img = mmcv.hsv2bgr(img) + return img + + def __call__(self, results): + """Call function to perform photometric distortion on images. + + Args: + results (dict): Result dict from loading pipeline. + + Returns: + dict: Result dict with images distorted. + """ + + img = results['img'] + # random brightness + img = self.brightness(img) + + # mode == 0 --> do random contrast first + # mode == 1 --> do random contrast last + mode = random.randint(2) + if mode == 1: + img = self.contrast(img) + + # random saturation + img = self.saturation(img) + + # random hue + img = self.hue(img) + + # random contrast + if mode == 0: + img = self.contrast(img) + + results['img'] = img + return results + + def __repr__(self): + repr_str = self.__class__.__name__ + repr_str += (f'(brightness_delta={self.brightness_delta}, ' + f'contrast_range=({self.contrast_lower}, ' + f'{self.contrast_upper}), ' + f'saturation_range=({self.saturation_lower}, ' + f'{self.saturation_upper}), ' + f'hue_delta={self.hue_delta})') + return repr_str diff --git a/mmseg/datasets/stare.py b/mmseg/datasets/stare.py new file mode 100644 index 0000000000000000000000000000000000000000..cbd14e0920e7f6a73baff1432e5a32ccfdb0dfae --- /dev/null +++ b/mmseg/datasets/stare.py @@ -0,0 +1,27 @@ +import os.path as osp + +from .builder import DATASETS +from .custom import CustomDataset + + +@DATASETS.register_module() +class STAREDataset(CustomDataset): + """STARE dataset. + + In segmentation map annotation for STARE, 0 stands for background, which is + included in 2 categories. ``reduce_zero_label`` is fixed to False. The + ``img_suffix`` is fixed to '.png' and ``seg_map_suffix`` is fixed to + '.ah.png'. 
+ """ + + CLASSES = ('background', 'vessel') + + PALETTE = [[120, 120, 120], [6, 230, 230]] + + def __init__(self, **kwargs): + super(STAREDataset, self).__init__( + img_suffix='.png', + seg_map_suffix='.ah.png', + reduce_zero_label=False, + **kwargs) + assert osp.exists(self.img_dir) diff --git a/mmseg/datasets/voc.py b/mmseg/datasets/voc.py new file mode 100644 index 0000000000000000000000000000000000000000..a8855203b14ee0dc4da9099a2945d4aedcffbcd6 --- /dev/null +++ b/mmseg/datasets/voc.py @@ -0,0 +1,29 @@ +import os.path as osp + +from .builder import DATASETS +from .custom import CustomDataset + + +@DATASETS.register_module() +class PascalVOCDataset(CustomDataset): + """Pascal VOC dataset. + + Args: + split (str): Split txt file for Pascal VOC. + """ + + CLASSES = ('background', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', + 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', + 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', + 'train', 'tvmonitor') + + PALETTE = [[0, 0, 0], [128, 0, 0], [0, 128, 0], [128, 128, 0], [0, 0, 128], + [128, 0, 128], [0, 128, 128], [128, 128, 128], [64, 0, 0], + [192, 0, 0], [64, 128, 0], [192, 128, 0], [64, 0, 128], + [192, 0, 128], [64, 128, 128], [192, 128, 128], [0, 64, 0], + [128, 64, 0], [0, 192, 0], [128, 192, 0], [0, 64, 128]] + + def __init__(self, split, **kwargs): + super(PascalVOCDataset, self).__init__( + img_suffix='.jpg', seg_map_suffix='.png', split=split, **kwargs) + assert osp.exists(self.img_dir) and self.split is not None diff --git a/mmseg/models/__init__.py b/mmseg/models/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..3cf93f8bec9cf0cef0a3bd76ca3ca92eb188f535 --- /dev/null +++ b/mmseg/models/__init__.py @@ -0,0 +1,12 @@ +from .backbones import * # noqa: F401,F403 +from .builder import (BACKBONES, HEADS, LOSSES, SEGMENTORS, build_backbone, + build_head, build_loss, build_segmentor) +from .decode_heads import * # noqa: F401,F403 +from .losses import * # noqa: F401,F403 +from .necks import * # noqa: F401,F403 +from .segmentors import * # noqa: F401,F403 + +__all__ = [ + 'BACKBONES', 'HEADS', 'LOSSES', 'SEGMENTORS', 'build_backbone', + 'build_head', 'build_loss', 'build_segmentor' +] diff --git a/mmseg/models/backbones/__init__.py b/mmseg/models/backbones/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..5ec47fcc9896dcf2568fb7e4aed8e8906d5eaeba --- /dev/null +++ b/mmseg/models/backbones/__init__.py @@ -0,0 +1,17 @@ +from .cgnet import CGNet +from .fast_scnn import FastSCNN +from .hrnet import HRNet +from .mobilenet_v2 import MobileNetV2 +from .mobilenet_v3 import MobileNetV3 +from .resnest import ResNeSt +from .resnet import ResNet, ResNetV1c, ResNetV1d +from .resnext import ResNeXt +from .unet import UNet +from .vit import VisionTransformer +from .uniformer import UniFormer + +__all__ = [ + 'ResNet', 'ResNetV1c', 'ResNetV1d', 'ResNeXt', 'HRNet', 'FastSCNN', + 'ResNeSt', 'MobileNetV2', 'UNet', 'CGNet', 'MobileNetV3', + 'VisionTransformer', 'UniFormer' +] diff --git a/mmseg/models/backbones/cgnet.py b/mmseg/models/backbones/cgnet.py new file mode 100644 index 0000000000000000000000000000000000000000..032a55d85f04caa4801b2d6a9768518a33892462 --- /dev/null +++ b/mmseg/models/backbones/cgnet.py @@ -0,0 +1,367 @@ +import torch +import torch.nn as nn +import torch.utils.checkpoint as cp +from mmcv.cnn import (ConvModule, build_conv_layer, build_norm_layer, + constant_init, kaiming_init) +from mmcv.runner import load_checkpoint +from 
mmcv.utils.parrots_wrapper import _BatchNorm + +from mmseg.utils import get_root_logger +from ..builder import BACKBONES + + +class GlobalContextExtractor(nn.Module): + """Global Context Extractor for CGNet. + + This class is employed to refine the joint feature of both local feature + and surrounding context. + + Args: + channel (int): Number of input feature channels. + reduction (int): Reductions for global context extractor. Default: 16. + with_cp (bool): Use checkpoint or not. Using checkpoint will save some + memory while slowing down the training speed. Default: False. + """ + + def __init__(self, channel, reduction=16, with_cp=False): + super(GlobalContextExtractor, self).__init__() + self.channel = channel + self.reduction = reduction + assert reduction >= 1 and channel >= reduction + self.with_cp = with_cp + self.avg_pool = nn.AdaptiveAvgPool2d(1) + self.fc = nn.Sequential( + nn.Linear(channel, channel // reduction), nn.ReLU(inplace=True), + nn.Linear(channel // reduction, channel), nn.Sigmoid()) + + def forward(self, x): + + def _inner_forward(x): + num_batch, num_channel = x.size()[:2] + y = self.avg_pool(x).view(num_batch, num_channel) + y = self.fc(y).view(num_batch, num_channel, 1, 1) + return x * y + + if self.with_cp and x.requires_grad: + out = cp.checkpoint(_inner_forward, x) + else: + out = _inner_forward(x) + + return out + + +class ContextGuidedBlock(nn.Module): + """Context Guided Block for CGNet. + + This class consists of four components: local feature extractor, + surrounding feature extractor, joint feature extractor and global + context extractor. + + Args: + in_channels (int): Number of input feature channels. + out_channels (int): Number of output feature channels. + dilation (int): Dilation rate for surrounding context extractor. + Default: 2. + reduction (int): Reduction for global context extractor. Default: 16. + skip_connect (bool): Add input to output or not. Default: True. + downsample (bool): Downsample the input to 1/2 or not. Default: False. + conv_cfg (dict): Config dict for convolution layer. + Default: None, which means using conv2d. + norm_cfg (dict): Config dict for normalization layer. + Default: dict(type='BN', requires_grad=True). + act_cfg (dict): Config dict for activation layer. + Default: dict(type='PReLU'). + with_cp (bool): Use checkpoint or not. Using checkpoint will save some + memory while slowing down the training speed. Default: False. 
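+
+    Example (an illustrative shape walk-through):
+
+    .. code-block:: python
+
+        import torch
+
+        # With downsample=True the entry conv (3x3, stride 2) outputs
+        # out_channels; f_loc and f_sur each keep that width, their concat
+        # (2x) is reduced back by the 1x1 bottleneck, halving spatial size.
+        block = ContextGuidedBlock(32, 64, downsample=True)
+        out = block(torch.rand(1, 32, 64, 64))
+        # out.shape == torch.Size([1, 64, 32, 32])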
+ """ + + def __init__(self, + in_channels, + out_channels, + dilation=2, + reduction=16, + skip_connect=True, + downsample=False, + conv_cfg=None, + norm_cfg=dict(type='BN', requires_grad=True), + act_cfg=dict(type='PReLU'), + with_cp=False): + super(ContextGuidedBlock, self).__init__() + self.with_cp = with_cp + self.downsample = downsample + + channels = out_channels if downsample else out_channels // 2 + if 'type' in act_cfg and act_cfg['type'] == 'PReLU': + act_cfg['num_parameters'] = channels + kernel_size = 3 if downsample else 1 + stride = 2 if downsample else 1 + padding = (kernel_size - 1) // 2 + + self.conv1x1 = ConvModule( + in_channels, + channels, + kernel_size, + stride, + padding, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + + self.f_loc = build_conv_layer( + conv_cfg, + channels, + channels, + kernel_size=3, + padding=1, + groups=channels, + bias=False) + self.f_sur = build_conv_layer( + conv_cfg, + channels, + channels, + kernel_size=3, + padding=dilation, + groups=channels, + dilation=dilation, + bias=False) + + self.bn = build_norm_layer(norm_cfg, 2 * channels)[1] + self.activate = nn.PReLU(2 * channels) + + if downsample: + self.bottleneck = build_conv_layer( + conv_cfg, + 2 * channels, + out_channels, + kernel_size=1, + bias=False) + + self.skip_connect = skip_connect and not downsample + self.f_glo = GlobalContextExtractor(out_channels, reduction, with_cp) + + def forward(self, x): + + def _inner_forward(x): + out = self.conv1x1(x) + loc = self.f_loc(out) + sur = self.f_sur(out) + + joi_feat = torch.cat([loc, sur], 1) # the joint feature + joi_feat = self.bn(joi_feat) + joi_feat = self.activate(joi_feat) + if self.downsample: + joi_feat = self.bottleneck(joi_feat) # channel = out_channels + # f_glo is employed to refine the joint feature + out = self.f_glo(joi_feat) + + if self.skip_connect: + return x + out + else: + return out + + if self.with_cp and x.requires_grad: + out = cp.checkpoint(_inner_forward, x) + else: + out = _inner_forward(x) + + return out + + +class InputInjection(nn.Module): + """Downsampling module for CGNet.""" + + def __init__(self, num_downsampling): + super(InputInjection, self).__init__() + self.pool = nn.ModuleList() + for i in range(num_downsampling): + self.pool.append(nn.AvgPool2d(3, stride=2, padding=1)) + + def forward(self, x): + for pool in self.pool: + x = pool(x) + return x + + +@BACKBONES.register_module() +class CGNet(nn.Module): + """CGNet backbone. + + A Light-weight Context Guided Network for Semantic Segmentation + arXiv: https://arxiv.org/abs/1811.08201 + + Args: + in_channels (int): Number of input image channels. Normally 3. + num_channels (tuple[int]): Numbers of feature channels at each stages. + Default: (32, 64, 128). + num_blocks (tuple[int]): Numbers of CG blocks at stage 1 and stage 2. + Default: (3, 21). + dilations (tuple[int]): Dilation rate for surrounding context + extractors at stage 1 and stage 2. Default: (2, 4). + reductions (tuple[int]): Reductions for global context extractors at + stage 1 and stage 2. Default: (8, 16). + conv_cfg (dict): Config dict for convolution layer. + Default: None, which means using conv2d. + norm_cfg (dict): Config dict for normalization layer. + Default: dict(type='BN', requires_grad=True). + act_cfg (dict): Config dict for activation layer. + Default: dict(type='PReLU'). + norm_eval (bool): Whether to set norm layers to eval mode, namely, + freeze running stats (mean and var). Note: Effect on Batch Norm + and its variants only. Default: False. 
+ with_cp (bool): Use checkpoint or not. Using checkpoint will save some + memory while slowing down the training speed. Default: False. + """ + + def __init__(self, + in_channels=3, + num_channels=(32, 64, 128), + num_blocks=(3, 21), + dilations=(2, 4), + reductions=(8, 16), + conv_cfg=None, + norm_cfg=dict(type='BN', requires_grad=True), + act_cfg=dict(type='PReLU'), + norm_eval=False, + with_cp=False): + + super(CGNet, self).__init__() + self.in_channels = in_channels + self.num_channels = num_channels + assert isinstance(self.num_channels, tuple) and len( + self.num_channels) == 3 + self.num_blocks = num_blocks + assert isinstance(self.num_blocks, tuple) and len(self.num_blocks) == 2 + self.dilations = dilations + assert isinstance(self.dilations, tuple) and len(self.dilations) == 2 + self.reductions = reductions + assert isinstance(self.reductions, tuple) and len(self.reductions) == 2 + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.act_cfg = act_cfg + if 'type' in self.act_cfg and self.act_cfg['type'] == 'PReLU': + self.act_cfg['num_parameters'] = num_channels[0] + self.norm_eval = norm_eval + self.with_cp = with_cp + + cur_channels = in_channels + self.stem = nn.ModuleList() + for i in range(3): + self.stem.append( + ConvModule( + cur_channels, + num_channels[0], + 3, + 2 if i == 0 else 1, + padding=1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg)) + cur_channels = num_channels[0] + + self.inject_2x = InputInjection(1) # down-sample for Input, factor=2 + self.inject_4x = InputInjection(2) # down-sample for Input, factor=4 + + cur_channels += in_channels + self.norm_prelu_0 = nn.Sequential( + build_norm_layer(norm_cfg, cur_channels)[1], + nn.PReLU(cur_channels)) + + # stage 1 + self.level1 = nn.ModuleList() + for i in range(num_blocks[0]): + self.level1.append( + ContextGuidedBlock( + cur_channels if i == 0 else num_channels[1], + num_channels[1], + dilations[0], + reductions[0], + downsample=(i == 0), + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + with_cp=with_cp)) # CG block + + cur_channels = 2 * num_channels[1] + in_channels + self.norm_prelu_1 = nn.Sequential( + build_norm_layer(norm_cfg, cur_channels)[1], + nn.PReLU(cur_channels)) + + # stage 2 + self.level2 = nn.ModuleList() + for i in range(num_blocks[1]): + self.level2.append( + ContextGuidedBlock( + cur_channels if i == 0 else num_channels[2], + num_channels[2], + dilations[1], + reductions[1], + downsample=(i == 0), + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + with_cp=with_cp)) # CG block + + cur_channels = 2 * num_channels[2] + self.norm_prelu_2 = nn.Sequential( + build_norm_layer(norm_cfg, cur_channels)[1], + nn.PReLU(cur_channels)) + + def forward(self, x): + output = [] + + # stage 0 + inp_2x = self.inject_2x(x) + inp_4x = self.inject_4x(x) + for layer in self.stem: + x = layer(x) + x = self.norm_prelu_0(torch.cat([x, inp_2x], 1)) + output.append(x) + + # stage 1 + for i, layer in enumerate(self.level1): + x = layer(x) + if i == 0: + down1 = x + x = self.norm_prelu_1(torch.cat([x, down1, inp_4x], 1)) + output.append(x) + + # stage 2 + for i, layer in enumerate(self.level2): + x = layer(x) + if i == 0: + down2 = x + x = self.norm_prelu_2(torch.cat([down2, x], 1)) + output.append(x) + + return output + + def init_weights(self, pretrained=None): + """Initialize the weights in backbone. + + Args: + pretrained (str, optional): Path to pre-trained weights. + Defaults to None. 
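+
+        Example (an illustrative sketch; the checkpoint path is
+        hypothetical):
+
+        .. code-block:: python
+
+            model = CGNet()
+            model.init_weights()                # random (Kaiming) init
+            # model.init_weights('cgnet.pth')   # or load a checkpoint path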
+ """ + if isinstance(pretrained, str): + logger = get_root_logger() + load_checkpoint(self, pretrained, strict=False, logger=logger) + elif pretrained is None: + for m in self.modules(): + if isinstance(m, (nn.Conv2d, nn.Linear)): + kaiming_init(m) + elif isinstance(m, (_BatchNorm, nn.GroupNorm)): + constant_init(m, 1) + elif isinstance(m, nn.PReLU): + constant_init(m, 0) + else: + raise TypeError('pretrained must be a str or None') + + def train(self, mode=True): + """Convert the model into training mode will keeping the normalization + layer freezed.""" + super(CGNet, self).train(mode) + if mode and self.norm_eval: + for m in self.modules(): + # trick: eval have effect on BatchNorm only + if isinstance(m, _BatchNorm): + m.eval() diff --git a/mmseg/models/backbones/fast_scnn.py b/mmseg/models/backbones/fast_scnn.py new file mode 100644 index 0000000000000000000000000000000000000000..ee115ffda1b32d9ab2181541f54489168cf6a3ec --- /dev/null +++ b/mmseg/models/backbones/fast_scnn.py @@ -0,0 +1,375 @@ +import torch +import torch.nn as nn +from mmcv.cnn import (ConvModule, DepthwiseSeparableConvModule, constant_init, + kaiming_init) +from torch.nn.modules.batchnorm import _BatchNorm + +from mmseg.models.decode_heads.psp_head import PPM +from mmseg.ops import resize +from ..builder import BACKBONES +from ..utils.inverted_residual import InvertedResidual + + +class LearningToDownsample(nn.Module): + """Learning to downsample module. + + Args: + in_channels (int): Number of input channels. + dw_channels (tuple[int]): Number of output channels of the first and + the second depthwise conv (dwconv) layers. + out_channels (int): Number of output channels of the whole + 'learning to downsample' module. + conv_cfg (dict | None): Config of conv layers. Default: None + norm_cfg (dict | None): Config of norm layers. Default: + dict(type='BN') + act_cfg (dict): Config of activation layers. Default: + dict(type='ReLU') + """ + + def __init__(self, + in_channels, + dw_channels, + out_channels, + conv_cfg=None, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU')): + super(LearningToDownsample, self).__init__() + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.act_cfg = act_cfg + dw_channels1 = dw_channels[0] + dw_channels2 = dw_channels[1] + + self.conv = ConvModule( + in_channels, + dw_channels1, + 3, + stride=2, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + self.dsconv1 = DepthwiseSeparableConvModule( + dw_channels1, + dw_channels2, + kernel_size=3, + stride=2, + padding=1, + norm_cfg=self.norm_cfg) + self.dsconv2 = DepthwiseSeparableConvModule( + dw_channels2, + out_channels, + kernel_size=3, + stride=2, + padding=1, + norm_cfg=self.norm_cfg) + + def forward(self, x): + x = self.conv(x) + x = self.dsconv1(x) + x = self.dsconv2(x) + return x + + +class GlobalFeatureExtractor(nn.Module): + """Global feature extractor module. + + Args: + in_channels (int): Number of input channels of the GFE module. + Default: 64 + block_channels (tuple[int]): Tuple of ints. Each int specifies the + number of output channels of each Inverted Residual module. + Default: (64, 96, 128) + out_channels(int): Number of output channels of the GFE module. + Default: 128 + expand_ratio (int): Adjusts number of channels of the hidden layer + in InvertedResidual by this amount. + Default: 6 + num_blocks (tuple[int]): Tuple of ints. Each int specifies the + number of times each Inverted Residual module is repeated. + The repeated Inverted Residual modules are called a 'group'. 
+ Default: (3, 3, 3) + strides (tuple[int]): Tuple of ints. Each int specifies + the downsampling factor of each 'group'. + Default: (2, 2, 1) + pool_scales (tuple[int]): Tuple of ints. Each int specifies + the parameter required in 'global average pooling' within PPM. + Default: (1, 2, 3, 6) + conv_cfg (dict | None): Config of conv layers. Default: None + norm_cfg (dict | None): Config of norm layers. Default: + dict(type='BN') + act_cfg (dict): Config of activation layers. Default: + dict(type='ReLU') + align_corners (bool): align_corners argument of F.interpolate. + Default: False + """ + + def __init__(self, + in_channels=64, + block_channels=(64, 96, 128), + out_channels=128, + expand_ratio=6, + num_blocks=(3, 3, 3), + strides=(2, 2, 1), + pool_scales=(1, 2, 3, 6), + conv_cfg=None, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU'), + align_corners=False): + super(GlobalFeatureExtractor, self).__init__() + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.act_cfg = act_cfg + assert len(block_channels) == len(num_blocks) == 3 + self.bottleneck1 = self._make_layer(in_channels, block_channels[0], + num_blocks[0], strides[0], + expand_ratio) + self.bottleneck2 = self._make_layer(block_channels[0], + block_channels[1], num_blocks[1], + strides[1], expand_ratio) + self.bottleneck3 = self._make_layer(block_channels[1], + block_channels[2], num_blocks[2], + strides[2], expand_ratio) + self.ppm = PPM( + pool_scales, + block_channels[2], + block_channels[2] // 4, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + align_corners=align_corners) + self.out = ConvModule( + block_channels[2] * 2, + out_channels, + 1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + def _make_layer(self, + in_channels, + out_channels, + blocks, + stride=1, + expand_ratio=6): + layers = [ + InvertedResidual( + in_channels, + out_channels, + stride, + expand_ratio, + norm_cfg=self.norm_cfg) + ] + for i in range(1, blocks): + layers.append( + InvertedResidual( + out_channels, + out_channels, + 1, + expand_ratio, + norm_cfg=self.norm_cfg)) + return nn.Sequential(*layers) + + def forward(self, x): + x = self.bottleneck1(x) + x = self.bottleneck2(x) + x = self.bottleneck3(x) + x = torch.cat([x, *self.ppm(x)], dim=1) + x = self.out(x) + return x + + +class FeatureFusionModule(nn.Module): + """Feature fusion module. + + Args: + higher_in_channels (int): Number of input channels of the + higher-resolution branch. + lower_in_channels (int): Number of input channels of the + lower-resolution branch. + out_channels (int): Number of output channels. + conv_cfg (dict | None): Config of conv layers. Default: None + norm_cfg (dict | None): Config of norm layers. Default: + dict(type='BN') + act_cfg (dict): Config of activation layers. Default: + dict(type='ReLU') + align_corners (bool): align_corners argument of F.interpolate. 
+ Default: False + """ + + def __init__(self, + higher_in_channels, + lower_in_channels, + out_channels, + conv_cfg=None, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU'), + align_corners=False): + super(FeatureFusionModule, self).__init__() + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.act_cfg = act_cfg + self.align_corners = align_corners + self.dwconv = ConvModule( + lower_in_channels, + out_channels, + 1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + self.conv_lower_res = ConvModule( + out_channels, + out_channels, + 1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=None) + self.conv_higher_res = ConvModule( + higher_in_channels, + out_channels, + 1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=None) + self.relu = nn.ReLU(True) + + def forward(self, higher_res_feature, lower_res_feature): + lower_res_feature = resize( + lower_res_feature, + size=higher_res_feature.size()[2:], + mode='bilinear', + align_corners=self.align_corners) + lower_res_feature = self.dwconv(lower_res_feature) + lower_res_feature = self.conv_lower_res(lower_res_feature) + + higher_res_feature = self.conv_higher_res(higher_res_feature) + out = higher_res_feature + lower_res_feature + return self.relu(out) + + +@BACKBONES.register_module() +class FastSCNN(nn.Module): + """Fast-SCNN Backbone. + + Args: + in_channels (int): Number of input image channels. Default: 3. + downsample_dw_channels (tuple[int]): Number of output channels after + the first conv layer & the second conv layer in + Learning-To-Downsample (LTD) module. + Default: (32, 48). + global_in_channels (int): Number of input channels of + Global Feature Extractor(GFE). + Equal to number of output channels of LTD. + Default: 64. + global_block_channels (tuple[int]): Tuple of integers that describe + the output channels for each of the MobileNet-v2 bottleneck + residual blocks in GFE. + Default: (64, 96, 128). + global_block_strides (tuple[int]): Tuple of integers + that describe the strides (downsampling factors) for each of the + MobileNet-v2 bottleneck residual blocks in GFE. + Default: (2, 2, 1). + global_out_channels (int): Number of output channels of GFE. + Default: 128. + higher_in_channels (int): Number of input channels of the higher + resolution branch in FFM. + Equal to global_in_channels. + Default: 64. + lower_in_channels (int): Number of input channels of the lower + resolution branch in FFM. + Equal to global_out_channels. + Default: 128. + fusion_out_channels (int): Number of output channels of FFM. + Default: 128. + out_indices (tuple): Tuple of indices of list + [higher_res_features, lower_res_features, fusion_output]. + Often set to (0,1,2) to enable aux. heads. + Default: (0, 1, 2). + conv_cfg (dict | None): Config of conv layers. Default: None + norm_cfg (dict | None): Config of norm layers. Default: + dict(type='BN') + act_cfg (dict): Config of activation layers. Default: + dict(type='ReLU') + align_corners (bool): align_corners argument of F.interpolate. 
+ Default: False + """ + + def __init__(self, + in_channels=3, + downsample_dw_channels=(32, 48), + global_in_channels=64, + global_block_channels=(64, 96, 128), + global_block_strides=(2, 2, 1), + global_out_channels=128, + higher_in_channels=64, + lower_in_channels=128, + fusion_out_channels=128, + out_indices=(0, 1, 2), + conv_cfg=None, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU'), + align_corners=False): + + super(FastSCNN, self).__init__() + if global_in_channels != higher_in_channels: + raise AssertionError('Global Input Channels must be the same \ + with Higher Input Channels!') + elif global_out_channels != lower_in_channels: + raise AssertionError('Global Output Channels must be the same \ + with Lower Input Channels!') + + self.in_channels = in_channels + self.downsample_dw_channels1 = downsample_dw_channels[0] + self.downsample_dw_channels2 = downsample_dw_channels[1] + self.global_in_channels = global_in_channels + self.global_block_channels = global_block_channels + self.global_block_strides = global_block_strides + self.global_out_channels = global_out_channels + self.higher_in_channels = higher_in_channels + self.lower_in_channels = lower_in_channels + self.fusion_out_channels = fusion_out_channels + self.out_indices = out_indices + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.act_cfg = act_cfg + self.align_corners = align_corners + self.learning_to_downsample = LearningToDownsample( + in_channels, + downsample_dw_channels, + global_in_channels, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + self.global_feature_extractor = GlobalFeatureExtractor( + global_in_channels, + global_block_channels, + global_out_channels, + strides=self.global_block_strides, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + align_corners=self.align_corners) + self.feature_fusion = FeatureFusionModule( + higher_in_channels, + lower_in_channels, + fusion_out_channels, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + align_corners=self.align_corners) + + def init_weights(self, pretrained=None): + for m in self.modules(): + if isinstance(m, nn.Conv2d): + kaiming_init(m) + elif isinstance(m, (_BatchNorm, nn.GroupNorm)): + constant_init(m, 1) + + def forward(self, x): + higher_res_features = self.learning_to_downsample(x) + lower_res_features = self.global_feature_extractor(higher_res_features) + fusion_output = self.feature_fusion(higher_res_features, + lower_res_features) + + outs = [higher_res_features, lower_res_features, fusion_output] + outs = [outs[i] for i in self.out_indices] + return tuple(outs) diff --git a/mmseg/models/backbones/hrnet.py b/mmseg/models/backbones/hrnet.py new file mode 100644 index 0000000000000000000000000000000000000000..5010a2e767951b1d5a1d67234d4c4517926b44c5 --- /dev/null +++ b/mmseg/models/backbones/hrnet.py @@ -0,0 +1,555 @@ +import torch.nn as nn +from mmcv.cnn import (build_conv_layer, build_norm_layer, constant_init, + kaiming_init) +from mmcv.runner import load_checkpoint +from mmcv.utils.parrots_wrapper import _BatchNorm + +from mmseg.ops import Upsample, resize +from mmseg.utils import get_root_logger +from ..builder import BACKBONES +from .resnet import BasicBlock, Bottleneck + + +class HRModule(nn.Module): + """High-Resolution Module for HRNet. + + In this module, every branch has 4 BasicBlocks/Bottlenecks. Fusion/Exchange + is in this module. 
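+
+    A minimal illustrative sketch (the branch, block, and channel numbers
+    below are hypothetical, chosen only to satisfy the consistency checks):
+
+    Example:
+        >>> import torch
+        >>> from mmseg.models.backbones.hrnet import HRModule
+        >>> from mmseg.models.backbones.resnet import BasicBlock
+        >>> hr_module = HRModule(
+        ...     num_branches=2,
+        ...     blocks=BasicBlock,
+        ...     num_blocks=(4, 4),
+        ...     in_channels=[32, 64],
+        ...     num_channels=(32, 64))
+        >>> hr_module.eval()
+        >>> x = [torch.rand(1, 32, 16, 16), torch.rand(1, 64, 8, 8)]
+        >>> outs = hr_module(x)
+        >>> [tuple(out.shape) for out in outs]
+        [(1, 32, 16, 16), (1, 64, 8, 8)]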
+ """ + + def __init__(self, + num_branches, + blocks, + num_blocks, + in_channels, + num_channels, + multiscale_output=True, + with_cp=False, + conv_cfg=None, + norm_cfg=dict(type='BN', requires_grad=True)): + super(HRModule, self).__init__() + self._check_branches(num_branches, num_blocks, in_channels, + num_channels) + + self.in_channels = in_channels + self.num_branches = num_branches + + self.multiscale_output = multiscale_output + self.norm_cfg = norm_cfg + self.conv_cfg = conv_cfg + self.with_cp = with_cp + self.branches = self._make_branches(num_branches, blocks, num_blocks, + num_channels) + self.fuse_layers = self._make_fuse_layers() + self.relu = nn.ReLU(inplace=False) + + def _check_branches(self, num_branches, num_blocks, in_channels, + num_channels): + """Check branches configuration.""" + if num_branches != len(num_blocks): + error_msg = f'NUM_BRANCHES({num_branches}) <> NUM_BLOCKS(' \ + f'{len(num_blocks)})' + raise ValueError(error_msg) + + if num_branches != len(num_channels): + error_msg = f'NUM_BRANCHES({num_branches}) <> NUM_CHANNELS(' \ + f'{len(num_channels)})' + raise ValueError(error_msg) + + if num_branches != len(in_channels): + error_msg = f'NUM_BRANCHES({num_branches}) <> NUM_INCHANNELS(' \ + f'{len(in_channels)})' + raise ValueError(error_msg) + + def _make_one_branch(self, + branch_index, + block, + num_blocks, + num_channels, + stride=1): + """Build one branch.""" + downsample = None + if stride != 1 or \ + self.in_channels[branch_index] != \ + num_channels[branch_index] * block.expansion: + downsample = nn.Sequential( + build_conv_layer( + self.conv_cfg, + self.in_channels[branch_index], + num_channels[branch_index] * block.expansion, + kernel_size=1, + stride=stride, + bias=False), + build_norm_layer(self.norm_cfg, num_channels[branch_index] * + block.expansion)[1]) + + layers = [] + layers.append( + block( + self.in_channels[branch_index], + num_channels[branch_index], + stride, + downsample=downsample, + with_cp=self.with_cp, + norm_cfg=self.norm_cfg, + conv_cfg=self.conv_cfg)) + self.in_channels[branch_index] = \ + num_channels[branch_index] * block.expansion + for i in range(1, num_blocks[branch_index]): + layers.append( + block( + self.in_channels[branch_index], + num_channels[branch_index], + with_cp=self.with_cp, + norm_cfg=self.norm_cfg, + conv_cfg=self.conv_cfg)) + + return nn.Sequential(*layers) + + def _make_branches(self, num_branches, block, num_blocks, num_channels): + """Build multiple branch.""" + branches = [] + + for i in range(num_branches): + branches.append( + self._make_one_branch(i, block, num_blocks, num_channels)) + + return nn.ModuleList(branches) + + def _make_fuse_layers(self): + """Build fuse layer.""" + if self.num_branches == 1: + return None + + num_branches = self.num_branches + in_channels = self.in_channels + fuse_layers = [] + num_out_branches = num_branches if self.multiscale_output else 1 + for i in range(num_out_branches): + fuse_layer = [] + for j in range(num_branches): + if j > i: + fuse_layer.append( + nn.Sequential( + build_conv_layer( + self.conv_cfg, + in_channels[j], + in_channels[i], + kernel_size=1, + stride=1, + padding=0, + bias=False), + build_norm_layer(self.norm_cfg, in_channels[i])[1], + # we set align_corners=False for HRNet + Upsample( + scale_factor=2**(j - i), + mode='bilinear', + align_corners=False))) + elif j == i: + fuse_layer.append(None) + else: + conv_downsamples = [] + for k in range(i - j): + if k == i - j - 1: + conv_downsamples.append( + nn.Sequential( + build_conv_layer( + 
self.conv_cfg, + in_channels[j], + in_channels[i], + kernel_size=3, + stride=2, + padding=1, + bias=False), + build_norm_layer(self.norm_cfg, + in_channels[i])[1])) + else: + conv_downsamples.append( + nn.Sequential( + build_conv_layer( + self.conv_cfg, + in_channels[j], + in_channels[j], + kernel_size=3, + stride=2, + padding=1, + bias=False), + build_norm_layer(self.norm_cfg, + in_channels[j])[1], + nn.ReLU(inplace=False))) + fuse_layer.append(nn.Sequential(*conv_downsamples)) + fuse_layers.append(nn.ModuleList(fuse_layer)) + + return nn.ModuleList(fuse_layers) + + def forward(self, x): + """Forward function.""" + if self.num_branches == 1: + return [self.branches[0](x[0])] + + for i in range(self.num_branches): + x[i] = self.branches[i](x[i]) + + x_fuse = [] + for i in range(len(self.fuse_layers)): + y = 0 + for j in range(self.num_branches): + if i == j: + y += x[j] + elif j > i: + y = y + resize( + self.fuse_layers[i][j](x[j]), + size=x[i].shape[2:], + mode='bilinear', + align_corners=False) + else: + y += self.fuse_layers[i][j](x[j]) + x_fuse.append(self.relu(y)) + return x_fuse + + +@BACKBONES.register_module() +class HRNet(nn.Module): + """HRNet backbone. + + High-Resolution Representations for Labeling Pixels and Regions + arXiv: https://arxiv.org/abs/1904.04514 + + Args: + extra (dict): detailed configuration for each stage of HRNet. + in_channels (int): Number of input image channels. Normally 3. + conv_cfg (dict): dictionary to construct and config conv layer. + norm_cfg (dict): dictionary to construct and config norm layer. + norm_eval (bool): Whether to set norm layers to eval mode, namely, + freeze running stats (mean and var). Note: Effect on Batch Norm + and its variants only. + with_cp (bool): Use checkpoint or not. Using checkpoint will save some + memory while slowing down the training speed. + zero_init_residual (bool): whether to use zero init for last norm layer + in resblocks to let them behave as identity. + + Example: + >>> from mmseg.models import HRNet + >>> import torch + >>> extra = dict( + >>> stage1=dict( + >>> num_modules=1, + >>> num_branches=1, + >>> block='BOTTLENECK', + >>> num_blocks=(4, ), + >>> num_channels=(64, )), + >>> stage2=dict( + >>> num_modules=1, + >>> num_branches=2, + >>> block='BASIC', + >>> num_blocks=(4, 4), + >>> num_channels=(32, 64)), + >>> stage3=dict( + >>> num_modules=4, + >>> num_branches=3, + >>> block='BASIC', + >>> num_blocks=(4, 4, 4), + >>> num_channels=(32, 64, 128)), + >>> stage4=dict( + >>> num_modules=3, + >>> num_branches=4, + >>> block='BASIC', + >>> num_blocks=(4, 4, 4, 4), + >>> num_channels=(32, 64, 128, 256))) + >>> self = HRNet(extra, in_channels=1) + >>> self.eval() + >>> inputs = torch.rand(1, 1, 32, 32) + >>> level_outputs = self.forward(inputs) + >>> for level_out in level_outputs: + ... 
print(tuple(level_out.shape)) + (1, 32, 8, 8) + (1, 64, 4, 4) + (1, 128, 2, 2) + (1, 256, 1, 1) + """ + + blocks_dict = {'BASIC': BasicBlock, 'BOTTLENECK': Bottleneck} + + def __init__(self, + extra, + in_channels=3, + conv_cfg=None, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=False, + with_cp=False, + zero_init_residual=False): + super(HRNet, self).__init__() + self.extra = extra + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.norm_eval = norm_eval + self.with_cp = with_cp + self.zero_init_residual = zero_init_residual + + # stem net + self.norm1_name, norm1 = build_norm_layer(self.norm_cfg, 64, postfix=1) + self.norm2_name, norm2 = build_norm_layer(self.norm_cfg, 64, postfix=2) + + self.conv1 = build_conv_layer( + self.conv_cfg, + in_channels, + 64, + kernel_size=3, + stride=2, + padding=1, + bias=False) + + self.add_module(self.norm1_name, norm1) + self.conv2 = build_conv_layer( + self.conv_cfg, + 64, + 64, + kernel_size=3, + stride=2, + padding=1, + bias=False) + + self.add_module(self.norm2_name, norm2) + self.relu = nn.ReLU(inplace=True) + + # stage 1 + self.stage1_cfg = self.extra['stage1'] + num_channels = self.stage1_cfg['num_channels'][0] + block_type = self.stage1_cfg['block'] + num_blocks = self.stage1_cfg['num_blocks'][0] + + block = self.blocks_dict[block_type] + stage1_out_channels = num_channels * block.expansion + self.layer1 = self._make_layer(block, 64, num_channels, num_blocks) + + # stage 2 + self.stage2_cfg = self.extra['stage2'] + num_channels = self.stage2_cfg['num_channels'] + block_type = self.stage2_cfg['block'] + + block = self.blocks_dict[block_type] + num_channels = [channel * block.expansion for channel in num_channels] + self.transition1 = self._make_transition_layer([stage1_out_channels], + num_channels) + self.stage2, pre_stage_channels = self._make_stage( + self.stage2_cfg, num_channels) + + # stage 3 + self.stage3_cfg = self.extra['stage3'] + num_channels = self.stage3_cfg['num_channels'] + block_type = self.stage3_cfg['block'] + + block = self.blocks_dict[block_type] + num_channels = [channel * block.expansion for channel in num_channels] + self.transition2 = self._make_transition_layer(pre_stage_channels, + num_channels) + self.stage3, pre_stage_channels = self._make_stage( + self.stage3_cfg, num_channels) + + # stage 4 + self.stage4_cfg = self.extra['stage4'] + num_channels = self.stage4_cfg['num_channels'] + block_type = self.stage4_cfg['block'] + + block = self.blocks_dict[block_type] + num_channels = [channel * block.expansion for channel in num_channels] + self.transition3 = self._make_transition_layer(pre_stage_channels, + num_channels) + self.stage4, pre_stage_channels = self._make_stage( + self.stage4_cfg, num_channels) + + @property + def norm1(self): + """nn.Module: the normalization layer named "norm1" """ + return getattr(self, self.norm1_name) + + @property + def norm2(self): + """nn.Module: the normalization layer named "norm2" """ + return getattr(self, self.norm2_name) + + def _make_transition_layer(self, num_channels_pre_layer, + num_channels_cur_layer): + """Make transition layer.""" + num_branches_cur = len(num_channels_cur_layer) + num_branches_pre = len(num_channels_pre_layer) + + transition_layers = [] + for i in range(num_branches_cur): + if i < num_branches_pre: + if num_channels_cur_layer[i] != num_channels_pre_layer[i]: + transition_layers.append( + nn.Sequential( + build_conv_layer( + self.conv_cfg, + num_channels_pre_layer[i], + num_channels_cur_layer[i], + kernel_size=3, + stride=1, + padding=1, 
+ bias=False), + build_norm_layer(self.norm_cfg, + num_channels_cur_layer[i])[1], + nn.ReLU(inplace=True))) + else: + transition_layers.append(None) + else: + conv_downsamples = [] + for j in range(i + 1 - num_branches_pre): + in_channels = num_channels_pre_layer[-1] + out_channels = num_channels_cur_layer[i] \ + if j == i - num_branches_pre else in_channels + conv_downsamples.append( + nn.Sequential( + build_conv_layer( + self.conv_cfg, + in_channels, + out_channels, + kernel_size=3, + stride=2, + padding=1, + bias=False), + build_norm_layer(self.norm_cfg, out_channels)[1], + nn.ReLU(inplace=True))) + transition_layers.append(nn.Sequential(*conv_downsamples)) + + return nn.ModuleList(transition_layers) + + def _make_layer(self, block, inplanes, planes, blocks, stride=1): + """Make each layer.""" + downsample = None + if stride != 1 or inplanes != planes * block.expansion: + downsample = nn.Sequential( + build_conv_layer( + self.conv_cfg, + inplanes, + planes * block.expansion, + kernel_size=1, + stride=stride, + bias=False), + build_norm_layer(self.norm_cfg, planes * block.expansion)[1]) + + layers = [] + layers.append( + block( + inplanes, + planes, + stride, + downsample=downsample, + with_cp=self.with_cp, + norm_cfg=self.norm_cfg, + conv_cfg=self.conv_cfg)) + inplanes = planes * block.expansion + for i in range(1, blocks): + layers.append( + block( + inplanes, + planes, + with_cp=self.with_cp, + norm_cfg=self.norm_cfg, + conv_cfg=self.conv_cfg)) + + return nn.Sequential(*layers) + + def _make_stage(self, layer_config, in_channels, multiscale_output=True): + """Make each stage.""" + num_modules = layer_config['num_modules'] + num_branches = layer_config['num_branches'] + num_blocks = layer_config['num_blocks'] + num_channels = layer_config['num_channels'] + block = self.blocks_dict[layer_config['block']] + + hr_modules = [] + for i in range(num_modules): + # multi_scale_output is only used for the last module + if not multiscale_output and i == num_modules - 1: + reset_multiscale_output = False + else: + reset_multiscale_output = True + + hr_modules.append( + HRModule( + num_branches, + block, + num_blocks, + in_channels, + num_channels, + reset_multiscale_output, + with_cp=self.with_cp, + norm_cfg=self.norm_cfg, + conv_cfg=self.conv_cfg)) + + return nn.Sequential(*hr_modules), in_channels + + def init_weights(self, pretrained=None): + """Initialize the weights in backbone. + + Args: + pretrained (str, optional): Path to pre-trained weights. + Defaults to None. 
+ """ + if isinstance(pretrained, str): + logger = get_root_logger() + load_checkpoint(self, pretrained, strict=False, logger=logger) + elif pretrained is None: + for m in self.modules(): + if isinstance(m, nn.Conv2d): + kaiming_init(m) + elif isinstance(m, (_BatchNorm, nn.GroupNorm)): + constant_init(m, 1) + + if self.zero_init_residual: + for m in self.modules(): + if isinstance(m, Bottleneck): + constant_init(m.norm3, 0) + elif isinstance(m, BasicBlock): + constant_init(m.norm2, 0) + else: + raise TypeError('pretrained must be a str or None') + + def forward(self, x): + """Forward function.""" + + x = self.conv1(x) + x = self.norm1(x) + x = self.relu(x) + x = self.conv2(x) + x = self.norm2(x) + x = self.relu(x) + x = self.layer1(x) + + x_list = [] + for i in range(self.stage2_cfg['num_branches']): + if self.transition1[i] is not None: + x_list.append(self.transition1[i](x)) + else: + x_list.append(x) + y_list = self.stage2(x_list) + + x_list = [] + for i in range(self.stage3_cfg['num_branches']): + if self.transition2[i] is not None: + x_list.append(self.transition2[i](y_list[-1])) + else: + x_list.append(y_list[i]) + y_list = self.stage3(x_list) + + x_list = [] + for i in range(self.stage4_cfg['num_branches']): + if self.transition3[i] is not None: + x_list.append(self.transition3[i](y_list[-1])) + else: + x_list.append(y_list[i]) + y_list = self.stage4(x_list) + + return y_list + + def train(self, mode=True): + """Convert the model into training mode will keeping the normalization + layer freezed.""" + super(HRNet, self).train(mode) + if mode and self.norm_eval: + for m in self.modules(): + # trick: eval have effect on BatchNorm only + if isinstance(m, _BatchNorm): + m.eval() diff --git a/mmseg/models/backbones/mobilenet_v2.py b/mmseg/models/backbones/mobilenet_v2.py new file mode 100644 index 0000000000000000000000000000000000000000..5820b4b13c0019d67801c5f924650e928acca72e --- /dev/null +++ b/mmseg/models/backbones/mobilenet_v2.py @@ -0,0 +1,180 @@ +import logging + +import torch.nn as nn +from mmcv.cnn import ConvModule, constant_init, kaiming_init +from mmcv.runner import load_checkpoint +from torch.nn.modules.batchnorm import _BatchNorm + +from ..builder import BACKBONES +from ..utils import InvertedResidual, make_divisible + + +@BACKBONES.register_module() +class MobileNetV2(nn.Module): + """MobileNetV2 backbone. + + Args: + widen_factor (float): Width multiplier, multiply number of + channels in each layer by this amount. Default: 1.0. + strides (Sequence[int], optional): Strides of the first block of each + layer. If not specified, default config in ``arch_setting`` will + be used. + dilations (Sequence[int]): Dilation of each layer. + out_indices (None or Sequence[int]): Output from which stages. + Default: (7, ). + frozen_stages (int): Stages to be frozen (all param fixed). + Default: -1, which means not freezing any parameters. + conv_cfg (dict): Config dict for convolution layer. + Default: None, which means using conv2d. + norm_cfg (dict): Config dict for normalization layer. + Default: dict(type='BN'). + act_cfg (dict): Config dict for activation layer. + Default: dict(type='ReLU6'). + norm_eval (bool): Whether to set norm layers to eval mode, namely, + freeze running stats (mean and var). Note: Effect on Batch Norm + and its variants only. Default: False. + with_cp (bool): Use checkpoint or not. Using checkpoint will save some + memory while slowing down the training speed. Default: False. + """ + + # Parameters to build layers. 
+    # layer, from left to right: expand_ratio, channel, num_blocks.
+    arch_settings = [[1, 16, 1], [6, 24, 2], [6, 32, 3], [6, 64, 4],
+                     [6, 96, 3], [6, 160, 3], [6, 320, 1]]
+
+    def __init__(self,
+                 widen_factor=1.,
+                 strides=(1, 2, 2, 2, 1, 2, 1),
+                 dilations=(1, 1, 1, 1, 1, 1, 1),
+                 out_indices=(1, 2, 4, 6),
+                 frozen_stages=-1,
+                 conv_cfg=None,
+                 norm_cfg=dict(type='BN'),
+                 act_cfg=dict(type='ReLU6'),
+                 norm_eval=False,
+                 with_cp=False):
+        super(MobileNetV2, self).__init__()
+        self.widen_factor = widen_factor
+        self.strides = strides
+        self.dilations = dilations
+        assert len(strides) == len(dilations) == len(self.arch_settings)
+        self.out_indices = out_indices
+        for index in out_indices:
+            if index not in range(0, 7):
+                raise ValueError('the item in out_indices must be in '
+                                 f'range(0, 7). But received {index}')
+
+        if frozen_stages not in range(-1, 7):
+            raise ValueError('frozen_stages must be in range(-1, 7). '
+                             f'But received {frozen_stages}')
+        self.frozen_stages = frozen_stages
+        self.conv_cfg = conv_cfg
+        self.norm_cfg = norm_cfg
+        self.act_cfg = act_cfg
+        self.norm_eval = norm_eval
+        self.with_cp = with_cp
+
+        self.in_channels = make_divisible(32 * widen_factor, 8)
+
+        self.conv1 = ConvModule(
+            in_channels=3,
+            out_channels=self.in_channels,
+            kernel_size=3,
+            stride=2,
+            padding=1,
+            conv_cfg=self.conv_cfg,
+            norm_cfg=self.norm_cfg,
+            act_cfg=self.act_cfg)
+
+        self.layers = []
+
+        for i, layer_cfg in enumerate(self.arch_settings):
+            expand_ratio, channel, num_blocks = layer_cfg
+            stride = self.strides[i]
+            dilation = self.dilations[i]
+            out_channels = make_divisible(channel * widen_factor, 8)
+            inverted_res_layer = self.make_layer(
+                out_channels=out_channels,
+                num_blocks=num_blocks,
+                stride=stride,
+                dilation=dilation,
+                expand_ratio=expand_ratio)
+            layer_name = f'layer{i + 1}'
+            self.add_module(layer_name, inverted_res_layer)
+            self.layers.append(layer_name)
+
+    def make_layer(self, out_channels, num_blocks, stride, dilation,
+                   expand_ratio):
+        """Stack InvertedResidual blocks to build a layer for MobileNetV2.
+
+        Args:
+            out_channels (int): out_channels of block.
+            num_blocks (int): Number of blocks.
+            stride (int): Stride of the first block.
+            dilation (int): Dilation of the first block.
+            expand_ratio (int): Expand the number of channels of the
+                hidden layer in InvertedResidual by this ratio.
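+
+        Returns:
+            nn.Sequential: The stacked InvertedResidual blocks; only the
+                first block uses ``stride`` and ``dilation``, the rest use
+                stride 1 and dilation 1.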
+ """ + layers = [] + for i in range(num_blocks): + layers.append( + InvertedResidual( + self.in_channels, + out_channels, + stride if i == 0 else 1, + expand_ratio=expand_ratio, + dilation=dilation if i == 0 else 1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + with_cp=self.with_cp)) + self.in_channels = out_channels + + return nn.Sequential(*layers) + + def init_weights(self, pretrained=None): + if isinstance(pretrained, str): + logger = logging.getLogger() + load_checkpoint(self, pretrained, strict=False, logger=logger) + elif pretrained is None: + for m in self.modules(): + if isinstance(m, nn.Conv2d): + kaiming_init(m) + elif isinstance(m, (_BatchNorm, nn.GroupNorm)): + constant_init(m, 1) + else: + raise TypeError('pretrained must be a str or None') + + def forward(self, x): + x = self.conv1(x) + + outs = [] + for i, layer_name in enumerate(self.layers): + layer = getattr(self, layer_name) + x = layer(x) + if i in self.out_indices: + outs.append(x) + + if len(outs) == 1: + return outs[0] + else: + return tuple(outs) + + def _freeze_stages(self): + if self.frozen_stages >= 0: + for param in self.conv1.parameters(): + param.requires_grad = False + for i in range(1, self.frozen_stages + 1): + layer = getattr(self, f'layer{i}') + layer.eval() + for param in layer.parameters(): + param.requires_grad = False + + def train(self, mode=True): + super(MobileNetV2, self).train(mode) + self._freeze_stages() + if mode and self.norm_eval: + for m in self.modules(): + if isinstance(m, _BatchNorm): + m.eval() diff --git a/mmseg/models/backbones/mobilenet_v3.py b/mmseg/models/backbones/mobilenet_v3.py new file mode 100644 index 0000000000000000000000000000000000000000..f2e9a0cc00a9e79fda5e5d1ce963c2523397afd2 --- /dev/null +++ b/mmseg/models/backbones/mobilenet_v3.py @@ -0,0 +1,255 @@ +import logging + +import mmcv +import torch.nn as nn +from mmcv.cnn import ConvModule, constant_init, kaiming_init +from mmcv.cnn.bricks import Conv2dAdaptivePadding +from mmcv.runner import load_checkpoint +from torch.nn.modules.batchnorm import _BatchNorm + +from ..builder import BACKBONES +from ..utils import InvertedResidualV3 as InvertedResidual + + +@BACKBONES.register_module() +class MobileNetV3(nn.Module): + """MobileNetV3 backbone. + + This backbone is the improved implementation of `Searching for MobileNetV3 + `_. + + Args: + arch (str): Architecture of mobilnetv3, from {'small', 'large'}. + Default: 'small'. + conv_cfg (dict): Config dict for convolution layer. + Default: None, which means using conv2d. + norm_cfg (dict): Config dict for normalization layer. + Default: dict(type='BN'). + out_indices (tuple[int]): Output from which layer. + Default: (0, 1, 12). + frozen_stages (int): Stages to be frozen (all param fixed). + Default: -1, which means not freezing any parameters. + norm_eval (bool): Whether to set norm layers to eval mode, namely, + freeze running stats (mean and var). Note: Effect on Batch Norm + and its variants only. Default: False. + with_cp (bool): Use checkpoint or not. Using checkpoint will save + some memory while slowing down the training speed. + Default: False. 
+ """ + # Parameters to build each block: + # [kernel size, mid channels, out channels, with_se, act type, stride] + arch_settings = { + 'small': [[3, 16, 16, True, 'ReLU', 2], # block0 layer1 os=4 + [3, 72, 24, False, 'ReLU', 2], # block1 layer2 os=8 + [3, 88, 24, False, 'ReLU', 1], + [5, 96, 40, True, 'HSwish', 2], # block2 layer4 os=16 + [5, 240, 40, True, 'HSwish', 1], + [5, 240, 40, True, 'HSwish', 1], + [5, 120, 48, True, 'HSwish', 1], # block3 layer7 os=16 + [5, 144, 48, True, 'HSwish', 1], + [5, 288, 96, True, 'HSwish', 2], # block4 layer9 os=32 + [5, 576, 96, True, 'HSwish', 1], + [5, 576, 96, True, 'HSwish', 1]], + 'large': [[3, 16, 16, False, 'ReLU', 1], # block0 layer1 os=2 + [3, 64, 24, False, 'ReLU', 2], # block1 layer2 os=4 + [3, 72, 24, False, 'ReLU', 1], + [5, 72, 40, True, 'ReLU', 2], # block2 layer4 os=8 + [5, 120, 40, True, 'ReLU', 1], + [5, 120, 40, True, 'ReLU', 1], + [3, 240, 80, False, 'HSwish', 2], # block3 layer7 os=16 + [3, 200, 80, False, 'HSwish', 1], + [3, 184, 80, False, 'HSwish', 1], + [3, 184, 80, False, 'HSwish', 1], + [3, 480, 112, True, 'HSwish', 1], # block4 layer11 os=16 + [3, 672, 112, True, 'HSwish', 1], + [5, 672, 160, True, 'HSwish', 2], # block5 layer13 os=32 + [5, 960, 160, True, 'HSwish', 1], + [5, 960, 160, True, 'HSwish', 1]] + } # yapf: disable + + def __init__(self, + arch='small', + conv_cfg=None, + norm_cfg=dict(type='BN'), + out_indices=(0, 1, 12), + frozen_stages=-1, + reduction_factor=1, + norm_eval=False, + with_cp=False): + super(MobileNetV3, self).__init__() + assert arch in self.arch_settings + assert isinstance(reduction_factor, int) and reduction_factor > 0 + assert mmcv.is_tuple_of(out_indices, int) + for index in out_indices: + if index not in range(0, len(self.arch_settings[arch]) + 2): + raise ValueError( + 'the item in out_indices must in ' + f'range(0, {len(self.arch_settings[arch])+2}). ' + f'But received {index}') + + if frozen_stages not in range(-1, len(self.arch_settings[arch]) + 2): + raise ValueError('frozen_stages must be in range(-1, ' + f'{len(self.arch_settings[arch])+2}). 
' + f'But received {frozen_stages}') + self.arch = arch + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.out_indices = out_indices + self.frozen_stages = frozen_stages + self.reduction_factor = reduction_factor + self.norm_eval = norm_eval + self.with_cp = with_cp + self.layers = self._make_layer() + + def _make_layer(self): + layers = [] + + # build the first layer (layer0) + in_channels = 16 + layer = ConvModule( + in_channels=3, + out_channels=in_channels, + kernel_size=3, + stride=2, + padding=1, + conv_cfg=dict(type='Conv2dAdaptivePadding'), + norm_cfg=self.norm_cfg, + act_cfg=dict(type='HSwish')) + self.add_module('layer0', layer) + layers.append('layer0') + + layer_setting = self.arch_settings[self.arch] + for i, params in enumerate(layer_setting): + (kernel_size, mid_channels, out_channels, with_se, act, + stride) = params + + if self.arch == 'large' and i >= 12 or self.arch == 'small' and \ + i >= 8: + mid_channels = mid_channels // self.reduction_factor + out_channels = out_channels // self.reduction_factor + + if with_se: + se_cfg = dict( + channels=mid_channels, + ratio=4, + act_cfg=(dict(type='ReLU'), + dict(type='HSigmoid', bias=3.0, divisor=6.0))) + else: + se_cfg = None + + layer = InvertedResidual( + in_channels=in_channels, + out_channels=out_channels, + mid_channels=mid_channels, + kernel_size=kernel_size, + stride=stride, + se_cfg=se_cfg, + with_expand_conv=(in_channels != mid_channels), + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=dict(type=act), + with_cp=self.with_cp) + in_channels = out_channels + layer_name = 'layer{}'.format(i + 1) + self.add_module(layer_name, layer) + layers.append(layer_name) + + # build the last layer + # block5 layer12 os=32 for small model + # block6 layer16 os=32 for large model + layer = ConvModule( + in_channels=in_channels, + out_channels=576 if self.arch == 'small' else 960, + kernel_size=1, + stride=1, + dilation=4, + padding=0, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=dict(type='HSwish')) + layer_name = 'layer{}'.format(len(layer_setting) + 1) + self.add_module(layer_name, layer) + layers.append(layer_name) + + # next, convert backbone MobileNetV3 to a semantic segmentation version + if self.arch == 'small': + self.layer4.depthwise_conv.conv.stride = (1, 1) + self.layer9.depthwise_conv.conv.stride = (1, 1) + for i in range(4, len(layers)): + layer = getattr(self, layers[i]) + if isinstance(layer, InvertedResidual): + modified_module = layer.depthwise_conv.conv + else: + modified_module = layer.conv + + if i < 9: + modified_module.dilation = (2, 2) + pad = 2 + else: + modified_module.dilation = (4, 4) + pad = 4 + + if not isinstance(modified_module, Conv2dAdaptivePadding): + # Adjust padding + pad *= (modified_module.kernel_size[0] - 1) // 2 + modified_module.padding = (pad, pad) + else: + self.layer7.depthwise_conv.conv.stride = (1, 1) + self.layer13.depthwise_conv.conv.stride = (1, 1) + for i in range(7, len(layers)): + layer = getattr(self, layers[i]) + if isinstance(layer, InvertedResidual): + modified_module = layer.depthwise_conv.conv + else: + modified_module = layer.conv + + if i < 13: + modified_module.dilation = (2, 2) + pad = 2 + else: + modified_module.dilation = (4, 4) + pad = 4 + + if not isinstance(modified_module, Conv2dAdaptivePadding): + # Adjust padding + pad *= (modified_module.kernel_size[0] - 1) // 2 + modified_module.padding = (pad, pad) + + return layers + + def init_weights(self, pretrained=None): + if isinstance(pretrained, str): + logger = logging.getLogger() 
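+            # strict=False lets checkpoints with extra or missing keys
+            # (e.g. ImageNet classifier weights) load without raising.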
+ load_checkpoint(self, pretrained, strict=False, logger=logger) + elif pretrained is None: + for m in self.modules(): + if isinstance(m, nn.Conv2d): + kaiming_init(m) + elif isinstance(m, nn.BatchNorm2d): + constant_init(m, 1) + else: + raise TypeError('pretrained must be a str or None') + + def forward(self, x): + outs = [] + for i, layer_name in enumerate(self.layers): + layer = getattr(self, layer_name) + x = layer(x) + if i in self.out_indices: + outs.append(x) + return outs + + def _freeze_stages(self): + for i in range(self.frozen_stages + 1): + layer = getattr(self, f'layer{i}') + layer.eval() + for param in layer.parameters(): + param.requires_grad = False + + def train(self, mode=True): + super(MobileNetV3, self).train(mode) + self._freeze_stages() + if mode and self.norm_eval: + for m in self.modules(): + if isinstance(m, _BatchNorm): + m.eval() diff --git a/mmseg/models/backbones/resnest.py b/mmseg/models/backbones/resnest.py new file mode 100644 index 0000000000000000000000000000000000000000..8931decb876e4d46407fd177a5248fe2554e4062 --- /dev/null +++ b/mmseg/models/backbones/resnest.py @@ -0,0 +1,314 @@ +import math + +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.utils.checkpoint as cp +from mmcv.cnn import build_conv_layer, build_norm_layer + +from ..builder import BACKBONES +from ..utils import ResLayer +from .resnet import Bottleneck as _Bottleneck +from .resnet import ResNetV1d + + +class RSoftmax(nn.Module): + """Radix Softmax module in ``SplitAttentionConv2d``. + + Args: + radix (int): Radix of input. + groups (int): Groups of input. + """ + + def __init__(self, radix, groups): + super().__init__() + self.radix = radix + self.groups = groups + + def forward(self, x): + batch = x.size(0) + if self.radix > 1: + x = x.view(batch, self.groups, self.radix, -1).transpose(1, 2) + x = F.softmax(x, dim=1) + x = x.reshape(batch, -1) + else: + x = torch.sigmoid(x) + return x + + +class SplitAttentionConv2d(nn.Module): + """Split-Attention Conv2d in ResNeSt. + + Args: + in_channels (int): Same as nn.Conv2d. + out_channels (int): Same as nn.Conv2d. + kernel_size (int | tuple[int]): Same as nn.Conv2d. + stride (int | tuple[int]): Same as nn.Conv2d. + padding (int | tuple[int]): Same as nn.Conv2d. + dilation (int | tuple[int]): Same as nn.Conv2d. + groups (int): Same as nn.Conv2d. + radix (int): Radix of SpltAtConv2d. Default: 2 + reduction_factor (int): Reduction factor of inter_channels. Default: 4. + conv_cfg (dict): Config dict for convolution layer. Default: None, + which means using conv2d. + norm_cfg (dict): Config dict for normalization layer. Default: None. + dcn (dict): Config dict for DCN. Default: None. 
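+
+    A minimal illustrative sketch (the 64-channel size is arbitrary; with
+    radix > 1 the module still returns ``channels`` feature maps):
+
+    Example:
+        >>> import torch
+        >>> conv = SplitAttentionConv2d(64, 64, kernel_size=3, padding=1)
+        >>> conv.eval()
+        >>> x = torch.rand(1, 64, 32, 32)
+        >>> tuple(conv(x).shape)
+        (1, 64, 32, 32)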
+ """ + + def __init__(self, + in_channels, + channels, + kernel_size, + stride=1, + padding=0, + dilation=1, + groups=1, + radix=2, + reduction_factor=4, + conv_cfg=None, + norm_cfg=dict(type='BN'), + dcn=None): + super(SplitAttentionConv2d, self).__init__() + inter_channels = max(in_channels * radix // reduction_factor, 32) + self.radix = radix + self.groups = groups + self.channels = channels + self.with_dcn = dcn is not None + self.dcn = dcn + fallback_on_stride = False + if self.with_dcn: + fallback_on_stride = self.dcn.pop('fallback_on_stride', False) + if self.with_dcn and not fallback_on_stride: + assert conv_cfg is None, 'conv_cfg must be None for DCN' + conv_cfg = dcn + self.conv = build_conv_layer( + conv_cfg, + in_channels, + channels * radix, + kernel_size, + stride=stride, + padding=padding, + dilation=dilation, + groups=groups * radix, + bias=False) + self.norm0_name, norm0 = build_norm_layer( + norm_cfg, channels * radix, postfix=0) + self.add_module(self.norm0_name, norm0) + self.relu = nn.ReLU(inplace=True) + self.fc1 = build_conv_layer( + None, channels, inter_channels, 1, groups=self.groups) + self.norm1_name, norm1 = build_norm_layer( + norm_cfg, inter_channels, postfix=1) + self.add_module(self.norm1_name, norm1) + self.fc2 = build_conv_layer( + None, inter_channels, channels * radix, 1, groups=self.groups) + self.rsoftmax = RSoftmax(radix, groups) + + @property + def norm0(self): + """nn.Module: the normalization layer named "norm0" """ + return getattr(self, self.norm0_name) + + @property + def norm1(self): + """nn.Module: the normalization layer named "norm1" """ + return getattr(self, self.norm1_name) + + def forward(self, x): + x = self.conv(x) + x = self.norm0(x) + x = self.relu(x) + + batch, rchannel = x.shape[:2] + batch = x.size(0) + if self.radix > 1: + splits = x.view(batch, self.radix, -1, *x.shape[2:]) + gap = splits.sum(dim=1) + else: + gap = x + gap = F.adaptive_avg_pool2d(gap, 1) + gap = self.fc1(gap) + + gap = self.norm1(gap) + gap = self.relu(gap) + + atten = self.fc2(gap) + atten = self.rsoftmax(atten).view(batch, -1, 1, 1) + + if self.radix > 1: + attens = atten.view(batch, self.radix, -1, *atten.shape[2:]) + out = torch.sum(attens * splits, dim=1) + else: + out = atten * x + return out.contiguous() + + +class Bottleneck(_Bottleneck): + """Bottleneck block for ResNeSt. + + Args: + inplane (int): Input planes of this block. + planes (int): Middle planes of this block. + groups (int): Groups of conv2. + width_per_group (int): Width per group of conv2. 64x4d indicates + ``groups=64, width_per_group=4`` and 32x8d indicates + ``groups=32, width_per_group=8``. + radix (int): Radix of SpltAtConv2d. Default: 2 + reduction_factor (int): Reduction factor of inter_channels in + SplitAttentionConv2d. Default: 4. + avg_down_stride (bool): Whether to use average pool for stride in + Bottleneck. Default: True. + kwargs (dict): Key word arguments for base class. 
+ """ + expansion = 4 + + def __init__(self, + inplanes, + planes, + groups=1, + base_width=4, + base_channels=64, + radix=2, + reduction_factor=4, + avg_down_stride=True, + **kwargs): + """Bottleneck block for ResNeSt.""" + super(Bottleneck, self).__init__(inplanes, planes, **kwargs) + + if groups == 1: + width = self.planes + else: + width = math.floor(self.planes * + (base_width / base_channels)) * groups + + self.avg_down_stride = avg_down_stride and self.conv2_stride > 1 + + self.norm1_name, norm1 = build_norm_layer( + self.norm_cfg, width, postfix=1) + self.norm3_name, norm3 = build_norm_layer( + self.norm_cfg, self.planes * self.expansion, postfix=3) + + self.conv1 = build_conv_layer( + self.conv_cfg, + self.inplanes, + width, + kernel_size=1, + stride=self.conv1_stride, + bias=False) + self.add_module(self.norm1_name, norm1) + self.with_modulated_dcn = False + self.conv2 = SplitAttentionConv2d( + width, + width, + kernel_size=3, + stride=1 if self.avg_down_stride else self.conv2_stride, + padding=self.dilation, + dilation=self.dilation, + groups=groups, + radix=radix, + reduction_factor=reduction_factor, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + dcn=self.dcn) + delattr(self, self.norm2_name) + + if self.avg_down_stride: + self.avd_layer = nn.AvgPool2d(3, self.conv2_stride, padding=1) + + self.conv3 = build_conv_layer( + self.conv_cfg, + width, + self.planes * self.expansion, + kernel_size=1, + bias=False) + self.add_module(self.norm3_name, norm3) + + def forward(self, x): + + def _inner_forward(x): + identity = x + + out = self.conv1(x) + out = self.norm1(out) + out = self.relu(out) + + if self.with_plugins: + out = self.forward_plugin(out, self.after_conv1_plugin_names) + + out = self.conv2(out) + + if self.avg_down_stride: + out = self.avd_layer(out) + + if self.with_plugins: + out = self.forward_plugin(out, self.after_conv2_plugin_names) + + out = self.conv3(out) + out = self.norm3(out) + + if self.with_plugins: + out = self.forward_plugin(out, self.after_conv3_plugin_names) + + if self.downsample is not None: + identity = self.downsample(x) + + out += identity + + return out + + if self.with_cp and x.requires_grad: + out = cp.checkpoint(_inner_forward, x) + else: + out = _inner_forward(x) + + out = self.relu(out) + + return out + + +@BACKBONES.register_module() +class ResNeSt(ResNetV1d): + """ResNeSt backbone. + + Args: + groups (int): Number of groups of Bottleneck. Default: 1 + base_width (int): Base width of Bottleneck. Default: 4 + radix (int): Radix of SpltAtConv2d. Default: 2 + reduction_factor (int): Reduction factor of inter_channels in + SplitAttentionConv2d. Default: 4. + avg_down_stride (bool): Whether to use average pool for stride in + Bottleneck. Default: True. + kwargs (dict): Keyword arguments for ResNet. 
+ """ + + arch_settings = { + 50: (Bottleneck, (3, 4, 6, 3)), + 101: (Bottleneck, (3, 4, 23, 3)), + 152: (Bottleneck, (3, 8, 36, 3)), + 200: (Bottleneck, (3, 24, 36, 3)) + } + + def __init__(self, + groups=1, + base_width=4, + radix=2, + reduction_factor=4, + avg_down_stride=True, + **kwargs): + self.groups = groups + self.base_width = base_width + self.radix = radix + self.reduction_factor = reduction_factor + self.avg_down_stride = avg_down_stride + super(ResNeSt, self).__init__(**kwargs) + + def make_res_layer(self, **kwargs): + """Pack all blocks in a stage into a ``ResLayer``.""" + return ResLayer( + groups=self.groups, + base_width=self.base_width, + base_channels=self.base_channels, + radix=self.radix, + reduction_factor=self.reduction_factor, + avg_down_stride=self.avg_down_stride, + **kwargs) diff --git a/mmseg/models/backbones/resnet.py b/mmseg/models/backbones/resnet.py new file mode 100644 index 0000000000000000000000000000000000000000..f6c4c08d4789163edc6c5e78aa8306e006fdd25e --- /dev/null +++ b/mmseg/models/backbones/resnet.py @@ -0,0 +1,688 @@ +import torch.nn as nn +import torch.utils.checkpoint as cp +from mmcv.cnn import (build_conv_layer, build_norm_layer, build_plugin_layer, + constant_init, kaiming_init) +from mmcv.runner import load_checkpoint +from mmcv.utils.parrots_wrapper import _BatchNorm + +from mmseg.utils import get_root_logger +from ..builder import BACKBONES +from ..utils import ResLayer + + +class BasicBlock(nn.Module): + """Basic block for ResNet.""" + + expansion = 1 + + def __init__(self, + inplanes, + planes, + stride=1, + dilation=1, + downsample=None, + style='pytorch', + with_cp=False, + conv_cfg=None, + norm_cfg=dict(type='BN'), + dcn=None, + plugins=None): + super(BasicBlock, self).__init__() + assert dcn is None, 'Not implemented yet.' + assert plugins is None, 'Not implemented yet.' + + self.norm1_name, norm1 = build_norm_layer(norm_cfg, planes, postfix=1) + self.norm2_name, norm2 = build_norm_layer(norm_cfg, planes, postfix=2) + + self.conv1 = build_conv_layer( + conv_cfg, + inplanes, + planes, + 3, + stride=stride, + padding=dilation, + dilation=dilation, + bias=False) + self.add_module(self.norm1_name, norm1) + self.conv2 = build_conv_layer( + conv_cfg, planes, planes, 3, padding=1, bias=False) + self.add_module(self.norm2_name, norm2) + + self.relu = nn.ReLU(inplace=True) + self.downsample = downsample + self.stride = stride + self.dilation = dilation + self.with_cp = with_cp + + @property + def norm1(self): + """nn.Module: normalization layer after the first convolution layer""" + return getattr(self, self.norm1_name) + + @property + def norm2(self): + """nn.Module: normalization layer after the second convolution layer""" + return getattr(self, self.norm2_name) + + def forward(self, x): + """Forward function.""" + + def _inner_forward(x): + identity = x + + out = self.conv1(x) + out = self.norm1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.norm2(out) + + if self.downsample is not None: + identity = self.downsample(x) + + out += identity + + return out + + if self.with_cp and x.requires_grad: + out = cp.checkpoint(_inner_forward, x) + else: + out = _inner_forward(x) + + out = self.relu(out) + + return out + + +class Bottleneck(nn.Module): + """Bottleneck block for ResNet. + + If style is "pytorch", the stride-two layer is the 3x3 conv layer, if it is + "caffe", the stride-two layer is the first 1x1 conv layer. 
+ """ + + expansion = 4 + + def __init__(self, + inplanes, + planes, + stride=1, + dilation=1, + downsample=None, + style='pytorch', + with_cp=False, + conv_cfg=None, + norm_cfg=dict(type='BN'), + dcn=None, + plugins=None): + super(Bottleneck, self).__init__() + assert style in ['pytorch', 'caffe'] + assert dcn is None or isinstance(dcn, dict) + assert plugins is None or isinstance(plugins, list) + if plugins is not None: + allowed_position = ['after_conv1', 'after_conv2', 'after_conv3'] + assert all(p['position'] in allowed_position for p in plugins) + + self.inplanes = inplanes + self.planes = planes + self.stride = stride + self.dilation = dilation + self.style = style + self.with_cp = with_cp + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.dcn = dcn + self.with_dcn = dcn is not None + self.plugins = plugins + self.with_plugins = plugins is not None + + if self.with_plugins: + # collect plugins for conv1/conv2/conv3 + self.after_conv1_plugins = [ + plugin['cfg'] for plugin in plugins + if plugin['position'] == 'after_conv1' + ] + self.after_conv2_plugins = [ + plugin['cfg'] for plugin in plugins + if plugin['position'] == 'after_conv2' + ] + self.after_conv3_plugins = [ + plugin['cfg'] for plugin in plugins + if plugin['position'] == 'after_conv3' + ] + + if self.style == 'pytorch': + self.conv1_stride = 1 + self.conv2_stride = stride + else: + self.conv1_stride = stride + self.conv2_stride = 1 + + self.norm1_name, norm1 = build_norm_layer(norm_cfg, planes, postfix=1) + self.norm2_name, norm2 = build_norm_layer(norm_cfg, planes, postfix=2) + self.norm3_name, norm3 = build_norm_layer( + norm_cfg, planes * self.expansion, postfix=3) + + self.conv1 = build_conv_layer( + conv_cfg, + inplanes, + planes, + kernel_size=1, + stride=self.conv1_stride, + bias=False) + self.add_module(self.norm1_name, norm1) + fallback_on_stride = False + if self.with_dcn: + fallback_on_stride = dcn.pop('fallback_on_stride', False) + if not self.with_dcn or fallback_on_stride: + self.conv2 = build_conv_layer( + conv_cfg, + planes, + planes, + kernel_size=3, + stride=self.conv2_stride, + padding=dilation, + dilation=dilation, + bias=False) + else: + assert self.conv_cfg is None, 'conv_cfg must be None for DCN' + self.conv2 = build_conv_layer( + dcn, + planes, + planes, + kernel_size=3, + stride=self.conv2_stride, + padding=dilation, + dilation=dilation, + bias=False) + + self.add_module(self.norm2_name, norm2) + self.conv3 = build_conv_layer( + conv_cfg, + planes, + planes * self.expansion, + kernel_size=1, + bias=False) + self.add_module(self.norm3_name, norm3) + + self.relu = nn.ReLU(inplace=True) + self.downsample = downsample + + if self.with_plugins: + self.after_conv1_plugin_names = self.make_block_plugins( + planes, self.after_conv1_plugins) + self.after_conv2_plugin_names = self.make_block_plugins( + planes, self.after_conv2_plugins) + self.after_conv3_plugin_names = self.make_block_plugins( + planes * self.expansion, self.after_conv3_plugins) + + def make_block_plugins(self, in_channels, plugins): + """make plugins for block. + + Args: + in_channels (int): Input channels of plugin. + plugins (list[dict]): List of plugins cfg to build. + + Returns: + list[str]: List of the names of plugin. 
+ """ + assert isinstance(plugins, list) + plugin_names = [] + for plugin in plugins: + plugin = plugin.copy() + name, layer = build_plugin_layer( + plugin, + in_channels=in_channels, + postfix=plugin.pop('postfix', '')) + assert not hasattr(self, name), f'duplicate plugin {name}' + self.add_module(name, layer) + plugin_names.append(name) + return plugin_names + + def forward_plugin(self, x, plugin_names): + """Forward function for plugins.""" + out = x + for name in plugin_names: + out = getattr(self, name)(x) + return out + + @property + def norm1(self): + """nn.Module: normalization layer after the first convolution layer""" + return getattr(self, self.norm1_name) + + @property + def norm2(self): + """nn.Module: normalization layer after the second convolution layer""" + return getattr(self, self.norm2_name) + + @property + def norm3(self): + """nn.Module: normalization layer after the third convolution layer""" + return getattr(self, self.norm3_name) + + def forward(self, x): + """Forward function.""" + + def _inner_forward(x): + identity = x + + out = self.conv1(x) + out = self.norm1(out) + out = self.relu(out) + + if self.with_plugins: + out = self.forward_plugin(out, self.after_conv1_plugin_names) + + out = self.conv2(out) + out = self.norm2(out) + out = self.relu(out) + + if self.with_plugins: + out = self.forward_plugin(out, self.after_conv2_plugin_names) + + out = self.conv3(out) + out = self.norm3(out) + + if self.with_plugins: + out = self.forward_plugin(out, self.after_conv3_plugin_names) + + if self.downsample is not None: + identity = self.downsample(x) + + out += identity + + return out + + if self.with_cp and x.requires_grad: + out = cp.checkpoint(_inner_forward, x) + else: + out = _inner_forward(x) + + out = self.relu(out) + + return out + + +@BACKBONES.register_module() +class ResNet(nn.Module): + """ResNet backbone. + + Args: + depth (int): Depth of resnet, from {18, 34, 50, 101, 152}. + in_channels (int): Number of input image channels. Default" 3. + stem_channels (int): Number of stem channels. Default: 64. + base_channels (int): Number of base channels of res layer. Default: 64. + num_stages (int): Resnet stages, normally 4. + strides (Sequence[int]): Strides of the first block of each stage. + dilations (Sequence[int]): Dilation of each stage. + out_indices (Sequence[int]): Output from which stages. + style (str): `pytorch` or `caffe`. If set to "pytorch", the stride-two + layer is the 3x3 conv layer, otherwise the stride-two layer is + the first 1x1 conv layer. + deep_stem (bool): Replace 7x7 conv in input stem with 3 3x3 conv + avg_down (bool): Use AvgPool instead of stride conv when + downsampling in the bottleneck. + frozen_stages (int): Stages to be frozen (stop grad and set eval mode). + -1 means not freezing any parameters. + norm_cfg (dict): Dictionary to construct and config norm layer. + norm_eval (bool): Whether to set norm layers to eval mode, namely, + freeze running stats (mean and var). Note: Effect on Batch Norm + and its variants only. + plugins (list[dict]): List of plugins for stages, each dict contains: + + - cfg (dict, required): Cfg dict to build plugin. + + - position (str, required): Position inside block to insert plugin, + options: 'after_conv1', 'after_conv2', 'after_conv3'. + + - stages (tuple[bool], optional): Stages to apply plugin, length + should be same as 'num_stages' + multi_grid (Sequence[int]|None): Multi grid dilation rates of last + stage. 
Default: None + contract_dilation (bool): Whether contract first dilation of each layer + Default: False + with_cp (bool): Use checkpoint or not. Using checkpoint will save some + memory while slowing down the training speed. + zero_init_residual (bool): Whether to use zero init for last norm layer + in resblocks to let them behave as identity. + + Example: + >>> from mmseg.models import ResNet + >>> import torch + >>> self = ResNet(depth=18) + >>> self.eval() + >>> inputs = torch.rand(1, 3, 32, 32) + >>> level_outputs = self.forward(inputs) + >>> for level_out in level_outputs: + ... print(tuple(level_out.shape)) + (1, 64, 8, 8) + (1, 128, 4, 4) + (1, 256, 2, 2) + (1, 512, 1, 1) + """ + + arch_settings = { + 18: (BasicBlock, (2, 2, 2, 2)), + 34: (BasicBlock, (3, 4, 6, 3)), + 50: (Bottleneck, (3, 4, 6, 3)), + 101: (Bottleneck, (3, 4, 23, 3)), + 152: (Bottleneck, (3, 8, 36, 3)) + } + + def __init__(self, + depth, + in_channels=3, + stem_channels=64, + base_channels=64, + num_stages=4, + strides=(1, 2, 2, 2), + dilations=(1, 1, 1, 1), + out_indices=(0, 1, 2, 3), + style='pytorch', + deep_stem=False, + avg_down=False, + frozen_stages=-1, + conv_cfg=None, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=False, + dcn=None, + stage_with_dcn=(False, False, False, False), + plugins=None, + multi_grid=None, + contract_dilation=False, + with_cp=False, + zero_init_residual=True): + super(ResNet, self).__init__() + if depth not in self.arch_settings: + raise KeyError(f'invalid depth {depth} for resnet') + self.depth = depth + self.stem_channels = stem_channels + self.base_channels = base_channels + self.num_stages = num_stages + assert num_stages >= 1 and num_stages <= 4 + self.strides = strides + self.dilations = dilations + assert len(strides) == len(dilations) == num_stages + self.out_indices = out_indices + assert max(out_indices) < num_stages + self.style = style + self.deep_stem = deep_stem + self.avg_down = avg_down + self.frozen_stages = frozen_stages + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.with_cp = with_cp + self.norm_eval = norm_eval + self.dcn = dcn + self.stage_with_dcn = stage_with_dcn + if dcn is not None: + assert len(stage_with_dcn) == num_stages + self.plugins = plugins + self.multi_grid = multi_grid + self.contract_dilation = contract_dilation + self.zero_init_residual = zero_init_residual + self.block, stage_blocks = self.arch_settings[depth] + self.stage_blocks = stage_blocks[:num_stages] + self.inplanes = stem_channels + + self._make_stem_layer(in_channels, stem_channels) + + self.res_layers = [] + for i, num_blocks in enumerate(self.stage_blocks): + stride = strides[i] + dilation = dilations[i] + dcn = self.dcn if self.stage_with_dcn[i] else None + if plugins is not None: + stage_plugins = self.make_stage_plugins(plugins, i) + else: + stage_plugins = None + # multi grid is applied to last layer only + stage_multi_grid = multi_grid if i == len( + self.stage_blocks) - 1 else None + planes = base_channels * 2**i + res_layer = self.make_res_layer( + block=self.block, + inplanes=self.inplanes, + planes=planes, + num_blocks=num_blocks, + stride=stride, + dilation=dilation, + style=self.style, + avg_down=self.avg_down, + with_cp=with_cp, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + dcn=dcn, + plugins=stage_plugins, + multi_grid=stage_multi_grid, + contract_dilation=contract_dilation) + self.inplanes = planes * self.block.expansion + layer_name = f'layer{i+1}' + self.add_module(layer_name, res_layer) + self.res_layers.append(layer_name) + + 
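+        # All residual stages are now registered as layer1..layer{num_stages};
+        # the call below freezes the stem and the leading `frozen_stages`
+        # stages, and `feat_dim` records the channel width of the final
+        # stage output.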
self._freeze_stages() + + self.feat_dim = self.block.expansion * base_channels * 2**( + len(self.stage_blocks) - 1) + + def make_stage_plugins(self, plugins, stage_idx): + """make plugins for ResNet 'stage_idx'th stage . + + Currently we support to insert 'context_block', + 'empirical_attention_block', 'nonlocal_block' into the backbone like + ResNet/ResNeXt. They could be inserted after conv1/conv2/conv3 of + Bottleneck. + + An example of plugins format could be : + >>> plugins=[ + ... dict(cfg=dict(type='xxx', arg1='xxx'), + ... stages=(False, True, True, True), + ... position='after_conv2'), + ... dict(cfg=dict(type='yyy'), + ... stages=(True, True, True, True), + ... position='after_conv3'), + ... dict(cfg=dict(type='zzz', postfix='1'), + ... stages=(True, True, True, True), + ... position='after_conv3'), + ... dict(cfg=dict(type='zzz', postfix='2'), + ... stages=(True, True, True, True), + ... position='after_conv3') + ... ] + >>> self = ResNet(depth=18) + >>> stage_plugins = self.make_stage_plugins(plugins, 0) + >>> assert len(stage_plugins) == 3 + + Suppose 'stage_idx=0', the structure of blocks in the stage would be: + conv1-> conv2->conv3->yyy->zzz1->zzz2 + Suppose 'stage_idx=1', the structure of blocks in the stage would be: + conv1-> conv2->xxx->conv3->yyy->zzz1->zzz2 + + If stages is missing, the plugin would be applied to all stages. + + Args: + plugins (list[dict]): List of plugins cfg to build. The postfix is + required if multiple same type plugins are inserted. + stage_idx (int): Index of stage to build + + Returns: + list[dict]: Plugins for current stage + """ + stage_plugins = [] + for plugin in plugins: + plugin = plugin.copy() + stages = plugin.pop('stages', None) + assert stages is None or len(stages) == self.num_stages + # whether to insert plugin into current stage + if stages is None or stages[stage_idx]: + stage_plugins.append(plugin) + + return stage_plugins + + def make_res_layer(self, **kwargs): + """Pack all blocks in a stage into a ``ResLayer``.""" + return ResLayer(**kwargs) + + @property + def norm1(self): + """nn.Module: the normalization layer named "norm1" """ + return getattr(self, self.norm1_name) + + def _make_stem_layer(self, in_channels, stem_channels): + """Make stem layer for ResNet.""" + if self.deep_stem: + self.stem = nn.Sequential( + build_conv_layer( + self.conv_cfg, + in_channels, + stem_channels // 2, + kernel_size=3, + stride=2, + padding=1, + bias=False), + build_norm_layer(self.norm_cfg, stem_channels // 2)[1], + nn.ReLU(inplace=True), + build_conv_layer( + self.conv_cfg, + stem_channels // 2, + stem_channels // 2, + kernel_size=3, + stride=1, + padding=1, + bias=False), + build_norm_layer(self.norm_cfg, stem_channels // 2)[1], + nn.ReLU(inplace=True), + build_conv_layer( + self.conv_cfg, + stem_channels // 2, + stem_channels, + kernel_size=3, + stride=1, + padding=1, + bias=False), + build_norm_layer(self.norm_cfg, stem_channels)[1], + nn.ReLU(inplace=True)) + else: + self.conv1 = build_conv_layer( + self.conv_cfg, + in_channels, + stem_channels, + kernel_size=7, + stride=2, + padding=3, + bias=False) + self.norm1_name, norm1 = build_norm_layer( + self.norm_cfg, stem_channels, postfix=1) + self.add_module(self.norm1_name, norm1) + self.relu = nn.ReLU(inplace=True) + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) + + def _freeze_stages(self): + """Freeze stages param and norm stats.""" + if self.frozen_stages >= 0: + if self.deep_stem: + self.stem.eval() + for param in self.stem.parameters(): + param.requires_grad = 
False + else: + self.norm1.eval() + for m in [self.conv1, self.norm1]: + for param in m.parameters(): + param.requires_grad = False + + for i in range(1, self.frozen_stages + 1): + m = getattr(self, f'layer{i}') + m.eval() + for param in m.parameters(): + param.requires_grad = False + + def init_weights(self, pretrained=None): + """Initialize the weights in backbone. + + Args: + pretrained (str, optional): Path to pre-trained weights. + Defaults to None. + """ + if isinstance(pretrained, str): + logger = get_root_logger() + load_checkpoint(self, pretrained, strict=False, logger=logger) + elif pretrained is None: + for m in self.modules(): + if isinstance(m, nn.Conv2d): + kaiming_init(m) + elif isinstance(m, (_BatchNorm, nn.GroupNorm)): + constant_init(m, 1) + + if self.dcn is not None: + for m in self.modules(): + if isinstance(m, Bottleneck) and hasattr( + m, 'conv2_offset'): + constant_init(m.conv2_offset, 0) + + if self.zero_init_residual: + for m in self.modules(): + if isinstance(m, Bottleneck): + constant_init(m.norm3, 0) + elif isinstance(m, BasicBlock): + constant_init(m.norm2, 0) + else: + raise TypeError('pretrained must be a str or None') + + def forward(self, x): + """Forward function.""" + if self.deep_stem: + x = self.stem(x) + else: + x = self.conv1(x) + x = self.norm1(x) + x = self.relu(x) + x = self.maxpool(x) + outs = [] + for i, layer_name in enumerate(self.res_layers): + res_layer = getattr(self, layer_name) + x = res_layer(x) + if i in self.out_indices: + outs.append(x) + return tuple(outs) + + def train(self, mode=True): + """Convert the model into training mode while keep normalization layer + freezed.""" + super(ResNet, self).train(mode) + self._freeze_stages() + if mode and self.norm_eval: + for m in self.modules(): + # trick: eval have effect on BatchNorm only + if isinstance(m, _BatchNorm): + m.eval() + + +@BACKBONES.register_module() +class ResNetV1c(ResNet): + """ResNetV1c variant described in [1]_. + + Compared with default ResNet(ResNetV1b), ResNetV1c replaces the 7x7 conv + in the input stem with three 3x3 convs. + + References: + .. [1] https://arxiv.org/pdf/1812.01187.pdf + """ + + def __init__(self, **kwargs): + super(ResNetV1c, self).__init__( + deep_stem=True, avg_down=False, **kwargs) + + +@BACKBONES.register_module() +class ResNetV1d(ResNet): + """ResNetV1d variant described in [1]_. + + Compared with default ResNet(ResNetV1b), ResNetV1d replaces the 7x7 conv in + the input stem with three 3x3 convs. And in the downsampling block, a 2x2 + avg_pool with stride 2 is added before conv, whose stride is changed to 1. + """ + + def __init__(self, **kwargs): + super(ResNetV1d, self).__init__( + deep_stem=True, avg_down=True, **kwargs) diff --git a/mmseg/models/backbones/resnext.py b/mmseg/models/backbones/resnext.py new file mode 100644 index 0000000000000000000000000000000000000000..fa8149ce2fcbf67703248621c4f5dfb90a852379 --- /dev/null +++ b/mmseg/models/backbones/resnext.py @@ -0,0 +1,145 @@ +import math + +from mmcv.cnn import build_conv_layer, build_norm_layer + +from ..builder import BACKBONES +from ..utils import ResLayer +from .resnet import Bottleneck as _Bottleneck +from .resnet import ResNet + + +class Bottleneck(_Bottleneck): + """Bottleneck block for ResNeXt. + + If style is "pytorch", the stride-two layer is the 3x3 conv layer, if it is + "caffe", the stride-two layer is the first 1x1 conv layer. 
+ """ + + def __init__(self, + inplanes, + planes, + groups=1, + base_width=4, + base_channels=64, + **kwargs): + super(Bottleneck, self).__init__(inplanes, planes, **kwargs) + + if groups == 1: + width = self.planes + else: + width = math.floor(self.planes * + (base_width / base_channels)) * groups + + self.norm1_name, norm1 = build_norm_layer( + self.norm_cfg, width, postfix=1) + self.norm2_name, norm2 = build_norm_layer( + self.norm_cfg, width, postfix=2) + self.norm3_name, norm3 = build_norm_layer( + self.norm_cfg, self.planes * self.expansion, postfix=3) + + self.conv1 = build_conv_layer( + self.conv_cfg, + self.inplanes, + width, + kernel_size=1, + stride=self.conv1_stride, + bias=False) + self.add_module(self.norm1_name, norm1) + fallback_on_stride = False + self.with_modulated_dcn = False + if self.with_dcn: + fallback_on_stride = self.dcn.pop('fallback_on_stride', False) + if not self.with_dcn or fallback_on_stride: + self.conv2 = build_conv_layer( + self.conv_cfg, + width, + width, + kernel_size=3, + stride=self.conv2_stride, + padding=self.dilation, + dilation=self.dilation, + groups=groups, + bias=False) + else: + assert self.conv_cfg is None, 'conv_cfg must be None for DCN' + self.conv2 = build_conv_layer( + self.dcn, + width, + width, + kernel_size=3, + stride=self.conv2_stride, + padding=self.dilation, + dilation=self.dilation, + groups=groups, + bias=False) + + self.add_module(self.norm2_name, norm2) + self.conv3 = build_conv_layer( + self.conv_cfg, + width, + self.planes * self.expansion, + kernel_size=1, + bias=False) + self.add_module(self.norm3_name, norm3) + + +@BACKBONES.register_module() +class ResNeXt(ResNet): + """ResNeXt backbone. + + Args: + depth (int): Depth of resnet, from {18, 34, 50, 101, 152}. + in_channels (int): Number of input image channels. Normally 3. + num_stages (int): Resnet stages, normally 4. + groups (int): Group of resnext. + base_width (int): Base width of resnext. + strides (Sequence[int]): Strides of the first block of each stage. + dilations (Sequence[int]): Dilation of each stage. + out_indices (Sequence[int]): Output from which stages. + style (str): `pytorch` or `caffe`. If set to "pytorch", the stride-two + layer is the 3x3 conv layer, otherwise the stride-two layer is + the first 1x1 conv layer. + frozen_stages (int): Stages to be frozen (all param fixed). -1 means + not freezing any parameters. + norm_cfg (dict): dictionary to construct and config norm layer. + norm_eval (bool): Whether to set norm layers to eval mode, namely, + freeze running stats (mean and var). Note: Effect on Batch Norm + and its variants only. + with_cp (bool): Use checkpoint or not. Using checkpoint will save some + memory while slowing down the training speed. + zero_init_residual (bool): whether to use zero init for last norm layer + in resblocks to let them behave as identity. + + Example: + >>> from mmseg.models import ResNeXt + >>> import torch + >>> self = ResNeXt(depth=50) + >>> self.eval() + >>> inputs = torch.rand(1, 3, 32, 32) + >>> level_outputs = self.forward(inputs) + >>> for level_out in level_outputs: + ... 
print(tuple(level_out.shape)) + (1, 256, 8, 8) + (1, 512, 4, 4) + (1, 1024, 2, 2) + (1, 2048, 1, 1) + """ + + arch_settings = { + 50: (Bottleneck, (3, 4, 6, 3)), + 101: (Bottleneck, (3, 4, 23, 3)), + 152: (Bottleneck, (3, 8, 36, 3)) + } + + def __init__(self, groups=1, base_width=4, **kwargs): + self.groups = groups + self.base_width = base_width + super(ResNeXt, self).__init__(**kwargs) + + def make_res_layer(self, **kwargs): + """Pack all blocks in a stage into a ``ResLayer``""" + return ResLayer( + groups=self.groups, + base_width=self.base_width, + base_channels=self.base_channels, + **kwargs) diff --git a/mmseg/models/backbones/unet.py b/mmseg/models/backbones/unet.py new file mode 100644 index 0000000000000000000000000000000000000000..6cbda009df009729dffc9da9e6da119a19f3cd53 --- /dev/null +++ b/mmseg/models/backbones/unet.py @@ -0,0 +1,429 @@ +import torch.nn as nn +import torch.utils.checkpoint as cp +from mmcv.cnn import (UPSAMPLE_LAYERS, ConvModule, build_activation_layer, + build_norm_layer, constant_init, kaiming_init) +from mmcv.runner import load_checkpoint +from mmcv.utils.parrots_wrapper import _BatchNorm + +from mmseg.utils import get_root_logger +from ..builder import BACKBONES +from ..utils import UpConvBlock + + +class BasicConvBlock(nn.Module): + """Basic convolutional block for UNet. + + This module consists of several plain convolutional layers. + + Args: + in_channels (int): Number of input channels. + out_channels (int): Number of output channels. + num_convs (int): Number of convolutional layers. Default: 2. + stride (int): Whether use stride convolution to downsample + the input feature map. If stride=2, it only uses stride convolution + in the first convolutional layer to downsample the input feature + map. Options are 1 or 2. Default: 1. + dilation (int): Whether use dilated convolution to expand the + receptive field. Set dilation rate of each convolutional layer and + the dilation rate of the first convolutional layer is always 1. + Default: 1. + with_cp (bool): Use checkpoint or not. Using checkpoint will save some + memory while slowing down the training speed. Default: False. + conv_cfg (dict | None): Config dict for convolution layer. + Default: None. + norm_cfg (dict | None): Config dict for normalization layer. + Default: dict(type='BN'). + act_cfg (dict | None): Config dict for activation layer in ConvModule. + Default: dict(type='ReLU'). + dcn (bool): Use deformable convolution in convolutional layer or not. + Default: None. + plugins (dict): plugins for convolutional layers. Default: None. + """ + + def __init__(self, + in_channels, + out_channels, + num_convs=2, + stride=1, + dilation=1, + with_cp=False, + conv_cfg=None, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU'), + dcn=None, + plugins=None): + super(BasicConvBlock, self).__init__() + assert dcn is None, 'Not implemented yet.' + assert plugins is None, 'Not implemented yet.' 
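+        # Stack `num_convs` ConvModules: only the first conv carries the
+        # stride (for optional downsampling) and uses dilation 1; the
+        # remaining convs apply `dilation` with matching padding so the
+        # spatial size is preserved.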
+ + self.with_cp = with_cp + convs = [] + for i in range(num_convs): + convs.append( + ConvModule( + in_channels=in_channels if i == 0 else out_channels, + out_channels=out_channels, + kernel_size=3, + stride=stride if i == 0 else 1, + dilation=1 if i == 0 else dilation, + padding=1 if i == 0 else dilation, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg)) + + self.convs = nn.Sequential(*convs) + + def forward(self, x): + """Forward function.""" + + if self.with_cp and x.requires_grad: + out = cp.checkpoint(self.convs, x) + else: + out = self.convs(x) + return out + + +@UPSAMPLE_LAYERS.register_module() +class DeconvModule(nn.Module): + """Deconvolution upsample module in decoder for UNet (2X upsample). + + This module uses deconvolution to upsample feature map in the decoder + of UNet. + + Args: + in_channels (int): Number of input channels. + out_channels (int): Number of output channels. + with_cp (bool): Use checkpoint or not. Using checkpoint will save some + memory while slowing down the training speed. Default: False. + norm_cfg (dict | None): Config dict for normalization layer. + Default: dict(type='BN'). + act_cfg (dict | None): Config dict for activation layer in ConvModule. + Default: dict(type='ReLU'). + kernel_size (int): Kernel size of the convolutional layer. Default: 4. + """ + + def __init__(self, + in_channels, + out_channels, + with_cp=False, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU'), + *, + kernel_size=4, + scale_factor=2): + super(DeconvModule, self).__init__() + + assert (kernel_size - scale_factor >= 0) and\ + (kernel_size - scale_factor) % 2 == 0,\ + f'kernel_size should be greater than or equal to scale_factor '\ + f'and (kernel_size - scale_factor) should be even numbers, '\ + f'while the kernel size is {kernel_size} and scale_factor is '\ + f'{scale_factor}.' + + stride = scale_factor + padding = (kernel_size - scale_factor) // 2 + self.with_cp = with_cp + deconv = nn.ConvTranspose2d( + in_channels, + out_channels, + kernel_size=kernel_size, + stride=stride, + padding=padding) + + norm_name, norm = build_norm_layer(norm_cfg, out_channels) + activate = build_activation_layer(act_cfg) + self.deconv_upsamping = nn.Sequential(deconv, norm, activate) + + def forward(self, x): + """Forward function.""" + + if self.with_cp and x.requires_grad: + out = cp.checkpoint(self.deconv_upsamping, x) + else: + out = self.deconv_upsamping(x) + return out + + +@UPSAMPLE_LAYERS.register_module() +class InterpConv(nn.Module): + """Interpolation upsample module in decoder for UNet. + + This module uses interpolation to upsample feature map in the decoder + of UNet. It consists of one interpolation upsample layer and one + convolutional layer. It can be one interpolation upsample layer followed + by one convolutional layer (conv_first=False) or one convolutional layer + followed by one interpolation upsample layer (conv_first=True). + + Args: + in_channels (int): Number of input channels. + out_channels (int): Number of output channels. + with_cp (bool): Use checkpoint or not. Using checkpoint will save some + memory while slowing down the training speed. Default: False. + norm_cfg (dict | None): Config dict for normalization layer. + Default: dict(type='BN'). + act_cfg (dict | None): Config dict for activation layer in ConvModule. + Default: dict(type='ReLU'). + conv_cfg (dict | None): Config dict for convolution layer. + Default: None. + conv_first (bool): Whether convolutional layer or interpolation + upsample layer first. Default: False. 
It means the interpolation
+            upsample layer is followed by the convolutional layer.
+        kernel_size (int): Kernel size of the convolutional layer. Default: 1.
+        stride (int): Stride of the convolutional layer. Default: 1.
+        padding (int): Padding of the convolutional layer. Default: 0.
+        upsample_cfg (dict): Interpolation config of the upsample layer.
+            Default: dict(
+                scale_factor=2, mode='bilinear', align_corners=False).
+    """
+
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 with_cp=False,
+                 norm_cfg=dict(type='BN'),
+                 act_cfg=dict(type='ReLU'),
+                 *,
+                 conv_cfg=None,
+                 conv_first=False,
+                 kernel_size=1,
+                 stride=1,
+                 padding=0,
+                 upsample_cfg=dict(
+                     scale_factor=2, mode='bilinear', align_corners=False)):
+        super(InterpConv, self).__init__()
+
+        self.with_cp = with_cp
+        conv = ConvModule(
+            in_channels,
+            out_channels,
+            kernel_size=kernel_size,
+            stride=stride,
+            padding=padding,
+            conv_cfg=conv_cfg,
+            norm_cfg=norm_cfg,
+            act_cfg=act_cfg)
+        upsample = nn.Upsample(**upsample_cfg)
+        if conv_first:
+            self.interp_upsample = nn.Sequential(conv, upsample)
+        else:
+            self.interp_upsample = nn.Sequential(upsample, conv)
+
+    def forward(self, x):
+        """Forward function."""
+
+        if self.with_cp and x.requires_grad:
+            out = cp.checkpoint(self.interp_upsample, x)
+        else:
+            out = self.interp_upsample(x)
+        return out
+
+
+@BACKBONES.register_module()
+class UNet(nn.Module):
+    """UNet backbone.
+
+    U-Net: Convolutional Networks for Biomedical Image Segmentation.
+    https://arxiv.org/pdf/1505.04597.pdf
+
+    Args:
+        in_channels (int): Number of input image channels. Default: 3.
+        base_channels (int): Number of base channels of each stage.
+            The output channels of the first stage. Default: 64.
+        num_stages (int): Number of stages in encoder, normally 5. Default: 5.
+        strides (Sequence[int 1 | 2]): Strides of each stage in encoder.
+            len(strides) is equal to num_stages. Normally the stride of the
+            first stage in encoder is 1. If strides[i]=2, it uses stride
+            convolution to downsample in the corresponding encoder stage.
+            Default: (1, 1, 1, 1, 1).
+        enc_num_convs (Sequence[int]): Number of convolutional layers in the
+            convolution block of the corresponding encoder stage.
+            Default: (2, 2, 2, 2, 2).
+        dec_num_convs (Sequence[int]): Number of convolutional layers in the
+            convolution block of the corresponding decoder stage.
+            Default: (2, 2, 2, 2).
+        downsamples (Sequence[int]): Whether to use MaxPool to downsample the
+            feature map after the first stage of encoder
+            (stages: [1, num_stages)). If the corresponding encoder stage uses
+            stride convolution (strides[i]=2), it will never use MaxPool to
+            downsample, even if downsamples[i-1]=True.
+            Default: (True, True, True, True).
+        enc_dilations (Sequence[int]): Dilation rate of each stage in encoder.
+            Default: (1, 1, 1, 1, 1).
+        dec_dilations (Sequence[int]): Dilation rate of each stage in decoder.
+            Default: (1, 1, 1, 1).
+        with_cp (bool): Use checkpoint or not. Using checkpoint will save some
+            memory while slowing down the training speed. Default: False.
+        conv_cfg (dict | None): Config dict for convolution layer.
+            Default: None.
+        norm_cfg (dict | None): Config dict for normalization layer.
+            Default: dict(type='BN').
+        act_cfg (dict | None): Config dict for activation layer in ConvModule.
+            Default: dict(type='ReLU').
+        upsample_cfg (dict): The upsample config of the upsample module in
+            decoder. Default: dict(type='InterpConv').
+        norm_eval (bool): Whether to set norm layers to eval mode, namely,
+            freeze running stats (mean and var).
Note: Effect on Batch Norm + and its variants only. Default: False. + dcn (bool): Use deformable convolution in convolutional layer or not. + Default: None. + plugins (dict): plugins for convolutional layers. Default: None. + + Notice: + The input image size should be divisible by the whole downsample rate + of the encoder. More detail of the whole downsample rate can be found + in UNet._check_input_divisible. + + """ + + def __init__(self, + in_channels=3, + base_channels=64, + num_stages=5, + strides=(1, 1, 1, 1, 1), + enc_num_convs=(2, 2, 2, 2, 2), + dec_num_convs=(2, 2, 2, 2), + downsamples=(True, True, True, True), + enc_dilations=(1, 1, 1, 1, 1), + dec_dilations=(1, 1, 1, 1), + with_cp=False, + conv_cfg=None, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU'), + upsample_cfg=dict(type='InterpConv'), + norm_eval=False, + dcn=None, + plugins=None): + super(UNet, self).__init__() + assert dcn is None, 'Not implemented yet.' + assert plugins is None, 'Not implemented yet.' + assert len(strides) == num_stages, \ + 'The length of strides should be equal to num_stages, '\ + f'while the strides is {strides}, the length of '\ + f'strides is {len(strides)}, and the num_stages is '\ + f'{num_stages}.' + assert len(enc_num_convs) == num_stages, \ + 'The length of enc_num_convs should be equal to num_stages, '\ + f'while the enc_num_convs is {enc_num_convs}, the length of '\ + f'enc_num_convs is {len(enc_num_convs)}, and the num_stages is '\ + f'{num_stages}.' + assert len(dec_num_convs) == (num_stages-1), \ + 'The length of dec_num_convs should be equal to (num_stages-1), '\ + f'while the dec_num_convs is {dec_num_convs}, the length of '\ + f'dec_num_convs is {len(dec_num_convs)}, and the num_stages is '\ + f'{num_stages}.' + assert len(downsamples) == (num_stages-1), \ + 'The length of downsamples should be equal to (num_stages-1), '\ + f'while the downsamples is {downsamples}, the length of '\ + f'downsamples is {len(downsamples)}, and the num_stages is '\ + f'{num_stages}.' + assert len(enc_dilations) == num_stages, \ + 'The length of enc_dilations should be equal to num_stages, '\ + f'while the enc_dilations is {enc_dilations}, the length of '\ + f'enc_dilations is {len(enc_dilations)}, and the num_stages is '\ + f'{num_stages}.' + assert len(dec_dilations) == (num_stages-1), \ + 'The length of dec_dilations should be equal to (num_stages-1), '\ + f'while the dec_dilations is {dec_dilations}, the length of '\ + f'dec_dilations is {len(dec_dilations)}, and the num_stages is '\ + f'{num_stages}.' 
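+        # Encoder stage i outputs base_channels * 2**i channels; each decoder
+        # stage (built for i >= 1) halves the width back to
+        # base_channels * 2**(i - 1), upsampling its input only when the
+        # matching encoder stage reduced resolution, and fuses it with the
+        # skip feature from encoder stage i - 1 via UpConvBlock.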
+ self.num_stages = num_stages + self.strides = strides + self.downsamples = downsamples + self.norm_eval = norm_eval + self.base_channels = base_channels + + self.encoder = nn.ModuleList() + self.decoder = nn.ModuleList() + + for i in range(num_stages): + enc_conv_block = [] + if i != 0: + if strides[i] == 1 and downsamples[i - 1]: + enc_conv_block.append(nn.MaxPool2d(kernel_size=2)) + upsample = (strides[i] != 1 or downsamples[i - 1]) + self.decoder.append( + UpConvBlock( + conv_block=BasicConvBlock, + in_channels=base_channels * 2**i, + skip_channels=base_channels * 2**(i - 1), + out_channels=base_channels * 2**(i - 1), + num_convs=dec_num_convs[i - 1], + stride=1, + dilation=dec_dilations[i - 1], + with_cp=with_cp, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + upsample_cfg=upsample_cfg if upsample else None, + dcn=None, + plugins=None)) + + enc_conv_block.append( + BasicConvBlock( + in_channels=in_channels, + out_channels=base_channels * 2**i, + num_convs=enc_num_convs[i], + stride=strides[i], + dilation=enc_dilations[i], + with_cp=with_cp, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + dcn=None, + plugins=None)) + self.encoder.append((nn.Sequential(*enc_conv_block))) + in_channels = base_channels * 2**i + + def forward(self, x): + self._check_input_divisible(x) + enc_outs = [] + for enc in self.encoder: + x = enc(x) + enc_outs.append(x) + dec_outs = [x] + for i in reversed(range(len(self.decoder))): + x = self.decoder[i](enc_outs[i], x) + dec_outs.append(x) + + return dec_outs + + def train(self, mode=True): + """Convert the model into training mode while keep normalization layer + freezed.""" + super(UNet, self).train(mode) + if mode and self.norm_eval: + for m in self.modules(): + # trick: eval have effect on BatchNorm only + if isinstance(m, _BatchNorm): + m.eval() + + def _check_input_divisible(self, x): + h, w = x.shape[-2:] + whole_downsample_rate = 1 + for i in range(1, self.num_stages): + if self.strides[i] == 2 or self.downsamples[i - 1]: + whole_downsample_rate *= 2 + assert (h % whole_downsample_rate == 0) \ + and (w % whole_downsample_rate == 0),\ + f'The input image size {(h, w)} should be divisible by the whole '\ + f'downsample rate {whole_downsample_rate}, when num_stages is '\ + f'{self.num_stages}, strides is {self.strides}, and downsamples '\ + f'is {self.downsamples}.' + + def init_weights(self, pretrained=None): + """Initialize the weights in backbone. + + Args: + pretrained (str, optional): Path to pre-trained weights. + Defaults to None. 
+ """ + if isinstance(pretrained, str): + logger = get_root_logger() + load_checkpoint(self, pretrained, strict=False, logger=logger) + elif pretrained is None: + for m in self.modules(): + if isinstance(m, nn.Conv2d): + kaiming_init(m) + elif isinstance(m, (_BatchNorm, nn.GroupNorm)): + constant_init(m, 1) + else: + raise TypeError('pretrained must be a str or None') diff --git a/mmseg/models/backbones/uniformer.py b/mmseg/models/backbones/uniformer.py new file mode 100644 index 0000000000000000000000000000000000000000..f599e981d0a31d5cd5dae01595dc1ff30513ccc5 --- /dev/null +++ b/mmseg/models/backbones/uniformer.py @@ -0,0 +1,422 @@ +# -------------------------------------------------------- +# UniFormer +# Copyright (c) 2022 SenseTime X-Lab +# Licensed under The MIT License [see LICENSE for details] +# Written by Kunchang Li +# -------------------------------------------------------- + +from collections import OrderedDict +import math + +from functools import partial +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.utils.checkpoint as checkpoint +import numpy as np +from timm.models.layers import DropPath, to_2tuple, trunc_normal_ + +from mmcv_custom import load_checkpoint +from mmseg.utils import get_root_logger +from ..builder import BACKBONES + + +class Mlp(nn.Module): + def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.): + super().__init__() + out_features = out_features or in_features + hidden_features = hidden_features or in_features + self.fc1 = nn.Linear(in_features, hidden_features) + self.act = act_layer() + self.fc2 = nn.Linear(hidden_features, out_features) + self.drop = nn.Dropout(drop) + + def forward(self, x): + x = self.fc1(x) + x = self.act(x) + x = self.drop(x) + x = self.fc2(x) + x = self.drop(x) + return x + + +class CMlp(nn.Module): + def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.): + super().__init__() + out_features = out_features or in_features + hidden_features = hidden_features or in_features + self.fc1 = nn.Conv2d(in_features, hidden_features, 1) + self.act = act_layer() + self.fc2 = nn.Conv2d(hidden_features, out_features, 1) + self.drop = nn.Dropout(drop) + + def forward(self, x): + x = self.fc1(x) + x = self.act(x) + x = self.drop(x) + x = self.fc2(x) + x = self.drop(x) + return x + + +class CBlock(nn.Module): + def __init__(self, dim, num_heads, mlp_ratio=4., qkv_bias=False, qk_scale=None, drop=0., attn_drop=0., + drop_path=0., act_layer=nn.GELU, norm_layer=nn.LayerNorm): + super().__init__() + self.pos_embed = nn.Conv2d(dim, dim, 3, padding=1, groups=dim) + self.norm1 = nn.BatchNorm2d(dim) + self.conv1 = nn.Conv2d(dim, dim, 1) + self.conv2 = nn.Conv2d(dim, dim, 1) + self.attn = nn.Conv2d(dim, dim, 5, padding=2, groups=dim) + # NOTE: drop path for stochastic depth, we shall see if this is better than dropout here + self.drop_path = DropPath(drop_path) if drop_path > 0. 
else nn.Identity() + self.norm2 = nn.BatchNorm2d(dim) + mlp_hidden_dim = int(dim * mlp_ratio) + self.mlp = CMlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop) + + def forward(self, x): + x = x + self.pos_embed(x) + x = x + self.drop_path(self.conv2(self.attn(self.conv1(self.norm1(x))))) + x = x + self.drop_path(self.mlp(self.norm2(x))) + return x + + +class Attention(nn.Module): + def __init__(self, dim, num_heads=8, qkv_bias=False, qk_scale=None, attn_drop=0., proj_drop=0.): + super().__init__() + self.num_heads = num_heads + head_dim = dim // num_heads + # NOTE scale factor was wrong in my original version, can set manually to be compat with prev weights + self.scale = qk_scale or head_dim ** -0.5 + + self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias) + self.attn_drop = nn.Dropout(attn_drop) + self.proj = nn.Linear(dim, dim) + self.proj_drop = nn.Dropout(proj_drop) + + def forward(self, x): + B, N, C = x.shape + qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4) + q, k, v = qkv[0], qkv[1], qkv[2] # make torchscript happy (cannot use tensor as tuple) + + attn = (q @ k.transpose(-2, -1)) * self.scale + attn = attn.softmax(dim=-1) + attn = self.attn_drop(attn) + + x = (attn @ v).transpose(1, 2).reshape(B, N, C) + x = self.proj(x) + x = self.proj_drop(x) + return x + + +class SABlock(nn.Module): + def __init__(self, dim, num_heads, mlp_ratio=4., qkv_bias=False, qk_scale=None, drop=0., attn_drop=0., + drop_path=0., act_layer=nn.GELU, norm_layer=nn.LayerNorm): + super().__init__() + self.pos_embed = nn.Conv2d(dim, dim, 3, padding=1, groups=dim) + self.norm1 = norm_layer(dim) + self.attn = Attention( + dim, + num_heads=num_heads, qkv_bias=qkv_bias, qk_scale=qk_scale, + attn_drop=attn_drop, proj_drop=drop) + # NOTE: drop path for stochastic depth, we shall see if this is better than dropout here + self.drop_path = DropPath(drop_path) if drop_path > 0. 
else nn.Identity() + self.norm2 = norm_layer(dim) + mlp_hidden_dim = int(dim * mlp_ratio) + self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop) + + def forward(self, x): + x = x + self.pos_embed(x) + B, N, H, W = x.shape + x = x.flatten(2).transpose(1, 2) + x = x + self.drop_path(self.attn(self.norm1(x))) + x = x + self.drop_path(self.mlp(self.norm2(x))) + x = x.transpose(1, 2).reshape(B, N, H, W) + return x + + +def window_partition(x, window_size): + """ + Args: + x: (B, H, W, C) + window_size (int): window size + Returns: + windows: (num_windows*B, window_size, window_size, C) + """ + B, H, W, C = x.shape + x = x.view(B, H // window_size, window_size, W // window_size, window_size, C) + windows = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(-1, window_size, window_size, C) + return windows + + +def window_reverse(windows, window_size, H, W): + """ + Args: + windows: (num_windows*B, window_size, window_size, C) + window_size (int): Window size + H (int): Height of image + W (int): Width of image + Returns: + x: (B, H, W, C) + """ + B = int(windows.shape[0] / (H * W / window_size / window_size)) + x = windows.view(B, H // window_size, W // window_size, window_size, window_size, -1) + x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1) + return x + + +class SABlock_Windows(nn.Module): + def __init__(self, dim, num_heads, window_size=14, mlp_ratio=4., qkv_bias=False, qk_scale=None, drop=0., attn_drop=0., + drop_path=0., act_layer=nn.GELU, norm_layer=nn.LayerNorm): + super().__init__() + self.window_size=window_size + self.pos_embed = nn.Conv2d(dim, dim, 3, padding=1, groups=dim) + self.norm1 = norm_layer(dim) + self.attn = Attention( + dim, + num_heads=num_heads, qkv_bias=qkv_bias, qk_scale=qk_scale, + attn_drop=attn_drop, proj_drop=drop) + # NOTE: drop path for stochastic depth, we shall see if this is better than dropout here + self.drop_path = DropPath(drop_path) if drop_path > 0. 
else nn.Identity()
+        self.norm2 = norm_layer(dim)
+        mlp_hidden_dim = int(dim * mlp_ratio)
+        self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop)
+
+    def forward(self, x):
+        x = x + self.pos_embed(x)
+        x = x.permute(0, 2, 3, 1)
+        B, H, W, C = x.shape
+        shortcut = x
+        x = self.norm1(x)
+
+        # pad the feature map to a multiple of the window size
+        pad_l = pad_t = 0
+        pad_r = (self.window_size - W % self.window_size) % self.window_size
+        pad_b = (self.window_size - H % self.window_size) % self.window_size
+        x = F.pad(x, (0, 0, pad_l, pad_r, pad_t, pad_b))
+        _, Hp, Wp, _ = x.shape
+
+        x_windows = window_partition(x, self.window_size)  # nW*B, window_size, window_size, C
+        x_windows = x_windows.view(-1, self.window_size * self.window_size, C)  # nW*B, window_size*window_size, C
+
+        # window-wise multi-head self-attention (no shifted windows here)
+        attn_windows = self.attn(x_windows)  # nW*B, window_size*window_size, C
+
+        # merge windows
+        attn_windows = attn_windows.view(-1, self.window_size, self.window_size, C)
+        x = window_reverse(attn_windows, self.window_size, Hp, Wp)  # B H' W' C
+
+        # remove the padding
+        if pad_r > 0 or pad_b > 0:
+            x = x[:, :H, :W, :].contiguous()
+
+        x = shortcut + self.drop_path(x)
+        x = x + self.drop_path(self.mlp(self.norm2(x)))
+        x = x.permute(0, 3, 1, 2).reshape(B, C, H, W)
+        return x
+
+
+class PatchEmbed(nn.Module):
+    """ Image to Patch Embedding
+    """
+    def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=768):
+        super().__init__()
+        img_size = to_2tuple(img_size)
+        patch_size = to_2tuple(patch_size)
+        num_patches = (img_size[1] // patch_size[1]) * (img_size[0] // patch_size[0])
+        self.img_size = img_size
+        self.patch_size = patch_size
+        self.num_patches = num_patches
+        self.norm = nn.LayerNorm(embed_dim)
+        self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_size, stride=patch_size)
+
+    def forward(self, x):
+        x = self.proj(x)
+        B, _, H, W = x.shape
+        x = x.flatten(2).transpose(1, 2)
+        x = self.norm(x)
+        x = x.reshape(B, H, W, -1).permute(0, 3, 1, 2).contiguous()
+        return x
+
+
+@BACKBONES.register_module()
+class UniFormer(nn.Module):
+    """ UniFormer backbone
+    A PyTorch impl of : `UniFormer: Unifying Convolution and Self-attention
+        for Visual Recognition`
+    """
+    def __init__(self, layers=[3, 4, 8, 3], img_size=224, in_chans=3, num_classes=80, embed_dim=[64, 128, 320, 512],
+                 head_dim=64, mlp_ratio=4., qkv_bias=True, qk_scale=None, representation_size=None,
+                 drop_rate=0., attn_drop_rate=0., drop_path_rate=0., norm_layer=partial(nn.LayerNorm, eps=1e-6),
+                 pretrained_path=None, use_checkpoint=False, checkpoint_num=[0, 0, 0, 0],
+                 windows=False, hybrid=False, window_size=14):
+        """
+        Args:
+            layers (list): number of blocks in each stage
+            img_size (int, tuple): input image size
+            in_chans (int): number of input channels
+            num_classes (int): number of classes for classification head
+            embed_dim (list): embedding dimension of each stage
+            head_dim (int): dimension of attention heads
+            mlp_ratio (int): ratio of mlp hidden dim to embedding dim
+            qkv_bias (bool): enable bias for qkv if True
+            qk_scale (float): override default qk scale of head_dim ** -0.5 if set
+            representation_size (Optional[int]): enable and set representation layer (pre-logits) to this value if set
+            drop_rate (float): dropout rate
+            attn_drop_rate (float): attention dropout rate
+            drop_path_rate (float): stochastic depth rate
+            norm_layer (nn.Module): normalization layer
+            pretrained_path (str): path of pretrained model
+            use_checkpoint (bool): whether use checkpoint
+            checkpoint_num (list):
index for using checkpoint in every stage + windows (bool): whether use window MHRA + hybrid (bool): whether use hybrid MHRA + window_size (int): size of window (>14) + """ + super().__init__() + self.num_classes = num_classes + self.use_checkpoint = use_checkpoint + self.checkpoint_num = checkpoint_num + self.windows = windows + print(f'Use Checkpoint: {self.use_checkpoint}') + print(f'Checkpoint Number: {self.checkpoint_num}') + self.num_features = self.embed_dim = embed_dim # num_features for consistency with other models + norm_layer = norm_layer or partial(nn.LayerNorm, eps=1e-6) + + self.patch_embed1 = PatchEmbed( + img_size=img_size, patch_size=4, in_chans=in_chans, embed_dim=embed_dim[0]) + self.patch_embed2 = PatchEmbed( + img_size=img_size // 4, patch_size=2, in_chans=embed_dim[0], embed_dim=embed_dim[1]) + self.patch_embed3 = PatchEmbed( + img_size=img_size // 8, patch_size=2, in_chans=embed_dim[1], embed_dim=embed_dim[2]) + self.patch_embed4 = PatchEmbed( + img_size=img_size // 16, patch_size=2, in_chans=embed_dim[2], embed_dim=embed_dim[3]) + + self.pos_drop = nn.Dropout(p=drop_rate) + dpr = [x.item() for x in torch.linspace(0, drop_path_rate, sum(layers))] # stochastic depth decay rule + num_heads = [dim // head_dim for dim in embed_dim] + self.blocks1 = nn.ModuleList([ + CBlock( + dim=embed_dim[0], num_heads=num_heads[0], mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, qk_scale=qk_scale, + drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[i], norm_layer=norm_layer) + for i in range(layers[0])]) + self.norm1=norm_layer(embed_dim[0]) + self.blocks2 = nn.ModuleList([ + CBlock( + dim=embed_dim[1], num_heads=num_heads[1], mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, qk_scale=qk_scale, + drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[i+layers[0]], norm_layer=norm_layer) + for i in range(layers[1])]) + self.norm2 = norm_layer(embed_dim[1]) + if self.windows: + print('Use local window for all blocks in stage3') + self.blocks3 = nn.ModuleList([ + SABlock_Windows( + dim=embed_dim[2], num_heads=num_heads[2], window_size=window_size, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, qk_scale=qk_scale, + drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[i+layers[0]+layers[1]], norm_layer=norm_layer) + for i in range(layers[2])]) + elif hybrid: + print('Use hybrid window for blocks in stage3') + block3 = [] + for i in range(layers[2]): + if (i + 1) % 4 == 0: + block3.append(SABlock( + dim=embed_dim[2], num_heads=num_heads[2], mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, qk_scale=qk_scale, + drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[i+layers[0]+layers[1]], norm_layer=norm_layer)) + else: + block3.append(SABlock_Windows( + dim=embed_dim[2], num_heads=num_heads[2], window_size=window_size, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, qk_scale=qk_scale, + drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[i+layers[0]+layers[1]], norm_layer=norm_layer)) + self.blocks3 = nn.ModuleList(block3) + else: + print('Use global window for all blocks in stage3') + self.blocks3 = nn.ModuleList([ + SABlock( + dim=embed_dim[2], num_heads=num_heads[2], mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, qk_scale=qk_scale, + drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[i+layers[0]+layers[1]], norm_layer=norm_layer) + for i in range(layers[2])]) + self.norm3 = norm_layer(embed_dim[2]) + self.blocks4 = nn.ModuleList([ + SABlock( + dim=embed_dim[3], num_heads=num_heads[3], mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, qk_scale=qk_scale, + drop=drop_rate, attn_drop=attn_drop_rate, 
drop_path=dpr[i+layers[0]+layers[1]+layers[2]], norm_layer=norm_layer) + for i in range(layers[3])]) + self.norm4 = norm_layer(embed_dim[3]) + + # Representation layer + if representation_size: + self.num_features = representation_size + self.pre_logits = nn.Sequential(OrderedDict([ + ('fc', nn.Linear(embed_dim, representation_size)), + ('act', nn.Tanh()) + ])) + else: + self.pre_logits = nn.Identity() + + self.apply(self._init_weights) + self.init_weights(pretrained=pretrained_path) + + def init_weights(self, pretrained): + if isinstance(pretrained, str): + logger = get_root_logger() + load_checkpoint(self, pretrained, map_location='cpu', strict=False, logger=logger) + print(f'Load pretrained model from {pretrained}') + def _init_weights(self, m): + if isinstance(m, nn.Linear): + trunc_normal_(m.weight, std=.02) + if isinstance(m, nn.Linear) and m.bias is not None: + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.LayerNorm): + nn.init.constant_(m.bias, 0) + nn.init.constant_(m.weight, 1.0) + + @torch.jit.ignore + def no_weight_decay(self): + return {'pos_embed', 'cls_token'} + + def get_classifier(self): + return self.head + + def reset_classifier(self, num_classes, global_pool=''): + self.num_classes = num_classes + self.head = nn.Linear(self.embed_dim, num_classes) if num_classes > 0 else nn.Identity() + + def forward_features(self, x): + out = [] + x = self.patch_embed1(x) + x = self.pos_drop(x) + for i, blk in enumerate(self.blocks1): + if self.use_checkpoint and i < self.checkpoint_num[0]: + x = checkpoint.checkpoint(blk, x) + else: + x = blk(x) + x_out = self.norm1(x.permute(0, 2, 3, 1)) + out.append(x_out.permute(0, 3, 1, 2).contiguous()) + x = self.patch_embed2(x) + for i, blk in enumerate(self.blocks2): + if self.use_checkpoint and i < self.checkpoint_num[1]: + x = checkpoint.checkpoint(blk, x) + else: + x = blk(x) + x_out = self.norm2(x.permute(0, 2, 3, 1)) + out.append(x_out.permute(0, 3, 1, 2).contiguous()) + x = self.patch_embed3(x) + for i, blk in enumerate(self.blocks3): + if self.use_checkpoint and i < self.checkpoint_num[2]: + x = checkpoint.checkpoint(blk, x) + else: + x = blk(x) + x_out = self.norm3(x.permute(0, 2, 3, 1)) + out.append(x_out.permute(0, 3, 1, 2).contiguous()) + x = self.patch_embed4(x) + for i, blk in enumerate(self.blocks4): + if self.use_checkpoint and i < self.checkpoint_num[3]: + x = checkpoint.checkpoint(blk, x) + else: + x = blk(x) + x_out = self.norm4(x.permute(0, 2, 3, 1)) + out.append(x_out.permute(0, 3, 1, 2).contiguous()) + return tuple(out) + + def forward(self, x): + x = self.forward_features(x) + return x diff --git a/mmseg/models/backbones/vit.py b/mmseg/models/backbones/vit.py new file mode 100644 index 0000000000000000000000000000000000000000..1d730d863b1fbf56dfb9b0b1583fa33d7a41f80d --- /dev/null +++ b/mmseg/models/backbones/vit.py @@ -0,0 +1,459 @@ +"""Modified from https://github.com/rwightman/pytorch-image- +models/blob/master/timm/models/vision_transformer.py.""" + +import math + +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.utils.checkpoint as cp +from mmcv.cnn import (Conv2d, Linear, build_activation_layer, build_norm_layer, + constant_init, kaiming_init, normal_init) +from mmcv.runner import _load_checkpoint +from mmcv.utils.parrots_wrapper import _BatchNorm + +from mmseg.utils import get_root_logger +from ..builder import BACKBONES +from ..utils import DropPath, trunc_normal_ + + +class Mlp(nn.Module): + """MLP layer for Encoder block. 
+
+    Args:
+        in_features (int): Input dimension for the first fully
+            connected layer.
+        hidden_features (int): Output dimension for the first fully
+            connected layer.
+        out_features (int): Output dimension for the second fully
+            connected layer.
+        act_cfg (dict): Config dict for activation layer.
+            Default: dict(type='GELU').
+        drop (float): Drop rate for the dropout layer. Dropout rate has
+            to be between 0 and 1. Default: 0.
+    """
+
+    def __init__(self,
+                 in_features,
+                 hidden_features=None,
+                 out_features=None,
+                 act_cfg=dict(type='GELU'),
+                 drop=0.):
+        super(Mlp, self).__init__()
+        out_features = out_features or in_features
+        hidden_features = hidden_features or in_features
+        self.fc1 = Linear(in_features, hidden_features)
+        self.act = build_activation_layer(act_cfg)
+        self.fc2 = Linear(hidden_features, out_features)
+        self.drop = nn.Dropout(drop)
+
+    def forward(self, x):
+        x = self.fc1(x)
+        x = self.act(x)
+        x = self.drop(x)
+        x = self.fc2(x)
+        x = self.drop(x)
+        return x
+
+
+class Attention(nn.Module):
+    """Attention layer for Encoder block.
+
+    Args:
+        dim (int): Dimension for the input vector.
+        num_heads (int): Number of parallel attention heads.
+        qkv_bias (bool): Enable bias for qkv if True. Default: False.
+        qk_scale (float): Override default qk scale of head_dim ** -0.5 if set.
+        attn_drop (float): Drop rate for attention output weights.
+            Default: 0.
+        proj_drop (float): Drop rate for output weights. Default: 0.
+    """
+
+    def __init__(self,
+                 dim,
+                 num_heads=8,
+                 qkv_bias=False,
+                 qk_scale=None,
+                 attn_drop=0.,
+                 proj_drop=0.):
+        super(Attention, self).__init__()
+        self.num_heads = num_heads
+        head_dim = dim // num_heads
+        self.scale = qk_scale or head_dim**-0.5
+
+        self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
+        self.attn_drop = nn.Dropout(attn_drop)
+        self.proj = Linear(dim, dim)
+        self.proj_drop = nn.Dropout(proj_drop)
+
+    def forward(self, x):
+        b, n, c = x.shape
+        qkv = self.qkv(x).reshape(b, n, 3, self.num_heads,
+                                  c // self.num_heads).permute(2, 0, 3, 1, 4)
+        q, k, v = qkv[0], qkv[1], qkv[2]
+
+        attn = (q @ k.transpose(-2, -1)) * self.scale
+        attn = attn.softmax(dim=-1)
+        attn = self.attn_drop(attn)
+
+        x = (attn @ v).transpose(1, 2).reshape(b, n, c)
+        x = self.proj(x)
+        x = self.proj_drop(x)
+        return x
+
+
+class Block(nn.Module):
+    """Implements encoder block with residual connection.
+
+    Args:
+        dim (int): The feature dimension.
+        num_heads (int): Number of parallel attention heads.
+        mlp_ratio (int): Ratio of mlp hidden dim to embedding dim.
+        qkv_bias (bool): Enable bias for qkv if True. Default: False.
+        qk_scale (float): Override default qk scale of head_dim ** -0.5 if set.
+        drop (float): Drop rate for mlp output weights. Default: 0.
+        attn_drop (float): Drop rate for attention output weights.
+            Default: 0.
+        proj_drop (float): Drop rate for attn layer output weights.
+            Default: 0.
+        drop_path (float): Drop rate for paths of model.
+            Default: 0.
+        act_cfg (dict): Config dict for activation layer.
+            Default: dict(type='GELU').
+        norm_cfg (dict): Config dict for normalization layer.
+            Default: dict(type='LN', eps=1e-6).
+        with_cp (bool): Use checkpoint or not. Using checkpoint will save some
+            memory while slowing down the training speed. Default: False.
+    """
+
+    def __init__(self,
+                 dim,
+                 num_heads,
+                 mlp_ratio=4,
+                 qkv_bias=False,
+                 qk_scale=None,
+                 drop=0.,
+                 attn_drop=0.,
+                 proj_drop=0.,
+                 drop_path=0.,
+                 act_cfg=dict(type='GELU'),
+                 norm_cfg=dict(type='LN', eps=1e-6),
+                 with_cp=False):
+        super(Block, self).__init__()
+        self.with_cp = with_cp
+        _, self.norm1 = build_norm_layer(norm_cfg, dim)
+        self.attn = Attention(dim, num_heads, qkv_bias, qk_scale, attn_drop,
+                              proj_drop)
+        self.drop_path = DropPath(
+            drop_path) if drop_path > 0. else nn.Identity()
+        _, self.norm2 = build_norm_layer(norm_cfg, dim)
+        mlp_hidden_dim = int(dim * mlp_ratio)
+        self.mlp = Mlp(
+            in_features=dim,
+            hidden_features=mlp_hidden_dim,
+            act_cfg=act_cfg,
+            drop=drop)
+
+    def forward(self, x):
+
+        def _inner_forward(x):
+            out = x + self.drop_path(self.attn(self.norm1(x)))
+            out = out + self.drop_path(self.mlp(self.norm2(out)))
+            return out
+
+        if self.with_cp and x.requires_grad:
+            out = cp.checkpoint(_inner_forward, x)
+        else:
+            out = _inner_forward(x)
+
+        return out
+
+
+class PatchEmbed(nn.Module):
+    """Image to Patch Embedding.
+
+    Args:
+        img_size (int | tuple): Input image size.
+            Default: 224.
+        patch_size (int): Width and height for a patch.
+            Default: 16.
+        in_channels (int): Input channels for images. Default: 3.
+        embed_dim (int): The embedding dimension. Default: 768.
+    """
+
+    def __init__(self,
+                 img_size=224,
+                 patch_size=16,
+                 in_channels=3,
+                 embed_dim=768):
+        super(PatchEmbed, self).__init__()
+        if isinstance(img_size, int):
+            self.img_size = (img_size, img_size)
+        elif isinstance(img_size, tuple):
+            self.img_size = img_size
+        else:
+            raise TypeError('img_size must be type of int or tuple')
+        h, w = self.img_size
+        self.patch_size = (patch_size, patch_size)
+        self.num_patches = (h // patch_size) * (w // patch_size)
+        self.proj = Conv2d(
+            in_channels, embed_dim, kernel_size=patch_size, stride=patch_size)
+
+    def forward(self, x):
+        return self.proj(x).flatten(2).transpose(1, 2)
+
+
+@BACKBONES.register_module()
+class VisionTransformer(nn.Module):
+    """Vision transformer backbone.
+
+    A PyTorch impl of : `An Image is Worth 16x16 Words: Transformers for
+    Image Recognition at Scale` - https://arxiv.org/abs/2010.11929
+
+    Args:
+        img_size (tuple): input image size. Default: (224, 224).
+        patch_size (int, tuple): patch size. Default: 16.
+        in_channels (int): number of input channels. Default: 3.
+        embed_dim (int): embedding dimension. Default: 768.
+        depth (int): depth of transformer. Default: 12.
+        num_heads (int): number of attention heads. Default: 12.
+        mlp_ratio (int): ratio of mlp hidden dim to embedding dim.
+            Default: 4.
+        out_indices (list | tuple | int): Output from which stages.
+            Default: 11.
+        qkv_bias (bool): enable bias for qkv if True. Default: True.
+        qk_scale (float): override default qk scale of head_dim ** -0.5 if set.
+        drop_rate (float): dropout rate. Default: 0.
+        attn_drop_rate (float): attention dropout rate. Default: 0.
+        drop_path_rate (float): Rate of DropPath. Default: 0.
+        norm_cfg (dict): Config dict for normalization layer.
+            Default: dict(type='LN', eps=1e-6, requires_grad=True).
+        act_cfg (dict): Config dict for activation layer.
+            Default: dict(type='GELU').
+        norm_eval (bool): Whether to set norm layers to eval mode, namely,
+            freeze running stats (mean and var). Note: Effect on Batch Norm
+            and its variants only. Default: False.
+        final_norm (bool): Whether to add an additional layer to normalize
+            final feature map. Default: False.
+ interpolate_mode (str): Select the interpolate mode for position + embeding vector resize. Default: bicubic. + with_cls_token (bool): If concatenating class token into image tokens + as transformer input. Default: True. + with_cp (bool): Use checkpoint or not. Using checkpoint + will save some memory while slowing down the training speed. + Default: False. + """ + + def __init__(self, + img_size=(224, 224), + patch_size=16, + in_channels=3, + embed_dim=768, + depth=12, + num_heads=12, + mlp_ratio=4, + out_indices=11, + qkv_bias=True, + qk_scale=None, + drop_rate=0., + attn_drop_rate=0., + drop_path_rate=0., + norm_cfg=dict(type='LN', eps=1e-6, requires_grad=True), + act_cfg=dict(type='GELU'), + norm_eval=False, + final_norm=False, + with_cls_token=True, + interpolate_mode='bicubic', + with_cp=False): + super(VisionTransformer, self).__init__() + self.img_size = img_size + self.patch_size = patch_size + self.features = self.embed_dim = embed_dim + self.patch_embed = PatchEmbed( + img_size=img_size, + patch_size=patch_size, + in_channels=in_channels, + embed_dim=embed_dim) + + self.with_cls_token = with_cls_token + self.cls_token = nn.Parameter(torch.zeros(1, 1, self.embed_dim)) + self.pos_embed = nn.Parameter( + torch.zeros(1, self.patch_embed.num_patches + 1, embed_dim)) + self.pos_drop = nn.Dropout(p=drop_rate) + + if isinstance(out_indices, int): + self.out_indices = [out_indices] + elif isinstance(out_indices, list) or isinstance(out_indices, tuple): + self.out_indices = out_indices + else: + raise TypeError('out_indices must be type of int, list or tuple') + + dpr = [x.item() for x in torch.linspace(0, drop_path_rate, depth) + ] # stochastic depth decay rule + self.blocks = nn.ModuleList([ + Block( + dim=embed_dim, + num_heads=num_heads, + mlp_ratio=mlp_ratio, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + drop=dpr[i], + attn_drop=attn_drop_rate, + act_cfg=act_cfg, + norm_cfg=norm_cfg, + with_cp=with_cp) for i in range(depth) + ]) + + self.interpolate_mode = interpolate_mode + self.final_norm = final_norm + if final_norm: + _, self.norm = build_norm_layer(norm_cfg, embed_dim) + + self.norm_eval = norm_eval + self.with_cp = with_cp + + def init_weights(self, pretrained=None): + if isinstance(pretrained, str): + logger = get_root_logger() + checkpoint = _load_checkpoint(pretrained, logger=logger) + if 'state_dict' in checkpoint: + state_dict = checkpoint['state_dict'] + else: + state_dict = checkpoint + + if 'pos_embed' in state_dict.keys(): + if self.pos_embed.shape != state_dict['pos_embed'].shape: + logger.info(msg=f'Resize the pos_embed shape from \ +{state_dict["pos_embed"].shape} to {self.pos_embed.shape}') + h, w = self.img_size + pos_size = int( + math.sqrt(state_dict['pos_embed'].shape[1] - 1)) + state_dict['pos_embed'] = self.resize_pos_embed( + state_dict['pos_embed'], (h, w), (pos_size, pos_size), + self.patch_size, self.interpolate_mode) + + self.load_state_dict(state_dict, False) + + elif pretrained is None: + # We only implement the 'jax_impl' initialization implemented at + # https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/vision_transformer.py#L353 # noqa: E501 + trunc_normal_(self.pos_embed, std=.02) + trunc_normal_(self.cls_token, std=.02) + for n, m in self.named_modules(): + if isinstance(m, Linear): + trunc_normal_(m.weight, std=.02) + if m.bias is not None: + if 'mlp' in n: + normal_init(m.bias, std=1e-6) + else: + constant_init(m.bias, 0) + elif isinstance(m, Conv2d): + kaiming_init(m.weight, mode='fan_in') + if m.bias is not None: + 
constant_init(m.bias, 0)
+                elif isinstance(m, (_BatchNorm, nn.GroupNorm, nn.LayerNorm)):
+                    constant_init(m.bias, 0)
+                    constant_init(m.weight, 1.0)
+        else:
+            raise TypeError('pretrained must be a str or None')
+
+    def _pos_embedding(self, img, patched_img, pos_embed):
+        """Position embedding method.
+
+        Resize the pos_embed if the input image size doesn't match
+        the training size.
+        Args:
+            img (torch.Tensor): The inference image tensor, the shape
+                must be [B, C, H, W].
+            patched_img (torch.Tensor): The patched image, it should be
+                shape of [B, L1, C].
+            pos_embed (torch.Tensor): The pos_embed weights, it should be
+                shape of [B, L2, C].
+        Return:
+            torch.Tensor: The pos encoded image feature.
+        """
+        assert patched_img.ndim == 3 and pos_embed.ndim == 3, \
+            'the shapes of patched_img and pos_embed must be [B, L, C]'
+        x_len, pos_len = patched_img.shape[1], pos_embed.shape[1]
+        if x_len != pos_len:
+            if pos_len == (self.img_size[0] // self.patch_size) * (
+                    self.img_size[1] // self.patch_size) + 1:
+                pos_h = self.img_size[0] // self.patch_size
+                pos_w = self.img_size[1] // self.patch_size
+            else:
+                raise ValueError(
+                    'Unexpected shape of pos_embed, got {}.'.format(
+                        pos_embed.shape))
+            pos_embed = self.resize_pos_embed(pos_embed, img.shape[2:],
+                                              (pos_h, pos_w), self.patch_size,
+                                              self.interpolate_mode)
+        return self.pos_drop(patched_img + pos_embed)
+
+    @staticmethod
+    def resize_pos_embed(pos_embed, input_shape, pos_shape, patch_size, mode):
+        """Resize pos_embed weights.
+
+        Resize pos_embed using bicubic interpolate method.
+        Args:
+            pos_embed (torch.Tensor): pos_embed weights.
+            input_shape (tuple): Tuple for (input_h, input_w).
+            pos_shape (tuple): Tuple for (pos_h, pos_w).
+            patch_size (int): Patch size.
+        Return:
+            torch.Tensor: The resized pos_embed of shape [B, L_new, C]
+        """
+        assert pos_embed.ndim == 3, 'shape of pos_embed must be [B, L, C]'
+        input_h, input_w = input_shape
+        pos_h, pos_w = pos_shape
+        cls_token_weight = pos_embed[:, 0]
+        pos_embed_weight = pos_embed[:, (-1 * pos_h * pos_w):]
+        pos_embed_weight = pos_embed_weight.reshape(
+            1, pos_h, pos_w, pos_embed.shape[2]).permute(0, 3, 1, 2)
+        pos_embed_weight = F.interpolate(
+            pos_embed_weight,
+            size=[input_h // patch_size, input_w // patch_size],
+            align_corners=False,
+            mode=mode)
+        cls_token_weight = cls_token_weight.unsqueeze(1)
+        pos_embed_weight = torch.flatten(pos_embed_weight, 2).transpose(1, 2)
+        pos_embed = torch.cat((cls_token_weight, pos_embed_weight), dim=1)
+        return pos_embed
+
+    def forward(self, inputs):
+        B = inputs.shape[0]
+
+        x = self.patch_embed(inputs)
+
+        cls_tokens = self.cls_token.expand(B, -1, -1)
+        x = torch.cat((cls_tokens, x), dim=1)
+        x = self._pos_embedding(inputs, x, self.pos_embed)
+
+        if not self.with_cls_token:
+            # Remove class token for transformer input
+            x = x[:, 1:]
+
+        outs = []
+        for i, blk in enumerate(self.blocks):
+            x = blk(x)
+            if i == len(self.blocks) - 1:
+                if self.final_norm:
+                    x = self.norm(x)
+            if i in self.out_indices:
+                if self.with_cls_token:
+                    # Remove class token and reshape token for decoder head
+                    out = x[:, 1:]
+                else:
+                    out = x
+                B, _, C = out.shape
+                out = out.reshape(B, inputs.shape[2] // self.patch_size,
+                                  inputs.shape[3] // self.patch_size,
+                                  C).permute(0, 3, 1, 2)
+                outs.append(out)
+
+        return tuple(outs)
+
+    def train(self, mode=True):
+        super(VisionTransformer, self).train(mode)
+        if mode and self.norm_eval:
+            for m in self.modules():
+                if isinstance(m, nn.LayerNorm):
+                    m.eval()
diff --git a/mmseg/models/builder.py b/mmseg/models/builder.py
new file
mode 100644 index 0000000000000000000000000000000000000000..9b68ff888cfa2caee1baf18d420bda2c977a5197 --- /dev/null +++ b/mmseg/models/builder.py @@ -0,0 +1,46 @@ +import warnings + +from mmcv.cnn import MODELS as MMCV_MODELS +from mmcv.utils import Registry + +MODELS = Registry('models', parent=MMCV_MODELS) + +BACKBONES = MODELS +NECKS = MODELS +HEADS = MODELS +LOSSES = MODELS +SEGMENTORS = MODELS + + +def build_backbone(cfg): + """Build backbone.""" + return BACKBONES.build(cfg) + + +def build_neck(cfg): + """Build neck.""" + return NECKS.build(cfg) + + +def build_head(cfg): + """Build head.""" + return HEADS.build(cfg) + + +def build_loss(cfg): + """Build loss.""" + return LOSSES.build(cfg) + + +def build_segmentor(cfg, train_cfg=None, test_cfg=None): + """Build segmentor.""" + if train_cfg is not None or test_cfg is not None: + warnings.warn( + 'train_cfg and test_cfg is deprecated, ' + 'please specify them in model', UserWarning) + assert cfg.get('train_cfg') is None or train_cfg is None, \ + 'train_cfg specified in both outer field and model field ' + assert cfg.get('test_cfg') is None or test_cfg is None, \ + 'test_cfg specified in both outer field and model field ' + return SEGMENTORS.build( + cfg, default_args=dict(train_cfg=train_cfg, test_cfg=test_cfg)) diff --git a/mmseg/models/decode_heads/__init__.py b/mmseg/models/decode_heads/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..662aae3c0022845bd173b71f48c4765767eef3e1 --- /dev/null +++ b/mmseg/models/decode_heads/__init__.py @@ -0,0 +1,28 @@ +from .ann_head import ANNHead +from .apc_head import APCHead +from .aspp_head import ASPPHead +from .cc_head import CCHead +from .da_head import DAHead +from .dm_head import DMHead +from .dnl_head import DNLHead +from .ema_head import EMAHead +from .enc_head import EncHead +from .fcn_head import FCNHead +from .fpn_head import FPNHead +from .gc_head import GCHead +from .lraspp_head import LRASPPHead +from .nl_head import NLHead +from .ocr_head import OCRHead +from .point_head import PointHead +from .psa_head import PSAHead +from .psp_head import PSPHead +from .sep_aspp_head import DepthwiseSeparableASPPHead +from .sep_fcn_head import DepthwiseSeparableFCNHead +from .uper_head import UPerHead + +__all__ = [ + 'FCNHead', 'PSPHead', 'ASPPHead', 'PSAHead', 'NLHead', 'GCHead', 'CCHead', + 'UPerHead', 'DepthwiseSeparableASPPHead', 'ANNHead', 'DAHead', 'OCRHead', + 'EncHead', 'DepthwiseSeparableFCNHead', 'FPNHead', 'EMAHead', 'DNLHead', + 'PointHead', 'APCHead', 'DMHead', 'LRASPPHead' +] diff --git a/mmseg/models/decode_heads/ann_head.py b/mmseg/models/decode_heads/ann_head.py new file mode 100644 index 0000000000000000000000000000000000000000..396c54e1505b3e1750abb715769c6d9c040d9906 --- /dev/null +++ b/mmseg/models/decode_heads/ann_head.py @@ -0,0 +1,245 @@ +import torch +import torch.nn as nn +from mmcv.cnn import ConvModule + +from ..builder import HEADS +from ..utils import SelfAttentionBlock as _SelfAttentionBlock +from .decode_head import BaseDecodeHead + + +class PPMConcat(nn.ModuleList): + """Pyramid Pooling Module that only concat the features of each layer. + + Args: + pool_scales (tuple[int]): Pooling scales used in Pooling Pyramid + Module. 
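+
+        Example:
+            A minimal shape-check sketch (illustrative only; the feature
+            shape below is an assumption, not part of this diff):
+
+            >>> import torch
+            >>> ppm = PPMConcat(pool_scales=(1, 3, 6, 8))
+            >>> feats = torch.rand(2, 64, 32, 32)
+            >>> # each scale contributes scale**2 positions: 1 + 9 + 36 + 64
+            >>> ppm(feats).shape
+            torch.Size([2, 64, 110])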
+ """ + + def __init__(self, pool_scales=(1, 3, 6, 8)): + super(PPMConcat, self).__init__( + [nn.AdaptiveAvgPool2d(pool_scale) for pool_scale in pool_scales]) + + def forward(self, feats): + """Forward function.""" + ppm_outs = [] + for ppm in self: + ppm_out = ppm(feats) + ppm_outs.append(ppm_out.view(*feats.shape[:2], -1)) + concat_outs = torch.cat(ppm_outs, dim=2) + return concat_outs + + +class SelfAttentionBlock(_SelfAttentionBlock): + """Make a ANN used SelfAttentionBlock. + + Args: + low_in_channels (int): Input channels of lower level feature, + which is the key feature for self-attention. + high_in_channels (int): Input channels of higher level feature, + which is the query feature for self-attention. + channels (int): Output channels of key/query transform. + out_channels (int): Output channels. + share_key_query (bool): Whether share projection weight between key + and query projection. + query_scale (int): The scale of query feature map. + key_pool_scales (tuple[int]): Pooling scales used in Pooling Pyramid + Module of key feature. + conv_cfg (dict|None): Config of conv layers. + norm_cfg (dict|None): Config of norm layers. + act_cfg (dict|None): Config of activation layers. + """ + + def __init__(self, low_in_channels, high_in_channels, channels, + out_channels, share_key_query, query_scale, key_pool_scales, + conv_cfg, norm_cfg, act_cfg): + key_psp = PPMConcat(key_pool_scales) + if query_scale > 1: + query_downsample = nn.MaxPool2d(kernel_size=query_scale) + else: + query_downsample = None + super(SelfAttentionBlock, self).__init__( + key_in_channels=low_in_channels, + query_in_channels=high_in_channels, + channels=channels, + out_channels=out_channels, + share_key_query=share_key_query, + query_downsample=query_downsample, + key_downsample=key_psp, + key_query_num_convs=1, + key_query_norm=True, + value_out_num_convs=1, + value_out_norm=False, + matmul_norm=True, + with_out=True, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + + +class AFNB(nn.Module): + """Asymmetric Fusion Non-local Block(AFNB) + + Args: + low_in_channels (int): Input channels of lower level feature, + which is the key feature for self-attention. + high_in_channels (int): Input channels of higher level feature, + which is the query feature for self-attention. + channels (int): Output channels of key/query transform. + out_channels (int): Output channels. + and query projection. + query_scales (tuple[int]): The scales of query feature map. + Default: (1,) + key_pool_scales (tuple[int]): Pooling scales used in Pooling Pyramid + Module of key feature. + conv_cfg (dict|None): Config of conv layers. + norm_cfg (dict|None): Config of norm layers. + act_cfg (dict|None): Config of activation layers. 
+ """ + + def __init__(self, low_in_channels, high_in_channels, channels, + out_channels, query_scales, key_pool_scales, conv_cfg, + norm_cfg, act_cfg): + super(AFNB, self).__init__() + self.stages = nn.ModuleList() + for query_scale in query_scales: + self.stages.append( + SelfAttentionBlock( + low_in_channels=low_in_channels, + high_in_channels=high_in_channels, + channels=channels, + out_channels=out_channels, + share_key_query=False, + query_scale=query_scale, + key_pool_scales=key_pool_scales, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg)) + self.bottleneck = ConvModule( + out_channels + high_in_channels, + out_channels, + 1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=None) + + def forward(self, low_feats, high_feats): + """Forward function.""" + priors = [stage(high_feats, low_feats) for stage in self.stages] + context = torch.stack(priors, dim=0).sum(dim=0) + output = self.bottleneck(torch.cat([context, high_feats], 1)) + return output + + +class APNB(nn.Module): + """Asymmetric Pyramid Non-local Block (APNB) + + Args: + in_channels (int): Input channels of key/query feature, + which is the key feature for self-attention. + channels (int): Output channels of key/query transform. + out_channels (int): Output channels. + query_scales (tuple[int]): The scales of query feature map. + Default: (1,) + key_pool_scales (tuple[int]): Pooling scales used in Pooling Pyramid + Module of key feature. + conv_cfg (dict|None): Config of conv layers. + norm_cfg (dict|None): Config of norm layers. + act_cfg (dict|None): Config of activation layers. + """ + + def __init__(self, in_channels, channels, out_channels, query_scales, + key_pool_scales, conv_cfg, norm_cfg, act_cfg): + super(APNB, self).__init__() + self.stages = nn.ModuleList() + for query_scale in query_scales: + self.stages.append( + SelfAttentionBlock( + low_in_channels=in_channels, + high_in_channels=in_channels, + channels=channels, + out_channels=out_channels, + share_key_query=True, + query_scale=query_scale, + key_pool_scales=key_pool_scales, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg)) + self.bottleneck = ConvModule( + 2 * in_channels, + out_channels, + 1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + + def forward(self, feats): + """Forward function.""" + priors = [stage(feats, feats) for stage in self.stages] + context = torch.stack(priors, dim=0).sum(dim=0) + output = self.bottleneck(torch.cat([context, feats], 1)) + return output + + +@HEADS.register_module() +class ANNHead(BaseDecodeHead): + """Asymmetric Non-local Neural Networks for Semantic Segmentation. + + This head is the implementation of `ANNNet + `_. + + Args: + project_channels (int): Projection channels for Nonlocal. + query_scales (tuple[int]): The scales of query feature map. + Default: (1,) + key_pool_scales (tuple[int]): The pooling scales of key feature map. + Default: (1, 3, 6, 8). 
+ """ + + def __init__(self, + project_channels, + query_scales=(1, ), + key_pool_scales=(1, 3, 6, 8), + **kwargs): + super(ANNHead, self).__init__( + input_transform='multiple_select', **kwargs) + assert len(self.in_channels) == 2 + low_in_channels, high_in_channels = self.in_channels + self.project_channels = project_channels + self.fusion = AFNB( + low_in_channels=low_in_channels, + high_in_channels=high_in_channels, + out_channels=high_in_channels, + channels=project_channels, + query_scales=query_scales, + key_pool_scales=key_pool_scales, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + self.bottleneck = ConvModule( + high_in_channels, + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + self.context = APNB( + in_channels=self.channels, + out_channels=self.channels, + channels=project_channels, + query_scales=query_scales, + key_pool_scales=key_pool_scales, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + def forward(self, inputs): + """Forward function.""" + low_feats, high_feats = self._transform_inputs(inputs) + output = self.fusion(low_feats, high_feats) + output = self.dropout(output) + output = self.bottleneck(output) + output = self.context(output) + output = self.cls_seg(output) + + return output diff --git a/mmseg/models/decode_heads/apc_head.py b/mmseg/models/decode_heads/apc_head.py new file mode 100644 index 0000000000000000000000000000000000000000..2118232c964f9a50a65d7f160e33712ef49ca3ba --- /dev/null +++ b/mmseg/models/decode_heads/apc_head.py @@ -0,0 +1,158 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from mmcv.cnn import ConvModule + +from mmseg.ops import resize +from ..builder import HEADS +from .decode_head import BaseDecodeHead + + +class ACM(nn.Module): + """Adaptive Context Module used in APCNet. + + Args: + pool_scale (int): Pooling scale used in Adaptive Context + Module to extract region features. + fusion (bool): Add one conv to fuse residual feature. + in_channels (int): Input channels. + channels (int): Channels after modules, before conv_seg. + conv_cfg (dict | None): Config of conv layers. + norm_cfg (dict | None): Config of norm layers. + act_cfg (dict): Config of activation layers. 
+ """ + + def __init__(self, pool_scale, fusion, in_channels, channels, conv_cfg, + norm_cfg, act_cfg): + super(ACM, self).__init__() + self.pool_scale = pool_scale + self.fusion = fusion + self.in_channels = in_channels + self.channels = channels + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.act_cfg = act_cfg + self.pooled_redu_conv = ConvModule( + self.in_channels, + self.channels, + 1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + self.input_redu_conv = ConvModule( + self.in_channels, + self.channels, + 1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + self.global_info = ConvModule( + self.channels, + self.channels, + 1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + self.gla = nn.Conv2d(self.channels, self.pool_scale**2, 1, 1, 0) + + self.residual_conv = ConvModule( + self.channels, + self.channels, + 1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + if self.fusion: + self.fusion_conv = ConvModule( + self.channels, + self.channels, + 1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + def forward(self, x): + """Forward function.""" + pooled_x = F.adaptive_avg_pool2d(x, self.pool_scale) + # [batch_size, channels, h, w] + x = self.input_redu_conv(x) + # [batch_size, channels, pool_scale, pool_scale] + pooled_x = self.pooled_redu_conv(pooled_x) + batch_size = x.size(0) + # [batch_size, pool_scale * pool_scale, channels] + pooled_x = pooled_x.view(batch_size, self.channels, + -1).permute(0, 2, 1).contiguous() + # [batch_size, h * w, pool_scale * pool_scale] + affinity_matrix = self.gla(x + resize( + self.global_info(F.adaptive_avg_pool2d(x, 1)), size=x.shape[2:]) + ).permute(0, 2, 3, 1).reshape( + batch_size, -1, self.pool_scale**2) + affinity_matrix = F.sigmoid(affinity_matrix) + # [batch_size, h * w, channels] + z_out = torch.matmul(affinity_matrix, pooled_x) + # [batch_size, channels, h * w] + z_out = z_out.permute(0, 2, 1).contiguous() + # [batch_size, channels, h, w] + z_out = z_out.view(batch_size, self.channels, x.size(2), x.size(3)) + z_out = self.residual_conv(z_out) + z_out = F.relu(z_out + x) + if self.fusion: + z_out = self.fusion_conv(z_out) + + return z_out + + +@HEADS.register_module() +class APCHead(BaseDecodeHead): + """Adaptive Pyramid Context Network for Semantic Segmentation. + + This head is the implementation of + `APCNet `_. + + Args: + pool_scales (tuple[int]): Pooling scales used in Adaptive Context + Module. Default: (1, 2, 3, 6). + fusion (bool): Add one conv to fuse residual feature. 
+ """ + + def __init__(self, pool_scales=(1, 2, 3, 6), fusion=True, **kwargs): + super(APCHead, self).__init__(**kwargs) + assert isinstance(pool_scales, (list, tuple)) + self.pool_scales = pool_scales + self.fusion = fusion + acm_modules = [] + for pool_scale in self.pool_scales: + acm_modules.append( + ACM(pool_scale, + self.fusion, + self.in_channels, + self.channels, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg)) + self.acm_modules = nn.ModuleList(acm_modules) + self.bottleneck = ConvModule( + self.in_channels + len(pool_scales) * self.channels, + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + def forward(self, inputs): + """Forward function.""" + x = self._transform_inputs(inputs) + acm_outs = [x] + for acm_module in self.acm_modules: + acm_outs.append(acm_module(x)) + acm_outs = torch.cat(acm_outs, dim=1) + output = self.bottleneck(acm_outs) + output = self.cls_seg(output) + return output diff --git a/mmseg/models/decode_heads/aspp_head.py b/mmseg/models/decode_heads/aspp_head.py new file mode 100644 index 0000000000000000000000000000000000000000..6332ab120ceb040f18ab962c74a97dd23bb17caa --- /dev/null +++ b/mmseg/models/decode_heads/aspp_head.py @@ -0,0 +1,107 @@ +import torch +import torch.nn as nn +from mmcv.cnn import ConvModule + +from mmseg.ops import resize +from ..builder import HEADS +from .decode_head import BaseDecodeHead + + +class ASPPModule(nn.ModuleList): + """Atrous Spatial Pyramid Pooling (ASPP) Module. + + Args: + dilations (tuple[int]): Dilation rate of each layer. + in_channels (int): Input channels. + channels (int): Channels after modules, before conv_seg. + conv_cfg (dict|None): Config of conv layers. + norm_cfg (dict|None): Config of norm layers. + act_cfg (dict): Config of activation layers. + """ + + def __init__(self, dilations, in_channels, channels, conv_cfg, norm_cfg, + act_cfg): + super(ASPPModule, self).__init__() + self.dilations = dilations + self.in_channels = in_channels + self.channels = channels + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.act_cfg = act_cfg + for dilation in dilations: + self.append( + ConvModule( + self.in_channels, + self.channels, + 1 if dilation == 1 else 3, + dilation=dilation, + padding=0 if dilation == 1 else dilation, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg)) + + def forward(self, x): + """Forward function.""" + aspp_outs = [] + for aspp_module in self: + aspp_outs.append(aspp_module(x)) + + return aspp_outs + + +@HEADS.register_module() +class ASPPHead(BaseDecodeHead): + """Rethinking Atrous Convolution for Semantic Image Segmentation. + + This head is the implementation of `DeepLabV3 + `_. + + Args: + dilations (tuple[int]): Dilation rates for ASPP module. + Default: (1, 6, 12, 18). 
+ """ + + def __init__(self, dilations=(1, 6, 12, 18), **kwargs): + super(ASPPHead, self).__init__(**kwargs) + assert isinstance(dilations, (list, tuple)) + self.dilations = dilations + self.image_pool = nn.Sequential( + nn.AdaptiveAvgPool2d(1), + ConvModule( + self.in_channels, + self.channels, + 1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg)) + self.aspp_modules = ASPPModule( + dilations, + self.in_channels, + self.channels, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + self.bottleneck = ConvModule( + (len(dilations) + 1) * self.channels, + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + def forward(self, inputs): + """Forward function.""" + x = self._transform_inputs(inputs) + aspp_outs = [ + resize( + self.image_pool(x), + size=x.size()[2:], + mode='bilinear', + align_corners=self.align_corners) + ] + aspp_outs.extend(self.aspp_modules(x)) + aspp_outs = torch.cat(aspp_outs, dim=1) + output = self.bottleneck(aspp_outs) + output = self.cls_seg(output) + return output diff --git a/mmseg/models/decode_heads/cascade_decode_head.py b/mmseg/models/decode_heads/cascade_decode_head.py new file mode 100644 index 0000000000000000000000000000000000000000..d02122ca0e68743b1bf7a893afae96042f23838c --- /dev/null +++ b/mmseg/models/decode_heads/cascade_decode_head.py @@ -0,0 +1,57 @@ +from abc import ABCMeta, abstractmethod + +from .decode_head import BaseDecodeHead + + +class BaseCascadeDecodeHead(BaseDecodeHead, metaclass=ABCMeta): + """Base class for cascade decode head used in + :class:`CascadeEncoderDecoder.""" + + def __init__(self, *args, **kwargs): + super(BaseCascadeDecodeHead, self).__init__(*args, **kwargs) + + @abstractmethod + def forward(self, inputs, prev_output): + """Placeholder of forward function.""" + pass + + def forward_train(self, inputs, prev_output, img_metas, gt_semantic_seg, + train_cfg): + """Forward function for training. + Args: + inputs (list[Tensor]): List of multi-level img features. + prev_output (Tensor): The output of previous decode head. + img_metas (list[dict]): List of image info dict where each dict + has: 'img_shape', 'scale_factor', 'flip', and may also contain + 'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'. + For details on the values of these keys see + `mmseg/datasets/pipelines/formatting.py:Collect`. + gt_semantic_seg (Tensor): Semantic segmentation masks + used if the architecture supports semantic segmentation task. + train_cfg (dict): The training config. + + Returns: + dict[str, Tensor]: a dictionary of loss components + """ + seg_logits = self.forward(inputs, prev_output) + losses = self.losses(seg_logits, gt_semantic_seg) + + return losses + + def forward_test(self, inputs, prev_output, img_metas, test_cfg): + """Forward function for testing. + + Args: + inputs (list[Tensor]): List of multi-level img features. + prev_output (Tensor): The output of previous decode head. + img_metas (list[dict]): List of image info dict where each dict + has: 'img_shape', 'scale_factor', 'flip', and may also contain + 'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'. + For details on the values of these keys see + `mmseg/datasets/pipelines/formatting.py:Collect`. + test_cfg (dict): The testing config. + + Returns: + Tensor: Output segmentation map. 
+ """ + return self.forward(inputs, prev_output) diff --git a/mmseg/models/decode_heads/cc_head.py b/mmseg/models/decode_heads/cc_head.py new file mode 100644 index 0000000000000000000000000000000000000000..95c2706a5d4d7877895146c5ebf4396a78d29a8d --- /dev/null +++ b/mmseg/models/decode_heads/cc_head.py @@ -0,0 +1,42 @@ +import torch + +from ..builder import HEADS +from .fcn_head import FCNHead + +try: + from mmcv.ops import CrissCrossAttention +except ModuleNotFoundError: + CrissCrossAttention = None + + +@HEADS.register_module() +class CCHead(FCNHead): + """CCNet: Criss-Cross Attention for Semantic Segmentation. + + This head is the implementation of `CCNet + `_. + + Args: + recurrence (int): Number of recurrence of Criss Cross Attention + module. Default: 2. + """ + + def __init__(self, recurrence=2, **kwargs): + if CrissCrossAttention is None: + raise RuntimeError('Please install mmcv-full for ' + 'CrissCrossAttention ops') + super(CCHead, self).__init__(num_convs=2, **kwargs) + self.recurrence = recurrence + self.cca = CrissCrossAttention(self.channels) + + def forward(self, inputs): + """Forward function.""" + x = self._transform_inputs(inputs) + output = self.convs[0](x) + for _ in range(self.recurrence): + output = self.cca(output) + output = self.convs[1](output) + if self.concat_input: + output = self.conv_cat(torch.cat([x, output], dim=1)) + output = self.cls_seg(output) + return output diff --git a/mmseg/models/decode_heads/da_head.py b/mmseg/models/decode_heads/da_head.py new file mode 100644 index 0000000000000000000000000000000000000000..8ee0e08c3d69ee4392aa550072a043548c377571 --- /dev/null +++ b/mmseg/models/decode_heads/da_head.py @@ -0,0 +1,178 @@ +import torch +import torch.nn.functional as F +from mmcv.cnn import ConvModule, Scale +from torch import nn + +from mmseg.core import add_prefix +from ..builder import HEADS +from ..utils import SelfAttentionBlock as _SelfAttentionBlock +from .decode_head import BaseDecodeHead + + +class PAM(_SelfAttentionBlock): + """Position Attention Module (PAM) + + Args: + in_channels (int): Input channels of key/query feature. + channels (int): Output channels of key/query transform. 
+ """ + + def __init__(self, in_channels, channels): + super(PAM, self).__init__( + key_in_channels=in_channels, + query_in_channels=in_channels, + channels=channels, + out_channels=in_channels, + share_key_query=False, + query_downsample=None, + key_downsample=None, + key_query_num_convs=1, + key_query_norm=False, + value_out_num_convs=1, + value_out_norm=False, + matmul_norm=False, + with_out=False, + conv_cfg=None, + norm_cfg=None, + act_cfg=None) + + self.gamma = Scale(0) + + def forward(self, x): + """Forward function.""" + out = super(PAM, self).forward(x, x) + + out = self.gamma(out) + x + return out + + +class CAM(nn.Module): + """Channel Attention Module (CAM)""" + + def __init__(self): + super(CAM, self).__init__() + self.gamma = Scale(0) + + def forward(self, x): + """Forward function.""" + batch_size, channels, height, width = x.size() + proj_query = x.view(batch_size, channels, -1) + proj_key = x.view(batch_size, channels, -1).permute(0, 2, 1) + energy = torch.bmm(proj_query, proj_key) + energy_new = torch.max( + energy, -1, keepdim=True)[0].expand_as(energy) - energy + attention = F.softmax(energy_new, dim=-1) + proj_value = x.view(batch_size, channels, -1) + + out = torch.bmm(attention, proj_value) + out = out.view(batch_size, channels, height, width) + + out = self.gamma(out) + x + return out + + +@HEADS.register_module() +class DAHead(BaseDecodeHead): + """Dual Attention Network for Scene Segmentation. + + This head is the implementation of `DANet + `_. + + Args: + pam_channels (int): The channels of Position Attention Module(PAM). + """ + + def __init__(self, pam_channels, **kwargs): + super(DAHead, self).__init__(**kwargs) + self.pam_channels = pam_channels + self.pam_in_conv = ConvModule( + self.in_channels, + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + self.pam = PAM(self.channels, pam_channels) + self.pam_out_conv = ConvModule( + self.channels, + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + self.pam_conv_seg = nn.Conv2d( + self.channels, self.num_classes, kernel_size=1) + + self.cam_in_conv = ConvModule( + self.in_channels, + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + self.cam = CAM() + self.cam_out_conv = ConvModule( + self.channels, + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + self.cam_conv_seg = nn.Conv2d( + self.channels, self.num_classes, kernel_size=1) + + def pam_cls_seg(self, feat): + """PAM feature classification.""" + if self.dropout is not None: + feat = self.dropout(feat) + output = self.pam_conv_seg(feat) + return output + + def cam_cls_seg(self, feat): + """CAM feature classification.""" + if self.dropout is not None: + feat = self.dropout(feat) + output = self.cam_conv_seg(feat) + return output + + def forward(self, inputs): + """Forward function.""" + x = self._transform_inputs(inputs) + pam_feat = self.pam_in_conv(x) + pam_feat = self.pam(pam_feat) + pam_feat = self.pam_out_conv(pam_feat) + pam_out = self.pam_cls_seg(pam_feat) + + cam_feat = self.cam_in_conv(x) + cam_feat = self.cam(cam_feat) + cam_feat = self.cam_out_conv(cam_feat) + cam_out = self.cam_cls_seg(cam_feat) + + feat_sum = pam_feat + cam_feat + pam_cam_out = self.cls_seg(feat_sum) + + return pam_cam_out, pam_out, cam_out + + def forward_test(self, inputs, img_metas, test_cfg): + """Forward function for testing, only 
``pam_cam`` is used.""" + return self.forward(inputs)[0] + + def losses(self, seg_logit, seg_label): + """Compute ``pam_cam``, ``pam``, ``cam`` loss.""" + pam_cam_seg_logit, pam_seg_logit, cam_seg_logit = seg_logit + loss = dict() + loss.update( + add_prefix( + super(DAHead, self).losses(pam_cam_seg_logit, seg_label), + 'pam_cam')) + loss.update( + add_prefix( + super(DAHead, self).losses(pam_seg_logit, seg_label), 'pam')) + loss.update( + add_prefix( + super(DAHead, self).losses(cam_seg_logit, seg_label), 'cam')) + return loss diff --git a/mmseg/models/decode_heads/decode_head.py b/mmseg/models/decode_heads/decode_head.py new file mode 100644 index 0000000000000000000000000000000000000000..86b9b63f43818c4b842a25d143060b07c2b401e1 --- /dev/null +++ b/mmseg/models/decode_heads/decode_head.py @@ -0,0 +1,234 @@ +from abc import ABCMeta, abstractmethod + +import torch +import torch.nn as nn +from mmcv.cnn import normal_init +from mmcv.runner import auto_fp16, force_fp32 + +from mmseg.core import build_pixel_sampler +from mmseg.ops import resize +from ..builder import build_loss +from ..losses import accuracy + + +class BaseDecodeHead(nn.Module, metaclass=ABCMeta): + """Base class for decode heads. + + Args: + in_channels (int|Sequence[int]): Input channels. + channels (int): Channels after modules, before conv_seg. + num_classes (int): Number of classes. + dropout_ratio (float): Ratio of dropout layer. Default: 0.1. + conv_cfg (dict|None): Config of conv layers. Default: None. + norm_cfg (dict|None): Config of norm layers. Default: None. + act_cfg (dict): Config of activation layers. + Default: dict(type='ReLU') + in_index (int|Sequence[int]): Input feature index. Default: -1 + input_transform (str|None): Transformation type of input features. + Options: 'resize_concat', 'multiple_select', None. + 'resize_concat': Multiple feature maps will be resized to the + same size as the first one and then concatenated together. + Usually used in FCN head of HRNet. + 'multiple_select': Multiple feature maps will be bundled into + a list and passed into the decode head. + None: Only a single selected feature map is allowed. + Default: None. + loss_decode (dict): Config of decode loss. + Default: dict(type='CrossEntropyLoss'). + ignore_index (int | None): The label index to be ignored. When using + masked BCE loss, ignore_index should be set to None. Default: 255 + sampler (dict|None): The config of segmentation map sampler. + Default: None. + align_corners (bool): align_corners argument of F.interpolate. + Default: False.
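+
+    Example:
+        A minimal sketch of the 'resize_concat' input transform, shown with
+        the FCNHead subclass added later in this diff (the shapes and
+        num_classes below are assumptions):
+
+        >>> import torch
+        >>> from mmseg.models.decode_heads import FCNHead
+        >>> head = FCNHead(in_channels=[256, 512], in_index=[0, 1],
+        ...                input_transform='resize_concat', channels=128,
+        ...                num_classes=19, num_convs=1)
+        >>> feats = [torch.rand(1, 256, 64, 64), torch.rand(1, 512, 32, 32)]
+        >>> # both maps are resized to 64x64 and concatenated to 768 channels
+        >>> head(feats).shape
+        torch.Size([1, 19, 64, 64])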
+ """ + + def __init__(self, + in_channels, + channels, + *, + num_classes, + dropout_ratio=0.1, + conv_cfg=None, + norm_cfg=None, + act_cfg=dict(type='ReLU'), + in_index=-1, + input_transform=None, + loss_decode=dict( + type='CrossEntropyLoss', + use_sigmoid=False, + loss_weight=1.0), + ignore_index=255, + sampler=None, + align_corners=False): + super(BaseDecodeHead, self).__init__() + self._init_inputs(in_channels, in_index, input_transform) + self.channels = channels + self.num_classes = num_classes + self.dropout_ratio = dropout_ratio + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.act_cfg = act_cfg + self.in_index = in_index + self.loss_decode = build_loss(loss_decode) + self.ignore_index = ignore_index + self.align_corners = align_corners + if sampler is not None: + self.sampler = build_pixel_sampler(sampler, context=self) + else: + self.sampler = None + + self.conv_seg = nn.Conv2d(channels, num_classes, kernel_size=1) + if dropout_ratio > 0: + self.dropout = nn.Dropout2d(dropout_ratio) + else: + self.dropout = None + self.fp16_enabled = False + + def extra_repr(self): + """Extra repr.""" + s = f'input_transform={self.input_transform}, ' \ + f'ignore_index={self.ignore_index}, ' \ + f'align_corners={self.align_corners}' + return s + + def _init_inputs(self, in_channels, in_index, input_transform): + """Check and initialize input transforms. + + The in_channels, in_index and input_transform must match. + Specifically, when input_transform is None, only single feature map + will be selected. So in_channels and in_index must be of type int. + When input_transform + + Args: + in_channels (int|Sequence[int]): Input channels. + in_index (int|Sequence[int]): Input feature index. + input_transform (str|None): Transformation type of input features. + Options: 'resize_concat', 'multiple_select', None. + 'resize_concat': Multiple feature maps will be resize to the + same size as first one and than concat together. + Usually used in FCN head of HRNet. + 'multiple_select': Multiple feature maps will be bundle into + a list and passed into decode head. + None: Only one select feature map is allowed. + """ + + if input_transform is not None: + assert input_transform in ['resize_concat', 'multiple_select'] + self.input_transform = input_transform + self.in_index = in_index + if input_transform is not None: + assert isinstance(in_channels, (list, tuple)) + assert isinstance(in_index, (list, tuple)) + assert len(in_channels) == len(in_index) + if input_transform == 'resize_concat': + self.in_channels = sum(in_channels) + else: + self.in_channels = in_channels + else: + assert isinstance(in_channels, int) + assert isinstance(in_index, int) + self.in_channels = in_channels + + def init_weights(self): + """Initialize weights of classification layer.""" + normal_init(self.conv_seg, mean=0, std=0.01) + + def _transform_inputs(self, inputs): + """Transform inputs for decoder. + + Args: + inputs (list[Tensor]): List of multi-level img features. 
+ + Returns: + Tensor: The transformed inputs + """ + + if self.input_transform == 'resize_concat': + inputs = [inputs[i] for i in self.in_index] + upsampled_inputs = [ + resize( + input=x, + size=inputs[0].shape[2:], + mode='bilinear', + align_corners=self.align_corners) for x in inputs + ] + inputs = torch.cat(upsampled_inputs, dim=1) + elif self.input_transform == 'multiple_select': + inputs = [inputs[i] for i in self.in_index] + else: + inputs = inputs[self.in_index] + + return inputs + + @auto_fp16() + @abstractmethod + def forward(self, inputs): + """Placeholder of forward function.""" + pass + + def forward_train(self, inputs, img_metas, gt_semantic_seg, train_cfg): + """Forward function for training. + Args: + inputs (list[Tensor]): List of multi-level img features. + img_metas (list[dict]): List of image info dict where each dict + has: 'img_shape', 'scale_factor', 'flip', and may also contain + 'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'. + For details on the values of these keys see + `mmseg/datasets/pipelines/formatting.py:Collect`. + gt_semantic_seg (Tensor): Semantic segmentation masks + used if the architecture supports semantic segmentation task. + train_cfg (dict): The training config. + + Returns: + dict[str, Tensor]: a dictionary of loss components + """ + seg_logits = self.forward(inputs) + losses = self.losses(seg_logits, gt_semantic_seg) + return losses + + def forward_test(self, inputs, img_metas, test_cfg): + """Forward function for testing. + + Args: + inputs (list[Tensor]): List of multi-level img features. + img_metas (list[dict]): List of image info dict where each dict + has: 'img_shape', 'scale_factor', 'flip', and may also contain + 'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'. + For details on the values of these keys see + `mmseg/datasets/pipelines/formatting.py:Collect`. + test_cfg (dict): The testing config. + + Returns: + Tensor: Output segmentation map. + """ + return self.forward(inputs) + + def cls_seg(self, feat): + """Classify each pixel.""" + if self.dropout is not None: + feat = self.dropout(feat) + output = self.conv_seg(feat) + return output + + @force_fp32(apply_to=('seg_logit', )) + def losses(self, seg_logit, seg_label): + """Compute segmentation loss.""" + loss = dict() + seg_logit = resize( + input=seg_logit, + size=seg_label.shape[2:], + mode='bilinear', + align_corners=self.align_corners) + if self.sampler is not None: + seg_weight = self.sampler.sample(seg_logit, seg_label) + else: + seg_weight = None + seg_label = seg_label.squeeze(1) + loss['loss_seg'] = self.loss_decode( + seg_logit, + seg_label, + weight=seg_weight, + ignore_index=self.ignore_index) + loss['acc_seg'] = accuracy(seg_logit, seg_label) + return loss diff --git a/mmseg/models/decode_heads/dm_head.py b/mmseg/models/decode_heads/dm_head.py new file mode 100644 index 0000000000000000000000000000000000000000..3161b0648879127c169fede162bc3ee16e4692c0 --- /dev/null +++ b/mmseg/models/decode_heads/dm_head.py @@ -0,0 +1,140 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from mmcv.cnn import ConvModule, build_activation_layer, build_norm_layer + +from ..builder import HEADS +from .decode_head import BaseDecodeHead + + +class DCM(nn.Module): + """Dynamic Convolutional Module used in DMNet. + + Args: + filter_size (int): The filter size of generated convolution kernel + used in Dynamic Convolutional Module. + fusion (bool): Add one conv to fuse DCM output feature. + in_channels (int): Input channels. 
+ channels (int): Channels after modules, before conv_seg. + conv_cfg (dict | None): Config of conv layers. + norm_cfg (dict | None): Config of norm layers. + act_cfg (dict): Config of activation layers. + """ + + def __init__(self, filter_size, fusion, in_channels, channels, conv_cfg, + norm_cfg, act_cfg): + super(DCM, self).__init__() + self.filter_size = filter_size + self.fusion = fusion + self.in_channels = in_channels + self.channels = channels + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.act_cfg = act_cfg + self.filter_gen_conv = nn.Conv2d(self.in_channels, self.channels, 1, 1, + 0) + + self.input_redu_conv = ConvModule( + self.in_channels, + self.channels, + 1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + if self.norm_cfg is not None: + self.norm = build_norm_layer(self.norm_cfg, self.channels)[1] + else: + self.norm = None + self.activate = build_activation_layer(self.act_cfg) + + if self.fusion: + self.fusion_conv = ConvModule( + self.channels, + self.channels, + 1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + def forward(self, x): + """Forward function.""" + generated_filter = self.filter_gen_conv( + F.adaptive_avg_pool2d(x, self.filter_size)) + x = self.input_redu_conv(x) + b, c, h, w = x.shape + # [1, b * c, h, w], c = self.channels + x = x.view(1, b * c, h, w) + # [b * c, 1, filter_size, filter_size] + generated_filter = generated_filter.view(b * c, 1, self.filter_size, + self.filter_size) + pad = (self.filter_size - 1) // 2 + if (self.filter_size - 1) % 2 == 0: + p2d = (pad, pad, pad, pad) + else: + p2d = (pad + 1, pad, pad + 1, pad) + x = F.pad(input=x, pad=p2d, mode='constant', value=0) + # [1, b * c, h, w] + output = F.conv2d(input=x, weight=generated_filter, groups=b * c) + # [b, c, h, w] + output = output.view(b, c, h, w) + if self.norm is not None: + output = self.norm(output) + output = self.activate(output) + + if self.fusion: + output = self.fusion_conv(output) + + return output + + +@HEADS.register_module() +class DMHead(BaseDecodeHead): + """Dynamic Multi-scale Filters for Semantic Segmentation. + + This head is the implementation of + `DMNet `_. + + Args: + filter_sizes (tuple[int]): The size of generated convolutional filters + used in Dynamic Convolutional Module. Default: (1, 3, 5, 7). + fusion (bool): Add one conv to fuse DCM output feature. 
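+
+        Example:
+            A minimal usage sketch (illustrative only; the feature shapes and
+            num_classes are assumptions):
+
+            >>> import torch
+            >>> head = DMHead(filter_sizes=(1, 3, 5, 7), in_channels=2048,
+            ...               in_index=3, channels=512, num_classes=19)
+            >>> feats = [torch.rand(1, 256, 128, 128),
+            ...          torch.rand(1, 512, 64, 64),
+            ...          torch.rand(1, 1024, 32, 32),
+            ...          torch.rand(1, 2048, 16, 16)]
+            >>> head(feats).shape
+            torch.Size([1, 19, 16, 16])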
+ """ + + def __init__(self, filter_sizes=(1, 3, 5, 7), fusion=False, **kwargs): + super(DMHead, self).__init__(**kwargs) + assert isinstance(filter_sizes, (list, tuple)) + self.filter_sizes = filter_sizes + self.fusion = fusion + dcm_modules = [] + for filter_size in self.filter_sizes: + dcm_modules.append( + DCM(filter_size, + self.fusion, + self.in_channels, + self.channels, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg)) + self.dcm_modules = nn.ModuleList(dcm_modules) + self.bottleneck = ConvModule( + self.in_channels + len(filter_sizes) * self.channels, + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + def forward(self, inputs): + """Forward function.""" + x = self._transform_inputs(inputs) + dcm_outs = [x] + for dcm_module in self.dcm_modules: + dcm_outs.append(dcm_module(x)) + dcm_outs = torch.cat(dcm_outs, dim=1) + output = self.bottleneck(dcm_outs) + output = self.cls_seg(output) + return output diff --git a/mmseg/models/decode_heads/dnl_head.py b/mmseg/models/decode_heads/dnl_head.py new file mode 100644 index 0000000000000000000000000000000000000000..52a662ccb6ae8ff00930eb54ed71113724b6494e --- /dev/null +++ b/mmseg/models/decode_heads/dnl_head.py @@ -0,0 +1,131 @@ +import torch +from mmcv.cnn import NonLocal2d +from torch import nn + +from ..builder import HEADS +from .fcn_head import FCNHead + + +class DisentangledNonLocal2d(NonLocal2d): + """Disentangled Non-Local Blocks. + + Args: + temperature (float): Temperature to adjust attention. Default: 0.05 + """ + + def __init__(self, *arg, temperature, **kwargs): + super().__init__(*arg, **kwargs) + self.temperature = temperature + self.conv_mask = nn.Conv2d(self.in_channels, 1, kernel_size=1) + + def embedded_gaussian(self, theta_x, phi_x): + """Embedded gaussian with temperature.""" + + # NonLocal2d pairwise_weight: [N, HxW, HxW] + pairwise_weight = torch.matmul(theta_x, phi_x) + if self.use_scale: + # theta_x.shape[-1] is `self.inter_channels` + pairwise_weight /= theta_x.shape[-1]**0.5 + pairwise_weight /= self.temperature + pairwise_weight = pairwise_weight.softmax(dim=-1) + return pairwise_weight + + def forward(self, x): + # x: [N, C, H, W] + n = x.size(0) + + # g_x: [N, HxW, C] + g_x = self.g(x).view(n, self.inter_channels, -1) + g_x = g_x.permute(0, 2, 1) + + # theta_x: [N, HxW, C], phi_x: [N, C, HxW] + if self.mode == 'gaussian': + theta_x = x.view(n, self.in_channels, -1) + theta_x = theta_x.permute(0, 2, 1) + if self.sub_sample: + phi_x = self.phi(x).view(n, self.in_channels, -1) + else: + phi_x = x.view(n, self.in_channels, -1) + elif self.mode == 'concatenation': + theta_x = self.theta(x).view(n, self.inter_channels, -1, 1) + phi_x = self.phi(x).view(n, self.inter_channels, 1, -1) + else: + theta_x = self.theta(x).view(n, self.inter_channels, -1) + theta_x = theta_x.permute(0, 2, 1) + phi_x = self.phi(x).view(n, self.inter_channels, -1) + + # subtract mean + theta_x -= theta_x.mean(dim=-2, keepdim=True) + phi_x -= phi_x.mean(dim=-1, keepdim=True) + + pairwise_func = getattr(self, self.mode) + # pairwise_weight: [N, HxW, HxW] + pairwise_weight = pairwise_func(theta_x, phi_x) + + # y: [N, HxW, C] + y = torch.matmul(pairwise_weight, g_x) + # y: [N, C, H, W] + y = y.permute(0, 2, 1).contiguous().reshape(n, self.inter_channels, + *x.size()[2:]) + + # unary_mask: [N, 1, HxW] + unary_mask = self.conv_mask(x) + unary_mask = unary_mask.view(n, 1, -1) + unary_mask = unary_mask.softmax(dim=-1) + # unary_x: [N, 1, C] + unary_x = 
torch.matmul(unary_mask, g_x) + # unary_x: [N, C, 1, 1] + unary_x = unary_x.permute(0, 2, 1).contiguous().reshape( + n, self.inter_channels, 1, 1) + + output = x + self.conv_out(y + unary_x) + + return output + + +@HEADS.register_module() +class DNLHead(FCNHead): + """Disentangled Non-Local Neural Networks. + + This head is the implementation of `DNLNet + `_. + + Args: + reduction (int): Reduction factor of projection transform. Default: 2. + use_scale (bool): Whether to scale pairwise_weight by + sqrt(1/inter_channels). Default: False. + mode (str): The nonlocal mode. Options are 'embedded_gaussian', + 'dot_product'. Default: 'embedded_gaussian.'. + temperature (float): Temperature to adjust attention. Default: 0.05 + """ + + def __init__(self, + reduction=2, + use_scale=True, + mode='embedded_gaussian', + temperature=0.05, + **kwargs): + super(DNLHead, self).__init__(num_convs=2, **kwargs) + self.reduction = reduction + self.use_scale = use_scale + self.mode = mode + self.temperature = temperature + self.dnl_block = DisentangledNonLocal2d( + in_channels=self.channels, + reduction=self.reduction, + use_scale=self.use_scale, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + mode=self.mode, + temperature=self.temperature) + + def forward(self, inputs): + """Forward function.""" + x = self._transform_inputs(inputs) + output = self.convs[0](x) + output = self.dnl_block(output) + output = self.convs[1](output) + if self.concat_input: + output = self.conv_cat(torch.cat([x, output], dim=1)) + output = self.cls_seg(output) + return output diff --git a/mmseg/models/decode_heads/ema_head.py b/mmseg/models/decode_heads/ema_head.py new file mode 100644 index 0000000000000000000000000000000000000000..619d757046a61e2dd1ad4b88a8b85f4688f35492 --- /dev/null +++ b/mmseg/models/decode_heads/ema_head.py @@ -0,0 +1,168 @@ +import math + +import torch +import torch.distributed as dist +import torch.nn as nn +import torch.nn.functional as F +from mmcv.cnn import ConvModule + +from ..builder import HEADS +from .decode_head import BaseDecodeHead + + +def reduce_mean(tensor): + """Reduce mean when distributed training.""" + if not (dist.is_available() and dist.is_initialized()): + return tensor + tensor = tensor.clone() + dist.all_reduce(tensor.div_(dist.get_world_size()), op=dist.ReduceOp.SUM) + return tensor + + +class EMAModule(nn.Module): + """Expectation Maximization Attention Module used in EMANet. + + Args: + channels (int): Channels of the whole module. + num_bases (int): Number of bases. + num_stages (int): Number of the EM iterations. + """ + + def __init__(self, channels, num_bases, num_stages, momentum): + super(EMAModule, self).__init__() + assert num_stages >= 1, 'num_stages must be at least 1!' + self.num_bases = num_bases + self.num_stages = num_stages + self.momentum = momentum + + bases = torch.zeros(1, channels, self.num_bases) + bases.normal_(0, math.sqrt(2. 
/ self.num_bases)) + # [1, channels, num_bases] + bases = F.normalize(bases, dim=1, p=2) + self.register_buffer('bases', bases) + + def forward(self, feats): + """Forward function.""" + batch_size, channels, height, width = feats.size() + # [batch_size, channels, height*width] + feats = feats.view(batch_size, channels, height * width) + # [batch_size, channels, num_bases] + bases = self.bases.repeat(batch_size, 1, 1) + + with torch.no_grad(): + for i in range(self.num_stages): + # [batch_size, height*width, num_bases] + attention = torch.einsum('bcn,bck->bnk', feats, bases) + attention = F.softmax(attention, dim=2) + # l1 norm + attention_normed = F.normalize(attention, dim=1, p=1) + # [batch_size, channels, num_bases] + bases = torch.einsum('bcn,bnk->bck', feats, attention_normed) + # l2 norm + bases = F.normalize(bases, dim=1, p=2) + + feats_recon = torch.einsum('bck,bnk->bcn', bases, attention) + feats_recon = feats_recon.view(batch_size, channels, height, width) + + if self.training: + bases = bases.mean(dim=0, keepdim=True) + bases = reduce_mean(bases) + # l2 norm + bases = F.normalize(bases, dim=1, p=2) + self.bases = (1 - + self.momentum) * self.bases + self.momentum * bases + + return feats_recon + + +@HEADS.register_module() +class EMAHead(BaseDecodeHead): + """Expectation Maximization Attention Networks for Semantic Segmentation. + + This head is the implementation of `EMANet + `_. + + Args: + ema_channels (int): EMA module channels + num_bases (int): Number of bases. + num_stages (int): Number of the EM iterations. + concat_input (bool): Whether concat the input and output of convs + before classification layer. Default: True + momentum (float): Momentum to update the base. Default: 0.1. + """ + + def __init__(self, + ema_channels, + num_bases, + num_stages, + concat_input=True, + momentum=0.1, + **kwargs): + super(EMAHead, self).__init__(**kwargs) + self.ema_channels = ema_channels + self.num_bases = num_bases + self.num_stages = num_stages + self.concat_input = concat_input + self.momentum = momentum + self.ema_module = EMAModule(self.ema_channels, self.num_bases, + self.num_stages, self.momentum) + + self.ema_in_conv = ConvModule( + self.in_channels, + self.ema_channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + # project (0, inf) -> (-inf, inf) + self.ema_mid_conv = ConvModule( + self.ema_channels, + self.ema_channels, + 1, + conv_cfg=self.conv_cfg, + norm_cfg=None, + act_cfg=None) + for param in self.ema_mid_conv.parameters(): + param.requires_grad = False + + self.ema_out_conv = ConvModule( + self.ema_channels, + self.ema_channels, + 1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=None) + self.bottleneck = ConvModule( + self.ema_channels, + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + if self.concat_input: + self.conv_cat = ConvModule( + self.in_channels + self.channels, + self.channels, + kernel_size=3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + def forward(self, inputs): + """Forward function.""" + x = self._transform_inputs(inputs) + feats = self.ema_in_conv(x) + identity = feats + feats = self.ema_mid_conv(feats) + recon = self.ema_module(feats) + recon = F.relu(recon, inplace=True) + recon = self.ema_out_conv(recon) + output = F.relu(identity + recon, inplace=True) + output = self.bottleneck(output) + if self.concat_input: + output = self.conv_cat(torch.cat([x, output], dim=1)) + 
output = self.cls_seg(output) + return output diff --git a/mmseg/models/decode_heads/enc_head.py b/mmseg/models/decode_heads/enc_head.py new file mode 100644 index 0000000000000000000000000000000000000000..0c11994cf6272bd52ed3576486f4b8d7366af940 --- /dev/null +++ b/mmseg/models/decode_heads/enc_head.py @@ -0,0 +1,187 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from mmcv.cnn import ConvModule, build_norm_layer + +from mmseg.ops import Encoding, resize +from ..builder import HEADS, build_loss +from .decode_head import BaseDecodeHead + + +class EncModule(nn.Module): + """Encoding Module used in EncNet. + + Args: + in_channels (int): Input channels. + num_codes (int): Number of code words. + conv_cfg (dict|None): Config of conv layers. + norm_cfg (dict|None): Config of norm layers. + act_cfg (dict): Config of activation layers. + """ + + def __init__(self, in_channels, num_codes, conv_cfg, norm_cfg, act_cfg): + super(EncModule, self).__init__() + self.encoding_project = ConvModule( + in_channels, + in_channels, + 1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + # TODO: resolve this hack + # change to 1d + if norm_cfg is not None: + encoding_norm_cfg = norm_cfg.copy() + if encoding_norm_cfg['type'] in ['BN', 'IN']: + encoding_norm_cfg['type'] += '1d' + else: + encoding_norm_cfg['type'] = encoding_norm_cfg['type'].replace( + '2d', '1d') + else: + # fallback to BN1d + encoding_norm_cfg = dict(type='BN1d') + self.encoding = nn.Sequential( + Encoding(channels=in_channels, num_codes=num_codes), + build_norm_layer(encoding_norm_cfg, num_codes)[1], + nn.ReLU(inplace=True)) + self.fc = nn.Sequential( + nn.Linear(in_channels, in_channels), nn.Sigmoid()) + + def forward(self, x): + """Forward function.""" + encoding_projection = self.encoding_project(x) + encoding_feat = self.encoding(encoding_projection).mean(dim=1) + batch_size, channels, _, _ = x.size() + gamma = self.fc(encoding_feat) + y = gamma.view(batch_size, channels, 1, 1) + output = F.relu_(x + x * y) + return encoding_feat, output + + +@HEADS.register_module() +class EncHead(BaseDecodeHead): + """Context Encoding for Semantic Segmentation. + + This head is the implementation of `EncNet + `_. + + Args: + num_codes (int): Number of code words. Default: 32. + use_se_loss (bool): Whether use Semantic Encoding Loss (SE-loss) to + regularize the training. Default: True. + add_lateral (bool): Whether use lateral connection to fuse features. + Default: False. + loss_se_decode (dict): Config of decode loss. + Default: dict(type='CrossEntropyLoss', use_sigmoid=True). 
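+
+        Example:
+            A minimal usage sketch (illustrative only; the feature shapes and
+            num_classes are assumptions, and the second returned tensor is the
+            SE-loss logit):
+
+            >>> import torch
+            >>> head = EncHead(in_channels=[512, 1024, 2048],
+            ...                in_index=[1, 2, 3], channels=512,
+            ...                num_classes=19, num_codes=32)
+            >>> feats = [torch.rand(1, 256, 128, 128),
+            ...          torch.rand(1, 512, 64, 64),
+            ...          torch.rand(1, 1024, 32, 32),
+            ...          torch.rand(1, 2048, 16, 16)]
+            >>> seg_logit, se_logit = head(feats)
+            >>> seg_logit.shape, se_logit.shape
+            (torch.Size([1, 19, 16, 16]), torch.Size([1, 19]))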
+ """ + + def __init__(self, + num_codes=32, + use_se_loss=True, + add_lateral=False, + loss_se_decode=dict( + type='CrossEntropyLoss', + use_sigmoid=True, + loss_weight=0.2), + **kwargs): + super(EncHead, self).__init__( + input_transform='multiple_select', **kwargs) + self.use_se_loss = use_se_loss + self.add_lateral = add_lateral + self.num_codes = num_codes + self.bottleneck = ConvModule( + self.in_channels[-1], + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + if add_lateral: + self.lateral_convs = nn.ModuleList() + for in_channels in self.in_channels[:-1]: # skip the last one + self.lateral_convs.append( + ConvModule( + in_channels, + self.channels, + 1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg)) + self.fusion = ConvModule( + len(self.in_channels) * self.channels, + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + self.enc_module = EncModule( + self.channels, + num_codes=num_codes, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + if self.use_se_loss: + self.loss_se_decode = build_loss(loss_se_decode) + self.se_layer = nn.Linear(self.channels, self.num_classes) + + def forward(self, inputs): + """Forward function.""" + inputs = self._transform_inputs(inputs) + feat = self.bottleneck(inputs[-1]) + if self.add_lateral: + laterals = [ + resize( + lateral_conv(inputs[i]), + size=feat.shape[2:], + mode='bilinear', + align_corners=self.align_corners) + for i, lateral_conv in enumerate(self.lateral_convs) + ] + feat = self.fusion(torch.cat([feat, *laterals], 1)) + encode_feat, output = self.enc_module(feat) + output = self.cls_seg(output) + if self.use_se_loss: + se_output = self.se_layer(encode_feat) + return output, se_output + else: + return output + + def forward_test(self, inputs, img_metas, test_cfg): + """Forward function for testing, ignore se_loss.""" + if self.use_se_loss: + return self.forward(inputs)[0] + else: + return self.forward(inputs) + + @staticmethod + def _convert_to_onehot_labels(seg_label, num_classes): + """Convert segmentation label to onehot. + + Args: + seg_label (Tensor): Segmentation label of shape (N, H, W). + num_classes (int): Number of classes. + + Returns: + Tensor: Onehot labels of shape (N, num_classes). + """ + + batch_size = seg_label.size(0) + onehot_labels = seg_label.new_zeros((batch_size, num_classes)) + for i in range(batch_size): + hist = seg_label[i].float().histc( + bins=num_classes, min=0, max=num_classes - 1) + onehot_labels[i] = hist > 0 + return onehot_labels + + def losses(self, seg_logit, seg_label): + """Compute segmentation and semantic encoding loss.""" + seg_logit, se_seg_logit = seg_logit + loss = dict() + loss.update(super(EncHead, self).losses(seg_logit, seg_label)) + se_loss = self.loss_se_decode( + se_seg_logit, + self._convert_to_onehot_labels(seg_label, self.num_classes)) + loss['loss_se'] = se_loss + return loss diff --git a/mmseg/models/decode_heads/fcn_head.py b/mmseg/models/decode_heads/fcn_head.py new file mode 100644 index 0000000000000000000000000000000000000000..4ea3742f0b685ffc891d70db86cd6ea058283fd6 --- /dev/null +++ b/mmseg/models/decode_heads/fcn_head.py @@ -0,0 +1,81 @@ +import torch +import torch.nn as nn +from mmcv.cnn import ConvModule + +from ..builder import HEADS +from .decode_head import BaseDecodeHead + + +@HEADS.register_module() +class FCNHead(BaseDecodeHead): + """Fully Convolution Networks for Semantic Segmentation. 
+ + This head is the implementation of `FCNNet `_. + + Args: + num_convs (int): Number of convs in the head. Default: 2. + kernel_size (int): The kernel size for convs in the head. Default: 3. + concat_input (bool): Whether to concatenate the input and output of convs + before the classification layer. Default: True. + dilation (int): The dilation rate for convs in the head. Default: 1. + """ + + def __init__(self, + num_convs=2, + kernel_size=3, + concat_input=True, + dilation=1, + **kwargs): + assert num_convs >= 0 and dilation > 0 and isinstance(dilation, int) + self.num_convs = num_convs + self.concat_input = concat_input + self.kernel_size = kernel_size + super(FCNHead, self).__init__(**kwargs) + if num_convs == 0: + assert self.in_channels == self.channels + + conv_padding = (kernel_size // 2) * dilation + convs = [] + convs.append( + ConvModule( + self.in_channels, + self.channels, + kernel_size=kernel_size, + padding=conv_padding, + dilation=dilation, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg)) + for i in range(num_convs - 1): + convs.append( + ConvModule( + self.channels, + self.channels, + kernel_size=kernel_size, + padding=conv_padding, + dilation=dilation, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg)) + if num_convs == 0: + self.convs = nn.Identity() + else: + self.convs = nn.Sequential(*convs) + if self.concat_input: + self.conv_cat = ConvModule( + self.in_channels + self.channels, + self.channels, + kernel_size=kernel_size, + padding=kernel_size // 2, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + def forward(self, inputs): + """Forward function.""" + x = self._transform_inputs(inputs) + output = self.convs(x) + if self.concat_input: + output = self.conv_cat(torch.cat([x, output], dim=1)) + output = self.cls_seg(output) + return output diff --git a/mmseg/models/decode_heads/fpn_head.py b/mmseg/models/decode_heads/fpn_head.py new file mode 100644 index 0000000000000000000000000000000000000000..9b6ada005995cc7e7f0b0cd66f4880d6a2bd665b --- /dev/null +++ b/mmseg/models/decode_heads/fpn_head.py @@ -0,0 +1,68 @@ +import numpy as np +import torch.nn as nn +from mmcv.cnn import ConvModule + +from mmseg.ops import resize +from ..builder import HEADS +from .decode_head import BaseDecodeHead + + +@HEADS.register_module() +class FPNHead(BaseDecodeHead): + """Panoptic Feature Pyramid Networks. + + This head is the implementation of `Semantic FPN + `_. + + Args: + feature_strides (tuple[int]): The strides for input feature maps, + each of which gets a stack of lateral convs. All strides are + supposed to be powers of 2. The first one is of the largest + resolution.
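+
+        Example:
+            A minimal usage sketch (illustrative only; the FPN feature shapes
+            and num_classes are assumptions):
+
+            >>> import torch
+            >>> head = FPNHead(feature_strides=[4, 8, 16, 32],
+            ...                in_channels=[256, 256, 256, 256],
+            ...                in_index=[0, 1, 2, 3], channels=128,
+            ...                num_classes=19)
+            >>> feats = [torch.rand(1, 256, 64, 64),
+            ...          torch.rand(1, 256, 32, 32),
+            ...          torch.rand(1, 256, 16, 16),
+            ...          torch.rand(1, 256, 8, 8)]
+            >>> # every level is upsampled to the stride-4 resolution and summed
+            >>> head(feats).shape
+            torch.Size([1, 19, 64, 64])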
+ """ + + def __init__(self, feature_strides, **kwargs): + super(FPNHead, self).__init__( + input_transform='multiple_select', **kwargs) + assert len(feature_strides) == len(self.in_channels) + assert min(feature_strides) == feature_strides[0] + self.feature_strides = feature_strides + + self.scale_heads = nn.ModuleList() + for i in range(len(feature_strides)): + head_length = max( + 1, + int(np.log2(feature_strides[i]) - np.log2(feature_strides[0]))) + scale_head = [] + for k in range(head_length): + scale_head.append( + ConvModule( + self.in_channels[i] if k == 0 else self.channels, + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg)) + if feature_strides[i] != feature_strides[0]: + scale_head.append( + nn.Upsample( + scale_factor=2, + mode='bilinear', + align_corners=self.align_corners)) + self.scale_heads.append(nn.Sequential(*scale_head)) + + def forward(self, inputs): + + x = self._transform_inputs(inputs) + + output = self.scale_heads[0](x[0]) + for i in range(1, len(self.feature_strides)): + # non inplace + output = output + resize( + self.scale_heads[i](x[i]), + size=output.shape[2:], + mode='bilinear', + align_corners=self.align_corners) + + output = self.cls_seg(output) + return output diff --git a/mmseg/models/decode_heads/gc_head.py b/mmseg/models/decode_heads/gc_head.py new file mode 100644 index 0000000000000000000000000000000000000000..93f60ad61c4d78ace0a7e0e6a710c0248c0bcc82 --- /dev/null +++ b/mmseg/models/decode_heads/gc_head.py @@ -0,0 +1,47 @@ +import torch +from mmcv.cnn import ContextBlock + +from ..builder import HEADS +from .fcn_head import FCNHead + + +@HEADS.register_module() +class GCHead(FCNHead): + """GCNet: Non-local Networks Meet Squeeze-Excitation Networks and Beyond. + + This head is the implementation of `GCNet + `_. + + Args: + ratio (float): Multiplier of channels ratio. Default: 1/4. + pooling_type (str): The pooling type of context aggregation. + Options are 'att', 'avg'. Default: 'avg'. + fusion_types (tuple[str]): The fusion type for feature fusion. + Options are 'channel_add', 'channel_mul'. Default: ('channel_add',) + """ + + def __init__(self, + ratio=1 / 4., + pooling_type='att', + fusion_types=('channel_add', ), + **kwargs): + super(GCHead, self).__init__(num_convs=2, **kwargs) + self.ratio = ratio + self.pooling_type = pooling_type + self.fusion_types = fusion_types + self.gc_block = ContextBlock( + in_channels=self.channels, + ratio=self.ratio, + pooling_type=self.pooling_type, + fusion_types=self.fusion_types) + + def forward(self, inputs): + """Forward function.""" + x = self._transform_inputs(inputs) + output = self.convs[0](x) + output = self.gc_block(output) + output = self.convs[1](output) + if self.concat_input: + output = self.conv_cat(torch.cat([x, output], dim=1)) + output = self.cls_seg(output) + return output diff --git a/mmseg/models/decode_heads/lraspp_head.py b/mmseg/models/decode_heads/lraspp_head.py new file mode 100644 index 0000000000000000000000000000000000000000..32a093caded74a97e991ca61d45bec888396c9f2 --- /dev/null +++ b/mmseg/models/decode_heads/lraspp_head.py @@ -0,0 +1,90 @@ +import torch +import torch.nn as nn +from mmcv import is_tuple_of +from mmcv.cnn import ConvModule + +from mmseg.ops import resize +from ..builder import HEADS +from .decode_head import BaseDecodeHead + + +@HEADS.register_module() +class LRASPPHead(BaseDecodeHead): + """Lite R-ASPP (LRASPP) head is proposed in Searching for MobileNetV3. 
+ + This head is the improved implementation of `Searching for MobileNetV3 + `_. + + Args: + branch_channels (tuple[int]): The number of output channels in every + each branch. Default: (32, 64). + """ + + def __init__(self, branch_channels=(32, 64), **kwargs): + super(LRASPPHead, self).__init__(**kwargs) + if self.input_transform != 'multiple_select': + raise ValueError('in Lite R-ASPP (LRASPP) head, input_transform ' + f'must be \'multiple_select\'. But received ' + f'\'{self.input_transform}\'') + assert is_tuple_of(branch_channels, int) + assert len(branch_channels) == len(self.in_channels) - 1 + self.branch_channels = branch_channels + + self.convs = nn.Sequential() + self.conv_ups = nn.Sequential() + for i in range(len(branch_channels)): + self.convs.add_module( + f'conv{i}', + nn.Conv2d( + self.in_channels[i], branch_channels[i], 1, bias=False)) + self.conv_ups.add_module( + f'conv_up{i}', + ConvModule( + self.channels + branch_channels[i], + self.channels, + 1, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + bias=False)) + + self.conv_up_input = nn.Conv2d(self.channels, self.channels, 1) + + self.aspp_conv = ConvModule( + self.in_channels[-1], + self.channels, + 1, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + bias=False) + self.image_pool = nn.Sequential( + nn.AvgPool2d(kernel_size=49, stride=(16, 20)), + ConvModule( + self.in_channels[2], + self.channels, + 1, + act_cfg=dict(type='Sigmoid'), + bias=False)) + + def forward(self, inputs): + """Forward function.""" + inputs = self._transform_inputs(inputs) + + x = inputs[-1] + + x = self.aspp_conv(x) * resize( + self.image_pool(x), + size=x.size()[2:], + mode='bilinear', + align_corners=self.align_corners) + x = self.conv_up_input(x) + + for i in range(len(self.branch_channels) - 1, -1, -1): + x = resize( + x, + size=inputs[i].size()[2:], + mode='bilinear', + align_corners=self.align_corners) + x = torch.cat([x, self.convs[i](inputs[i])], 1) + x = self.conv_ups[i](x) + + return self.cls_seg(x) diff --git a/mmseg/models/decode_heads/nl_head.py b/mmseg/models/decode_heads/nl_head.py new file mode 100644 index 0000000000000000000000000000000000000000..31658755a6599bc9f52bd59767ef60452d5b9fbb --- /dev/null +++ b/mmseg/models/decode_heads/nl_head.py @@ -0,0 +1,49 @@ +import torch +from mmcv.cnn import NonLocal2d + +from ..builder import HEADS +from .fcn_head import FCNHead + + +@HEADS.register_module() +class NLHead(FCNHead): + """Non-local Neural Networks. + + This head is the implementation of `NLNet + `_. + + Args: + reduction (int): Reduction factor of projection transform. Default: 2. + use_scale (bool): Whether to scale pairwise_weight by + sqrt(1/inter_channels). Default: True. + mode (str): The nonlocal mode. Options are 'embedded_gaussian', + 'dot_product'. Default: 'embedded_gaussian.'. 
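+
+    Example (a minimal sketch; the feature and channel sizes are arbitrary
+    assumptions):
+        >>> import torch
+        >>> head = NLHead(in_channels=512, channels=128, num_classes=19,
+        ...               in_index=0)
+        >>> feats = [torch.rand(1, 512, 32, 32)]
+        >>> seg_logits = head(feats)  # shape: (1, 19, 32, 32)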
+ """ + + def __init__(self, + reduction=2, + use_scale=True, + mode='embedded_gaussian', + **kwargs): + super(NLHead, self).__init__(num_convs=2, **kwargs) + self.reduction = reduction + self.use_scale = use_scale + self.mode = mode + self.nl_block = NonLocal2d( + in_channels=self.channels, + reduction=self.reduction, + use_scale=self.use_scale, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + mode=self.mode) + + def forward(self, inputs): + """Forward function.""" + x = self._transform_inputs(inputs) + output = self.convs[0](x) + output = self.nl_block(output) + output = self.convs[1](output) + if self.concat_input: + output = self.conv_cat(torch.cat([x, output], dim=1)) + output = self.cls_seg(output) + return output diff --git a/mmseg/models/decode_heads/ocr_head.py b/mmseg/models/decode_heads/ocr_head.py new file mode 100644 index 0000000000000000000000000000000000000000..e180e10276e9a4d794501d0f399740c189775673 --- /dev/null +++ b/mmseg/models/decode_heads/ocr_head.py @@ -0,0 +1,127 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from mmcv.cnn import ConvModule + +from mmseg.ops import resize +from ..builder import HEADS +from ..utils import SelfAttentionBlock as _SelfAttentionBlock +from .cascade_decode_head import BaseCascadeDecodeHead + + +class SpatialGatherModule(nn.Module): + """Aggregate the context features according to the initial predicted + probability distribution. + + Employ the soft-weighted method to aggregate the context. + """ + + def __init__(self, scale): + super(SpatialGatherModule, self).__init__() + self.scale = scale + + def forward(self, feats, probs): + """Forward function.""" + batch_size, num_classes, height, width = probs.size() + channels = feats.size(1) + probs = probs.view(batch_size, num_classes, -1) + feats = feats.view(batch_size, channels, -1) + # [batch_size, height*width, num_classes] + feats = feats.permute(0, 2, 1) + # [batch_size, channels, height*width] + probs = F.softmax(self.scale * probs, dim=2) + # [batch_size, channels, num_classes] + ocr_context = torch.matmul(probs, feats) + ocr_context = ocr_context.permute(0, 2, 1).contiguous().unsqueeze(3) + return ocr_context + + +class ObjectAttentionBlock(_SelfAttentionBlock): + """Make a OCR used SelfAttentionBlock.""" + + def __init__(self, in_channels, channels, scale, conv_cfg, norm_cfg, + act_cfg): + if scale > 1: + query_downsample = nn.MaxPool2d(kernel_size=scale) + else: + query_downsample = None + super(ObjectAttentionBlock, self).__init__( + key_in_channels=in_channels, + query_in_channels=in_channels, + channels=channels, + out_channels=in_channels, + share_key_query=False, + query_downsample=query_downsample, + key_downsample=None, + key_query_num_convs=2, + key_query_norm=True, + value_out_num_convs=1, + value_out_norm=True, + matmul_norm=True, + with_out=True, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + self.bottleneck = ConvModule( + in_channels * 2, + in_channels, + 1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + def forward(self, query_feats, key_feats): + """Forward function.""" + context = super(ObjectAttentionBlock, + self).forward(query_feats, key_feats) + output = self.bottleneck(torch.cat([context, query_feats], dim=1)) + if self.query_downsample is not None: + output = resize(query_feats) + + return output + + +@HEADS.register_module() +class OCRHead(BaseCascadeDecodeHead): + """Object-Contextual Representations for Semantic Segmentation. 
+ + This head is the implementation of `OCRNet + `_. + + Args: + ocr_channels (int): The intermediate channels of OCR block. + scale (int): The scale of probability map in SpatialGatherModule in + Default: 1. + """ + + def __init__(self, ocr_channels, scale=1, **kwargs): + super(OCRHead, self).__init__(**kwargs) + self.ocr_channels = ocr_channels + self.scale = scale + self.object_context_block = ObjectAttentionBlock( + self.channels, + self.ocr_channels, + self.scale, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + self.spatial_gather_module = SpatialGatherModule(self.scale) + + self.bottleneck = ConvModule( + self.in_channels, + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + def forward(self, inputs, prev_output): + """Forward function.""" + x = self._transform_inputs(inputs) + feats = self.bottleneck(x) + context = self.spatial_gather_module(feats, prev_output) + object_context = self.object_context_block(feats, context) + output = self.cls_seg(object_context) + + return output diff --git a/mmseg/models/decode_heads/point_head.py b/mmseg/models/decode_heads/point_head.py new file mode 100644 index 0000000000000000000000000000000000000000..90a23635d935abf1179f181f04ab0f4265c6a1c7 --- /dev/null +++ b/mmseg/models/decode_heads/point_head.py @@ -0,0 +1,349 @@ +# Modified from https://github.com/facebookresearch/detectron2/tree/master/projects/PointRend/point_head/point_head.py # noqa + +import torch +import torch.nn as nn +from mmcv.cnn import ConvModule, normal_init +from mmcv.ops import point_sample + +from mmseg.models.builder import HEADS +from mmseg.ops import resize +from ..losses import accuracy +from .cascade_decode_head import BaseCascadeDecodeHead + + +def calculate_uncertainty(seg_logits): + """Estimate uncertainty based on seg logits. + + For each location of the prediction ``seg_logits`` we estimate + uncertainty as the difference between top first and top second + predicted logits. + + Args: + seg_logits (Tensor): Semantic segmentation logits, + shape (batch_size, num_classes, height, width). + + Returns: + scores (Tensor): T uncertainty scores with the most uncertain + locations having the highest uncertainty score, shape ( + batch_size, 1, height, width) + """ + top2_scores = torch.topk(seg_logits, k=2, dim=1)[0] + return (top2_scores[:, 1] - top2_scores[:, 0]).unsqueeze(1) + + +@HEADS.register_module() +class PointHead(BaseCascadeDecodeHead): + """A mask point head use in PointRend. + + ``PointHead`` use shared multi-layer perceptron (equivalent to + nn.Conv1d) to predict the logit of input points. The fine-grained feature + and coarse feature will be concatenate together for predication. + + Args: + num_fcs (int): Number of fc layers in the head. Default: 3. + in_channels (int): Number of input channels. Default: 256. + fc_channels (int): Number of fc channels. Default: 256. + num_classes (int): Number of classes for logits. Default: 80. + class_agnostic (bool): Whether use class agnostic classification. + If so, the output channels of logits will be 1. Default: False. + coarse_pred_each_layer (bool): Whether concatenate coarse feature with + the output of each fc layer. Default: True. + conv_cfg (dict|None): Dictionary to construct and config conv layer. + Default: dict(type='Conv1d')) + norm_cfg (dict|None): Dictionary to construct and config norm layer. + Default: None. + loss_point (dict): Dictionary to construct and config loss layer of + point head. 
Default: dict(type='CrossEntropyLoss', use_mask=True, + loss_weight=1.0). + """ + + def __init__(self, + num_fcs=3, + coarse_pred_each_layer=True, + conv_cfg=dict(type='Conv1d'), + norm_cfg=None, + act_cfg=dict(type='ReLU', inplace=False), + **kwargs): + super(PointHead, self).__init__( + input_transform='multiple_select', + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + **kwargs) + + self.num_fcs = num_fcs + self.coarse_pred_each_layer = coarse_pred_each_layer + + fc_in_channels = sum(self.in_channels) + self.num_classes + fc_channels = self.channels + self.fcs = nn.ModuleList() + for k in range(num_fcs): + fc = ConvModule( + fc_in_channels, + fc_channels, + kernel_size=1, + stride=1, + padding=0, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + self.fcs.append(fc) + fc_in_channels = fc_channels + fc_in_channels += self.num_classes if self.coarse_pred_each_layer \ + else 0 + self.fc_seg = nn.Conv1d( + fc_in_channels, + self.num_classes, + kernel_size=1, + stride=1, + padding=0) + if self.dropout_ratio > 0: + self.dropout = nn.Dropout(self.dropout_ratio) + delattr(self, 'conv_seg') + + def init_weights(self): + """Initialize weights of classification layer.""" + normal_init(self.fc_seg, std=0.001) + + def cls_seg(self, feat): + """Classify each pixel with fc.""" + if self.dropout is not None: + feat = self.dropout(feat) + output = self.fc_seg(feat) + return output + + def forward(self, fine_grained_point_feats, coarse_point_feats): + x = torch.cat([fine_grained_point_feats, coarse_point_feats], dim=1) + for fc in self.fcs: + x = fc(x) + if self.coarse_pred_each_layer: + x = torch.cat((x, coarse_point_feats), dim=1) + return self.cls_seg(x) + + def _get_fine_grained_point_feats(self, x, points): + """Sample from fine grained features. + + Args: + x (list[Tensor]): Feature pyramid from by neck or backbone. + points (Tensor): Point coordinates, shape (batch_size, + num_points, 2). + + Returns: + fine_grained_feats (Tensor): Sampled fine grained feature, + shape (batch_size, sum(channels of x), num_points). + """ + + fine_grained_feats_list = [ + point_sample(_, points, align_corners=self.align_corners) + for _ in x + ] + if len(fine_grained_feats_list) > 1: + fine_grained_feats = torch.cat(fine_grained_feats_list, dim=1) + else: + fine_grained_feats = fine_grained_feats_list[0] + + return fine_grained_feats + + def _get_coarse_point_feats(self, prev_output, points): + """Sample from fine grained features. + + Args: + prev_output (list[Tensor]): Prediction of previous decode head. + points (Tensor): Point coordinates, shape (batch_size, + num_points, 2). + + Returns: + coarse_feats (Tensor): Sampled coarse feature, shape (batch_size, + num_classes, num_points). + """ + + coarse_feats = point_sample( + prev_output, points, align_corners=self.align_corners) + + return coarse_feats + + def forward_train(self, inputs, prev_output, img_metas, gt_semantic_seg, + train_cfg): + """Forward function for training. + Args: + inputs (list[Tensor]): List of multi-level img features. + prev_output (Tensor): The output of previous decode head. + img_metas (list[dict]): List of image info dict where each dict + has: 'img_shape', 'scale_factor', 'flip', and may also contain + 'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'. + For details on the values of these keys see + `mmseg/datasets/pipelines/formatting.py:Collect`. + gt_semantic_seg (Tensor): Semantic segmentation masks + used if the architecture supports semantic segmentation task. 
+ train_cfg (dict): The training config. + + Returns: + dict[str, Tensor]: a dictionary of loss components + """ + x = self._transform_inputs(inputs) + with torch.no_grad(): + points = self.get_points_train( + prev_output, calculate_uncertainty, cfg=train_cfg) + fine_grained_point_feats = self._get_fine_grained_point_feats( + x, points) + coarse_point_feats = self._get_coarse_point_feats(prev_output, points) + point_logits = self.forward(fine_grained_point_feats, + coarse_point_feats) + point_label = point_sample( + gt_semantic_seg.float(), + points, + mode='nearest', + align_corners=self.align_corners) + point_label = point_label.squeeze(1).long() + + losses = self.losses(point_logits, point_label) + + return losses + + def forward_test(self, inputs, prev_output, img_metas, test_cfg): + """Forward function for testing. + + Args: + inputs (list[Tensor]): List of multi-level img features. + prev_output (Tensor): The output of previous decode head. + img_metas (list[dict]): List of image info dict where each dict + has: 'img_shape', 'scale_factor', 'flip', and may also contain + 'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'. + For details on the values of these keys see + `mmseg/datasets/pipelines/formatting.py:Collect`. + test_cfg (dict): The testing config. + + Returns: + Tensor: Output segmentation map. + """ + + x = self._transform_inputs(inputs) + refined_seg_logits = prev_output.clone() + for _ in range(test_cfg.subdivision_steps): + refined_seg_logits = resize( + refined_seg_logits, + scale_factor=test_cfg.scale_factor, + mode='bilinear', + align_corners=self.align_corners) + batch_size, channels, height, width = refined_seg_logits.shape + point_indices, points = self.get_points_test( + refined_seg_logits, calculate_uncertainty, cfg=test_cfg) + fine_grained_point_feats = self._get_fine_grained_point_feats( + x, points) + coarse_point_feats = self._get_coarse_point_feats( + prev_output, points) + point_logits = self.forward(fine_grained_point_feats, + coarse_point_feats) + + point_indices = point_indices.unsqueeze(1).expand(-1, channels, -1) + refined_seg_logits = refined_seg_logits.reshape( + batch_size, channels, height * width) + refined_seg_logits = refined_seg_logits.scatter_( + 2, point_indices, point_logits) + refined_seg_logits = refined_seg_logits.view( + batch_size, channels, height, width) + + return refined_seg_logits + + def losses(self, point_logits, point_label): + """Compute segmentation loss.""" + loss = dict() + loss['loss_point'] = self.loss_decode( + point_logits, point_label, ignore_index=self.ignore_index) + loss['acc_point'] = accuracy(point_logits, point_label) + return loss + + def get_points_train(self, seg_logits, uncertainty_func, cfg): + """Sample points for training. + + Sample points in [0, 1] x [0, 1] coordinate space based on their + uncertainty. The uncertainties are calculated for each point using + 'uncertainty_func' function that takes point's logit prediction as + input. + + Args: + seg_logits (Tensor): Semantic segmentation logits, shape ( + batch_size, num_classes, height, width). + uncertainty_func (func): uncertainty calculation function. + cfg (dict): Training config of point head. + + Returns: + point_coords (Tensor): A tensor of shape (batch_size, num_points, + 2) that contains the coordinates of ``num_points`` sampled + points. 
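+
+        Example (a sketch; the head arguments and ``cfg`` values are
+        assumptions that mirror typical PointRend training settings):
+            >>> import torch
+            >>> from mmcv import Config
+            >>> head = PointHead(in_channels=[256], in_index=[0],
+            ...                  channels=256, num_classes=19)
+            >>> cfg = Config(
+            ...     dict(num_points=2048, oversample_ratio=3,
+            ...          importance_sample_ratio=0.75))
+            >>> points = head.get_points_train(
+            ...     torch.rand(2, 19, 32, 32), calculate_uncertainty, cfg)
+            >>> points.shape
+            torch.Size([2, 2048, 2])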
+ """ + num_points = cfg.num_points + oversample_ratio = cfg.oversample_ratio + importance_sample_ratio = cfg.importance_sample_ratio + assert oversample_ratio >= 1 + assert 0 <= importance_sample_ratio <= 1 + batch_size = seg_logits.shape[0] + num_sampled = int(num_points * oversample_ratio) + point_coords = torch.rand( + batch_size, num_sampled, 2, device=seg_logits.device) + point_logits = point_sample(seg_logits, point_coords) + # It is crucial to calculate uncertainty based on the sampled + # prediction value for the points. Calculating uncertainties of the + # coarse predictions first and sampling them for points leads to + # incorrect results. To illustrate this: assume uncertainty func( + # logits)=-abs(logits), a sampled point between two coarse + # predictions with -1 and 1 logits has 0 logits, and therefore 0 + # uncertainty value. However, if we calculate uncertainties for the + # coarse predictions first, both will have -1 uncertainty, + # and sampled point will get -1 uncertainty. + point_uncertainties = uncertainty_func(point_logits) + num_uncertain_points = int(importance_sample_ratio * num_points) + num_random_points = num_points - num_uncertain_points + idx = torch.topk( + point_uncertainties[:, 0, :], k=num_uncertain_points, dim=1)[1] + shift = num_sampled * torch.arange( + batch_size, dtype=torch.long, device=seg_logits.device) + idx += shift[:, None] + point_coords = point_coords.view(-1, 2)[idx.view(-1), :].view( + batch_size, num_uncertain_points, 2) + if num_random_points > 0: + rand_point_coords = torch.rand( + batch_size, num_random_points, 2, device=seg_logits.device) + point_coords = torch.cat((point_coords, rand_point_coords), dim=1) + return point_coords + + def get_points_test(self, seg_logits, uncertainty_func, cfg): + """Sample points for testing. + + Find ``num_points`` most uncertain points from ``uncertainty_map``. + + Args: + seg_logits (Tensor): A tensor of shape (batch_size, num_classes, + height, width) for class-specific or class-agnostic prediction. + uncertainty_func (func): uncertainty calculation function. + cfg (dict): Testing config of point head. + + Returns: + point_indices (Tensor): A tensor of shape (batch_size, num_points) + that contains indices from [0, height x width) of the most + uncertain points. + point_coords (Tensor): A tensor of shape (batch_size, num_points, + 2) that contains [0, 1] x [0, 1] normalized coordinates of the + most uncertain points from the ``height x width`` grid . 
+ """ + + num_points = cfg.subdivision_num_points + uncertainty_map = uncertainty_func(seg_logits) + batch_size, _, height, width = uncertainty_map.shape + h_step = 1.0 / height + w_step = 1.0 / width + + uncertainty_map = uncertainty_map.view(batch_size, height * width) + num_points = min(height * width, num_points) + point_indices = uncertainty_map.topk(num_points, dim=1)[1] + point_coords = torch.zeros( + batch_size, + num_points, + 2, + dtype=torch.float, + device=seg_logits.device) + point_coords[:, :, 0] = w_step / 2.0 + (point_indices % + width).float() * w_step + point_coords[:, :, 1] = h_step / 2.0 + (point_indices // + width).float() * h_step + return point_indices, point_coords diff --git a/mmseg/models/decode_heads/psa_head.py b/mmseg/models/decode_heads/psa_head.py new file mode 100644 index 0000000000000000000000000000000000000000..8d915e57f4d22bd723da23b6f03f245476649846 --- /dev/null +++ b/mmseg/models/decode_heads/psa_head.py @@ -0,0 +1,196 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from mmcv.cnn import ConvModule + +from mmseg.ops import resize +from ..builder import HEADS +from .decode_head import BaseDecodeHead + +try: + from mmcv.ops import PSAMask +except ModuleNotFoundError: + PSAMask = None + + +@HEADS.register_module() +class PSAHead(BaseDecodeHead): + """Point-wise Spatial Attention Network for Scene Parsing. + + This head is the implementation of `PSANet + `_. + + Args: + mask_size (tuple[int]): The PSA mask size. It usually equals input + size. + psa_type (str): The type of psa module. Options are 'collect', + 'distribute', 'bi-direction'. Default: 'bi-direction' + compact (bool): Whether use compact map for 'collect' mode. + Default: True. + shrink_factor (int): The downsample factors of psa mask. Default: 2. + normalization_factor (float): The normalize factor of attention. + psa_softmax (bool): Whether use softmax for attention. 
+ """ + + def __init__(self, + mask_size, + psa_type='bi-direction', + compact=False, + shrink_factor=2, + normalization_factor=1.0, + psa_softmax=True, + **kwargs): + if PSAMask is None: + raise RuntimeError('Please install mmcv-full for PSAMask ops') + super(PSAHead, self).__init__(**kwargs) + assert psa_type in ['collect', 'distribute', 'bi-direction'] + self.psa_type = psa_type + self.compact = compact + self.shrink_factor = shrink_factor + self.mask_size = mask_size + mask_h, mask_w = mask_size + self.psa_softmax = psa_softmax + if normalization_factor is None: + normalization_factor = mask_h * mask_w + self.normalization_factor = normalization_factor + + self.reduce = ConvModule( + self.in_channels, + self.channels, + kernel_size=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + self.attention = nn.Sequential( + ConvModule( + self.channels, + self.channels, + kernel_size=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg), + nn.Conv2d( + self.channels, mask_h * mask_w, kernel_size=1, bias=False)) + if psa_type == 'bi-direction': + self.reduce_p = ConvModule( + self.in_channels, + self.channels, + kernel_size=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + self.attention_p = nn.Sequential( + ConvModule( + self.channels, + self.channels, + kernel_size=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg), + nn.Conv2d( + self.channels, mask_h * mask_w, kernel_size=1, bias=False)) + self.psamask_collect = PSAMask('collect', mask_size) + self.psamask_distribute = PSAMask('distribute', mask_size) + else: + self.psamask = PSAMask(psa_type, mask_size) + self.proj = ConvModule( + self.channels * (2 if psa_type == 'bi-direction' else 1), + self.in_channels, + kernel_size=1, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + self.bottleneck = ConvModule( + self.in_channels * 2, + self.channels, + kernel_size=3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + def forward(self, inputs): + """Forward function.""" + x = self._transform_inputs(inputs) + identity = x + align_corners = self.align_corners + if self.psa_type in ['collect', 'distribute']: + out = self.reduce(x) + n, c, h, w = out.size() + if self.shrink_factor != 1: + if h % self.shrink_factor and w % self.shrink_factor: + h = (h - 1) // self.shrink_factor + 1 + w = (w - 1) // self.shrink_factor + 1 + align_corners = True + else: + h = h // self.shrink_factor + w = w // self.shrink_factor + align_corners = False + out = resize( + out, + size=(h, w), + mode='bilinear', + align_corners=align_corners) + y = self.attention(out) + if self.compact: + if self.psa_type == 'collect': + y = y.view(n, h * w, + h * w).transpose(1, 2).view(n, h * w, h, w) + else: + y = self.psamask(y) + if self.psa_softmax: + y = F.softmax(y, dim=1) + out = torch.bmm( + out.view(n, c, h * w), y.view(n, h * w, h * w)).view( + n, c, h, w) * (1.0 / self.normalization_factor) + else: + x_col = self.reduce(x) + x_dis = self.reduce_p(x) + n, c, h, w = x_col.size() + if self.shrink_factor != 1: + if h % self.shrink_factor and w % self.shrink_factor: + h = (h - 1) // self.shrink_factor + 1 + w = (w - 1) // self.shrink_factor + 1 + align_corners = True + else: + h = h // self.shrink_factor + w = w // self.shrink_factor + align_corners = False + x_col = resize( + x_col, + size=(h, w), + mode='bilinear', + align_corners=align_corners) + x_dis = resize( + x_dis, + size=(h, w), + 
mode='bilinear', + align_corners=align_corners) + y_col = self.attention(x_col) + y_dis = self.attention_p(x_dis) + if self.compact: + y_dis = y_dis.view(n, h * w, + h * w).transpose(1, 2).view(n, h * w, h, w) + else: + y_col = self.psamask_collect(y_col) + y_dis = self.psamask_distribute(y_dis) + if self.psa_softmax: + y_col = F.softmax(y_col, dim=1) + y_dis = F.softmax(y_dis, dim=1) + x_col = torch.bmm( + x_col.view(n, c, h * w), y_col.view(n, h * w, h * w)).view( + n, c, h, w) * (1.0 / self.normalization_factor) + x_dis = torch.bmm( + x_dis.view(n, c, h * w), y_dis.view(n, h * w, h * w)).view( + n, c, h, w) * (1.0 / self.normalization_factor) + out = torch.cat([x_col, x_dis], 1) + out = self.proj(out) + out = resize( + out, + size=identity.shape[2:], + mode='bilinear', + align_corners=align_corners) + out = self.bottleneck(torch.cat((identity, out), dim=1)) + out = self.cls_seg(out) + return out diff --git a/mmseg/models/decode_heads/psp_head.py b/mmseg/models/decode_heads/psp_head.py new file mode 100644 index 0000000000000000000000000000000000000000..bdbe2c8ac8dc2d21dd3e21aa5ed9f74504545c62 --- /dev/null +++ b/mmseg/models/decode_heads/psp_head.py @@ -0,0 +1,101 @@ +import torch +import torch.nn as nn +from mmcv.cnn import ConvModule + +from mmseg.ops import resize +from ..builder import HEADS +from .decode_head import BaseDecodeHead + + +class PPM(nn.ModuleList): + """Pooling Pyramid Module used in PSPNet. + + Args: + pool_scales (tuple[int]): Pooling scales used in Pooling Pyramid + Module. + in_channels (int): Input channels. + channels (int): Channels after modules, before conv_seg. + conv_cfg (dict|None): Config of conv layers. + norm_cfg (dict|None): Config of norm layers. + act_cfg (dict): Config of activation layers. + align_corners (bool): align_corners argument of F.interpolate. + """ + + def __init__(self, pool_scales, in_channels, channels, conv_cfg, norm_cfg, + act_cfg, align_corners): + super(PPM, self).__init__() + self.pool_scales = pool_scales + self.align_corners = align_corners + self.in_channels = in_channels + self.channels = channels + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.act_cfg = act_cfg + for pool_scale in pool_scales: + self.append( + nn.Sequential( + nn.AdaptiveAvgPool2d(pool_scale), + ConvModule( + self.in_channels, + self.channels, + 1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg))) + + def forward(self, x): + """Forward function.""" + ppm_outs = [] + for ppm in self: + ppm_out = ppm(x) + upsampled_ppm_out = resize( + ppm_out, + size=x.size()[2:], + mode='bilinear', + align_corners=self.align_corners) + ppm_outs.append(upsampled_ppm_out) + return ppm_outs + + +@HEADS.register_module() +class PSPHead(BaseDecodeHead): + """Pyramid Scene Parsing Network. + + This head is the implementation of + `PSPNet `_. + + Args: + pool_scales (tuple[int]): Pooling scales used in Pooling Pyramid + Module. Default: (1, 2, 3, 6). 
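+
+    Example (a minimal sketch; the channel and feature sizes are arbitrary
+    assumptions):
+        >>> import torch
+        >>> head = PSPHead(in_channels=512, channels=128, num_classes=19,
+        ...                in_index=0)
+        >>> seg_logits = head([torch.rand(1, 512, 32, 32)])
+        >>> seg_logits.shape
+        torch.Size([1, 19, 32, 32])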
+ """ + + def __init__(self, pool_scales=(1, 2, 3, 6), **kwargs): + super(PSPHead, self).__init__(**kwargs) + assert isinstance(pool_scales, (list, tuple)) + self.pool_scales = pool_scales + self.psp_modules = PPM( + self.pool_scales, + self.in_channels, + self.channels, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + align_corners=self.align_corners) + self.bottleneck = ConvModule( + self.in_channels + len(pool_scales) * self.channels, + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + def forward(self, inputs): + """Forward function.""" + x = self._transform_inputs(inputs) + psp_outs = [x] + psp_outs.extend(self.psp_modules(x)) + psp_outs = torch.cat(psp_outs, dim=1) + output = self.bottleneck(psp_outs) + output = self.cls_seg(output) + return output diff --git a/mmseg/models/decode_heads/sep_aspp_head.py b/mmseg/models/decode_heads/sep_aspp_head.py new file mode 100644 index 0000000000000000000000000000000000000000..50bd52bcff62d0f791c42731bdf05a64276f50b9 --- /dev/null +++ b/mmseg/models/decode_heads/sep_aspp_head.py @@ -0,0 +1,101 @@ +import torch +import torch.nn as nn +from mmcv.cnn import ConvModule, DepthwiseSeparableConvModule + +from mmseg.ops import resize +from ..builder import HEADS +from .aspp_head import ASPPHead, ASPPModule + + +class DepthwiseSeparableASPPModule(ASPPModule): + """Atrous Spatial Pyramid Pooling (ASPP) Module with depthwise separable + conv.""" + + def __init__(self, **kwargs): + super(DepthwiseSeparableASPPModule, self).__init__(**kwargs) + for i, dilation in enumerate(self.dilations): + if dilation > 1: + self[i] = DepthwiseSeparableConvModule( + self.in_channels, + self.channels, + 3, + dilation=dilation, + padding=dilation, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + +@HEADS.register_module() +class DepthwiseSeparableASPPHead(ASPPHead): + """Encoder-Decoder with Atrous Separable Convolution for Semantic Image + Segmentation. + + This head is the implementation of `DeepLabV3+ + `_. + + Args: + c1_in_channels (int): The input channels of c1 decoder. If is 0, + the no decoder will be used. + c1_channels (int): The intermediate channels of c1 decoder. 
+ """ + + def __init__(self, c1_in_channels, c1_channels, **kwargs): + super(DepthwiseSeparableASPPHead, self).__init__(**kwargs) + assert c1_in_channels >= 0 + self.aspp_modules = DepthwiseSeparableASPPModule( + dilations=self.dilations, + in_channels=self.in_channels, + channels=self.channels, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + if c1_in_channels > 0: + self.c1_bottleneck = ConvModule( + c1_in_channels, + c1_channels, + 1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + else: + self.c1_bottleneck = None + self.sep_bottleneck = nn.Sequential( + DepthwiseSeparableConvModule( + self.channels + c1_channels, + self.channels, + 3, + padding=1, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg), + DepthwiseSeparableConvModule( + self.channels, + self.channels, + 3, + padding=1, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg)) + + def forward(self, inputs): + """Forward function.""" + x = self._transform_inputs(inputs) + aspp_outs = [ + resize( + self.image_pool(x), + size=x.size()[2:], + mode='bilinear', + align_corners=self.align_corners) + ] + aspp_outs.extend(self.aspp_modules(x)) + aspp_outs = torch.cat(aspp_outs, dim=1) + output = self.bottleneck(aspp_outs) + if self.c1_bottleneck is not None: + c1_output = self.c1_bottleneck(inputs[0]) + output = resize( + input=output, + size=c1_output.shape[2:], + mode='bilinear', + align_corners=self.align_corners) + output = torch.cat([output, c1_output], dim=1) + output = self.sep_bottleneck(output) + output = self.cls_seg(output) + return output diff --git a/mmseg/models/decode_heads/sep_fcn_head.py b/mmseg/models/decode_heads/sep_fcn_head.py new file mode 100644 index 0000000000000000000000000000000000000000..a636f702e72d12fd6ff2821fd10923f682f805f1 --- /dev/null +++ b/mmseg/models/decode_heads/sep_fcn_head.py @@ -0,0 +1,51 @@ +from mmcv.cnn import DepthwiseSeparableConvModule + +from ..builder import HEADS +from .fcn_head import FCNHead + + +@HEADS.register_module() +class DepthwiseSeparableFCNHead(FCNHead): + """Depthwise-Separable Fully Convolutional Network for Semantic + Segmentation. + + This head is implemented according to Fast-SCNN paper. + Args: + in_channels(int): Number of output channels of FFM. + channels(int): Number of middle-stage channels in the decode head. + concat_input(bool): Whether to concatenate original decode input into + the result of several consecutive convolution layers. + Default: True. + num_classes(int): Used to determine the dimension of + final prediction tensor. + in_index(int): Correspond with 'out_indices' in FastSCNN backbone. + norm_cfg (dict | None): Config of norm layers. + align_corners (bool): align_corners argument of F.interpolate. + Default: False. + loss_decode(dict): Config of loss type and some + relevant additional options. 
+ """ + + def __init__(self, **kwargs): + super(DepthwiseSeparableFCNHead, self).__init__(**kwargs) + self.convs[0] = DepthwiseSeparableConvModule( + self.in_channels, + self.channels, + kernel_size=self.kernel_size, + padding=self.kernel_size // 2, + norm_cfg=self.norm_cfg) + for i in range(1, self.num_convs): + self.convs[i] = DepthwiseSeparableConvModule( + self.channels, + self.channels, + kernel_size=self.kernel_size, + padding=self.kernel_size // 2, + norm_cfg=self.norm_cfg) + + if self.concat_input: + self.conv_cat = DepthwiseSeparableConvModule( + self.in_channels + self.channels, + self.channels, + kernel_size=self.kernel_size, + padding=self.kernel_size // 2, + norm_cfg=self.norm_cfg) diff --git a/mmseg/models/decode_heads/uper_head.py b/mmseg/models/decode_heads/uper_head.py new file mode 100644 index 0000000000000000000000000000000000000000..bb617f6b13a1b359b0fa932300161e0d405d046d --- /dev/null +++ b/mmseg/models/decode_heads/uper_head.py @@ -0,0 +1,126 @@ +import torch +import torch.nn as nn +from mmcv.cnn import ConvModule + +from mmseg.ops import resize +from ..builder import HEADS +from .decode_head import BaseDecodeHead +from .psp_head import PPM + + +@HEADS.register_module() +class UPerHead(BaseDecodeHead): + """Unified Perceptual Parsing for Scene Understanding. + + This head is the implementation of `UPerNet + `_. + + Args: + pool_scales (tuple[int]): Pooling scales used in Pooling Pyramid + Module applied on the last feature. Default: (1, 2, 3, 6). + """ + + def __init__(self, pool_scales=(1, 2, 3, 6), **kwargs): + super(UPerHead, self).__init__( + input_transform='multiple_select', **kwargs) + # PSP Module + self.psp_modules = PPM( + pool_scales, + self.in_channels[-1], + self.channels, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + align_corners=self.align_corners) + self.bottleneck = ConvModule( + self.in_channels[-1] + len(pool_scales) * self.channels, + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + # FPN Module + self.lateral_convs = nn.ModuleList() + self.fpn_convs = nn.ModuleList() + for in_channels in self.in_channels[:-1]: # skip the top layer + l_conv = ConvModule( + in_channels, + self.channels, + 1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + inplace=False) + fpn_conv = ConvModule( + self.channels, + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + inplace=False) + self.lateral_convs.append(l_conv) + self.fpn_convs.append(fpn_conv) + + self.fpn_bottleneck = ConvModule( + len(self.in_channels) * self.channels, + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + def psp_forward(self, inputs): + """Forward function of PSP module.""" + x = inputs[-1] + psp_outs = [x] + psp_outs.extend(self.psp_modules(x)) + psp_outs = torch.cat(psp_outs, dim=1) + output = self.bottleneck(psp_outs) + + return output + + def forward(self, inputs): + """Forward function.""" + + inputs = self._transform_inputs(inputs) + + # build laterals + laterals = [ + lateral_conv(inputs[i]) + for i, lateral_conv in enumerate(self.lateral_convs) + ] + + laterals.append(self.psp_forward(inputs)) + + # build top-down path + used_backbone_levels = len(laterals) + for i in range(used_backbone_levels - 1, 0, -1): + prev_shape = laterals[i - 1].shape[2:] + laterals[i - 1] += resize( + laterals[i], + size=prev_shape, + mode='bilinear', + 
align_corners=self.align_corners) + + # build outputs + fpn_outs = [ + self.fpn_convs[i](laterals[i]) + for i in range(used_backbone_levels - 1) + ] + # append psp feature + fpn_outs.append(laterals[-1]) + + for i in range(used_backbone_levels - 1, 0, -1): + fpn_outs[i] = resize( + fpn_outs[i], + size=fpn_outs[0].shape[2:], + mode='bilinear', + align_corners=self.align_corners) + fpn_outs = torch.cat(fpn_outs, dim=1) + output = self.fpn_bottleneck(fpn_outs) + output = self.cls_seg(output) + return output diff --git a/mmseg/models/losses/__init__.py b/mmseg/models/losses/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..beca72045694273d63465bac2f27dbc6672271db --- /dev/null +++ b/mmseg/models/losses/__init__.py @@ -0,0 +1,12 @@ +from .accuracy import Accuracy, accuracy +from .cross_entropy_loss import (CrossEntropyLoss, binary_cross_entropy, + cross_entropy, mask_cross_entropy) +from .dice_loss import DiceLoss +from .lovasz_loss import LovaszLoss +from .utils import reduce_loss, weight_reduce_loss, weighted_loss + +__all__ = [ + 'accuracy', 'Accuracy', 'cross_entropy', 'binary_cross_entropy', + 'mask_cross_entropy', 'CrossEntropyLoss', 'reduce_loss', + 'weight_reduce_loss', 'weighted_loss', 'LovaszLoss', 'DiceLoss' +] diff --git a/mmseg/models/losses/accuracy.py b/mmseg/models/losses/accuracy.py new file mode 100644 index 0000000000000000000000000000000000000000..c0fd2e7e74a0f721c4a814c09d6e453e5956bb38 --- /dev/null +++ b/mmseg/models/losses/accuracy.py @@ -0,0 +1,78 @@ +import torch.nn as nn + + +def accuracy(pred, target, topk=1, thresh=None): + """Calculate accuracy according to the prediction and target. + + Args: + pred (torch.Tensor): The model prediction, shape (N, num_class, ...) + target (torch.Tensor): The target of each prediction, shape (N, , ...) + topk (int | tuple[int], optional): If the predictions in ``topk`` + matches the target, the predictions will be regarded as + correct ones. Defaults to 1. + thresh (float, optional): If not None, predictions with scores under + this threshold are considered incorrect. Default to None. + + Returns: + float | tuple[float]: If the input ``topk`` is a single integer, + the function will return a single float as accuracy. If + ``topk`` is a tuple containing multiple integers, the + function will return a tuple containing accuracies of + each ``topk`` number. + """ + assert isinstance(topk, (int, tuple)) + if isinstance(topk, int): + topk = (topk, ) + return_single = True + else: + return_single = False + + maxk = max(topk) + if pred.size(0) == 0: + accu = [pred.new_tensor(0.) for i in range(len(topk))] + return accu[0] if return_single else accu + assert pred.ndim == target.ndim + 1 + assert pred.size(0) == target.size(0) + assert maxk <= pred.size(1), \ + f'maxk {maxk} exceeds pred dimension {pred.size(1)}' + pred_value, pred_label = pred.topk(maxk, dim=1) + # transpose to shape (maxk, N, ...) + pred_label = pred_label.transpose(0, 1) + correct = pred_label.eq(target.unsqueeze(0).expand_as(pred_label)) + if thresh is not None: + # Only prediction values larger than thresh are counted as correct + correct = correct & (pred_value > thresh).t() + res = [] + for k in topk: + correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True) + res.append(correct_k.mul_(100.0 / target.numel())) + return res[0] if return_single else res + + +class Accuracy(nn.Module): + """Accuracy calculation module.""" + + def __init__(self, topk=(1, ), thresh=None): + """Module to calculate the accuracy. 
+ + Args: + topk (tuple, optional): The criterion used to calculate the + accuracy. Defaults to (1,). + thresh (float, optional): If not None, predictions with scores + under this threshold are considered incorrect. Default to None. + """ + super().__init__() + self.topk = topk + self.thresh = thresh + + def forward(self, pred, target): + """Forward function to calculate accuracy. + + Args: + pred (torch.Tensor): Prediction of models. + target (torch.Tensor): Target for each prediction. + + Returns: + tuple[float]: The accuracies under different topk criterions. + """ + return accuracy(pred, target, self.topk, self.thresh) diff --git a/mmseg/models/losses/cross_entropy_loss.py b/mmseg/models/losses/cross_entropy_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..42c0790c98616bb69621deed55547fc04c7392ef --- /dev/null +++ b/mmseg/models/losses/cross_entropy_loss.py @@ -0,0 +1,198 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + +from ..builder import LOSSES +from .utils import get_class_weight, weight_reduce_loss + + +def cross_entropy(pred, + label, + weight=None, + class_weight=None, + reduction='mean', + avg_factor=None, + ignore_index=-100): + """The wrapper function for :func:`F.cross_entropy`""" + # class_weight is a manual rescaling weight given to each class. + # If given, has to be a Tensor of size C element-wise losses + loss = F.cross_entropy( + pred, + label, + weight=class_weight, + reduction='none', + ignore_index=ignore_index) + + # apply weights and do the reduction + if weight is not None: + weight = weight.float() + loss = weight_reduce_loss( + loss, weight=weight, reduction=reduction, avg_factor=avg_factor) + + return loss + + +def _expand_onehot_labels(labels, label_weights, target_shape, ignore_index): + """Expand onehot labels to match the size of prediction.""" + bin_labels = labels.new_zeros(target_shape) + valid_mask = (labels >= 0) & (labels != ignore_index) + inds = torch.nonzero(valid_mask, as_tuple=True) + + if inds[0].numel() > 0: + if labels.dim() == 3: + bin_labels[inds[0], labels[valid_mask], inds[1], inds[2]] = 1 + else: + bin_labels[inds[0], labels[valid_mask]] = 1 + + valid_mask = valid_mask.unsqueeze(1).expand(target_shape).float() + if label_weights is None: + bin_label_weights = valid_mask + else: + bin_label_weights = label_weights.unsqueeze(1).expand(target_shape) + bin_label_weights *= valid_mask + + return bin_labels, bin_label_weights + + +def binary_cross_entropy(pred, + label, + weight=None, + reduction='mean', + avg_factor=None, + class_weight=None, + ignore_index=255): + """Calculate the binary CrossEntropy loss. + + Args: + pred (torch.Tensor): The prediction with shape (N, 1). + label (torch.Tensor): The learning label of the prediction. + weight (torch.Tensor, optional): Sample-wise loss weight. + reduction (str, optional): The method used to reduce the loss. + Options are "none", "mean" and "sum". + avg_factor (int, optional): Average factor that is used to average + the loss. Defaults to None. + class_weight (list[float], optional): The weight for each class. + ignore_index (int | None): The label index to be ignored. 
Default: 255 + + Returns: + torch.Tensor: The calculated loss + """ + if pred.dim() != label.dim(): + assert (pred.dim() == 2 and label.dim() == 1) or ( + pred.dim() == 4 and label.dim() == 3), \ + 'Only pred shape [N, C], label shape [N] or pred shape [N, C, ' \ + 'H, W], label shape [N, H, W] are supported' + label, weight = _expand_onehot_labels(label, weight, pred.shape, + ignore_index) + + # weighted element-wise losses + if weight is not None: + weight = weight.float() + loss = F.binary_cross_entropy_with_logits( + pred, label.float(), pos_weight=class_weight, reduction='none') + # do the reduction for the weighted loss + loss = weight_reduce_loss( + loss, weight, reduction=reduction, avg_factor=avg_factor) + + return loss + + +def mask_cross_entropy(pred, + target, + label, + reduction='mean', + avg_factor=None, + class_weight=None, + ignore_index=None): + """Calculate the CrossEntropy loss for masks. + + Args: + pred (torch.Tensor): The prediction with shape (N, C), C is the number + of classes. + target (torch.Tensor): The learning label of the prediction. + label (torch.Tensor): ``label`` indicates the class label of the mask' + corresponding object. This will be used to select the mask in the + of the class which the object belongs to when the mask prediction + if not class-agnostic. + reduction (str, optional): The method used to reduce the loss. + Options are "none", "mean" and "sum". + avg_factor (int, optional): Average factor that is used to average + the loss. Defaults to None. + class_weight (list[float], optional): The weight for each class. + ignore_index (None): Placeholder, to be consistent with other loss. + Default: None. + + Returns: + torch.Tensor: The calculated loss + """ + assert ignore_index is None, 'BCE loss does not support ignore_index' + # TODO: handle these two reserved arguments + assert reduction == 'mean' and avg_factor is None + num_rois = pred.size()[0] + inds = torch.arange(0, num_rois, dtype=torch.long, device=pred.device) + pred_slice = pred[inds, label].squeeze(1) + return F.binary_cross_entropy_with_logits( + pred_slice, target, weight=class_weight, reduction='mean')[None] + + +@LOSSES.register_module() +class CrossEntropyLoss(nn.Module): + """CrossEntropyLoss. + + Args: + use_sigmoid (bool, optional): Whether the prediction uses sigmoid + of softmax. Defaults to False. + use_mask (bool, optional): Whether to use mask cross entropy loss. + Defaults to False. + reduction (str, optional): . Defaults to 'mean'. + Options are "none", "mean" and "sum". + class_weight (list[float] | str, optional): Weight of each class. If in + str format, read them from a file. Defaults to None. + loss_weight (float, optional): Weight of the loss. Defaults to 1.0. 
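+
+    Example (a minimal sketch with random inputs):
+        >>> import torch
+        >>> loss_fn = CrossEntropyLoss(loss_weight=1.0)
+        >>> seg_logits = torch.rand(2, 19, 8, 8)
+        >>> label = torch.randint(0, 19, (2, 8, 8))
+        >>> loss = loss_fn(seg_logits, label)  # scalar tensor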
+ """ + + def __init__(self, + use_sigmoid=False, + use_mask=False, + reduction='mean', + class_weight=None, + loss_weight=1.0): + super(CrossEntropyLoss, self).__init__() + assert (use_sigmoid is False) or (use_mask is False) + self.use_sigmoid = use_sigmoid + self.use_mask = use_mask + self.reduction = reduction + self.loss_weight = loss_weight + self.class_weight = get_class_weight(class_weight) + + if self.use_sigmoid: + self.cls_criterion = binary_cross_entropy + elif self.use_mask: + self.cls_criterion = mask_cross_entropy + else: + self.cls_criterion = cross_entropy + + def forward(self, + cls_score, + label, + weight=None, + avg_factor=None, + reduction_override=None, + **kwargs): + """Forward function.""" + assert reduction_override in (None, 'none', 'mean', 'sum') + reduction = ( + reduction_override if reduction_override else self.reduction) + if self.class_weight is not None: + class_weight = cls_score.new_tensor(self.class_weight) + else: + class_weight = None + loss_cls = self.loss_weight * self.cls_criterion( + cls_score, + label, + weight, + class_weight=class_weight, + reduction=reduction, + avg_factor=avg_factor, + **kwargs) + return loss_cls diff --git a/mmseg/models/losses/dice_loss.py b/mmseg/models/losses/dice_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..27a77b962d7d8b3079c7d6cd9db52280c6fb4970 --- /dev/null +++ b/mmseg/models/losses/dice_loss.py @@ -0,0 +1,119 @@ +"""Modified from https://github.com/LikeLy-Journey/SegmenTron/blob/master/ +segmentron/solver/loss.py (Apache-2.0 License)""" +import torch +import torch.nn as nn +import torch.nn.functional as F + +from ..builder import LOSSES +from .utils import get_class_weight, weighted_loss + + +@weighted_loss +def dice_loss(pred, + target, + valid_mask, + smooth=1, + exponent=2, + class_weight=None, + ignore_index=255): + assert pred.shape[0] == target.shape[0] + total_loss = 0 + num_classes = pred.shape[1] + for i in range(num_classes): + if i != ignore_index: + dice_loss = binary_dice_loss( + pred[:, i], + target[..., i], + valid_mask=valid_mask, + smooth=smooth, + exponent=exponent) + if class_weight is not None: + dice_loss *= class_weight[i] + total_loss += dice_loss + return total_loss / num_classes + + +@weighted_loss +def binary_dice_loss(pred, target, valid_mask, smooth=1, exponent=2, **kwards): + assert pred.shape[0] == target.shape[0] + pred = pred.reshape(pred.shape[0], -1) + target = target.reshape(target.shape[0], -1) + valid_mask = valid_mask.reshape(valid_mask.shape[0], -1) + + num = torch.sum(torch.mul(pred, target) * valid_mask, dim=1) * 2 + smooth + den = torch.sum(pred.pow(exponent) + target.pow(exponent), dim=1) + smooth + + return 1 - num / den + + +@LOSSES.register_module() +class DiceLoss(nn.Module): + """DiceLoss. + + This loss is proposed in `V-Net: Fully Convolutional Neural Networks for + Volumetric Medical Image Segmentation `_. + + Args: + loss_type (str, optional): Binary or multi-class loss. + Default: 'multi_class'. Options are "binary" and "multi_class". + smooth (float): A float number to smooth loss, and avoid NaN error. + Default: 1 + exponent (float): An float number to calculate denominator + value: \\sum{x^exponent} + \\sum{y^exponent}. Default: 2. + reduction (str, optional): The method used to reduce the loss. Options + are "none", "mean" and "sum". This parameter only works when + per_image is True. Default: 'mean'. + class_weight (list[float] | str, optional): Weight of each class. If in + str format, read them from a file. Defaults to None. 
+ loss_weight (float, optional): Weight of the loss. Default to 1.0. + ignore_index (int | None): The label index to be ignored. Default: 255. + """ + + def __init__(self, + smooth=1, + exponent=2, + reduction='mean', + class_weight=None, + loss_weight=1.0, + ignore_index=255, + **kwards): + super(DiceLoss, self).__init__() + self.smooth = smooth + self.exponent = exponent + self.reduction = reduction + self.class_weight = get_class_weight(class_weight) + self.loss_weight = loss_weight + self.ignore_index = ignore_index + + def forward(self, + pred, + target, + avg_factor=None, + reduction_override=None, + **kwards): + assert reduction_override in (None, 'none', 'mean', 'sum') + reduction = ( + reduction_override if reduction_override else self.reduction) + if self.class_weight is not None: + class_weight = pred.new_tensor(self.class_weight) + else: + class_weight = None + + pred = F.softmax(pred, dim=1) + num_classes = pred.shape[1] + one_hot_target = F.one_hot( + torch.clamp(target.long(), 0, num_classes - 1), + num_classes=num_classes) + valid_mask = (target != self.ignore_index).long() + + loss = self.loss_weight * dice_loss( + pred, + one_hot_target, + valid_mask=valid_mask, + reduction=reduction, + avg_factor=avg_factor, + smooth=self.smooth, + exponent=self.exponent, + class_weight=class_weight, + ignore_index=self.ignore_index) + return loss diff --git a/mmseg/models/losses/lovasz_loss.py b/mmseg/models/losses/lovasz_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..e8df6e83079864bb0873fd14be03041007190416 --- /dev/null +++ b/mmseg/models/losses/lovasz_loss.py @@ -0,0 +1,303 @@ +"""Modified from https://github.com/bermanmaxim/LovaszSoftmax/blob/master/pytor +ch/lovasz_losses.py Lovasz-Softmax and Jaccard hinge loss in PyTorch Maxim +Berman 2018 ESAT-PSI KU Leuven (MIT License)""" + +import mmcv +import torch +import torch.nn as nn +import torch.nn.functional as F + +from ..builder import LOSSES +from .utils import get_class_weight, weight_reduce_loss + + +def lovasz_grad(gt_sorted): + """Computes gradient of the Lovasz extension w.r.t sorted errors. + + See Alg. 1 in paper. + """ + p = len(gt_sorted) + gts = gt_sorted.sum() + intersection = gts - gt_sorted.float().cumsum(0) + union = gts + (1 - gt_sorted).float().cumsum(0) + jaccard = 1. - intersection / union + if p > 1: # cover 1-pixel case + jaccard[1:p] = jaccard[1:p] - jaccard[0:-1] + return jaccard + + +def flatten_binary_logits(logits, labels, ignore_index=None): + """Flattens predictions in the batch (binary case) Remove labels equal to + 'ignore_index'.""" + logits = logits.view(-1) + labels = labels.view(-1) + if ignore_index is None: + return logits, labels + valid = (labels != ignore_index) + vlogits = logits[valid] + vlabels = labels[valid] + return vlogits, vlabels + + +def flatten_probs(probs, labels, ignore_index=None): + """Flattens predictions in the batch.""" + if probs.dim() == 3: + # assumes output of a sigmoid layer + B, H, W = probs.size() + probs = probs.view(B, 1, H, W) + B, C, H, W = probs.size() + probs = probs.permute(0, 2, 3, 1).contiguous().view(-1, C) # B*H*W, C=P,C + labels = labels.view(-1) + if ignore_index is None: + return probs, labels + valid = (labels != ignore_index) + vprobs = probs[valid.nonzero().squeeze()] + vlabels = labels[valid] + return vprobs, vlabels + + +def lovasz_hinge_flat(logits, labels): + """Binary Lovasz hinge loss. + + Args: + logits (torch.Tensor): [P], logits at each prediction + (between -infty and +infty). 
+ labels (torch.Tensor): [P], binary ground truth labels (0 or 1). + + Returns: + torch.Tensor: The calculated loss. + """ + if len(labels) == 0: + # only void pixels, the gradients should be 0 + return logits.sum() * 0. + signs = 2. * labels.float() - 1. + errors = (1. - logits * signs) + errors_sorted, perm = torch.sort(errors, dim=0, descending=True) + perm = perm.data + gt_sorted = labels[perm] + grad = lovasz_grad(gt_sorted) + loss = torch.dot(F.relu(errors_sorted), grad) + return loss + + +def lovasz_hinge(logits, + labels, + classes='present', + per_image=False, + class_weight=None, + reduction='mean', + avg_factor=None, + ignore_index=255): + """Binary Lovasz hinge loss. + + Args: + logits (torch.Tensor): [B, H, W], logits at each pixel + (between -infty and +infty). + labels (torch.Tensor): [B, H, W], binary ground truth masks (0 or 1). + classes (str | list[int], optional): Placeholder, to be consistent with + other loss. Default: None. + per_image (bool, optional): If per_image is True, compute the loss per + image instead of per batch. Default: False. + class_weight (list[float], optional): Placeholder, to be consistent + with other loss. Default: None. + reduction (str, optional): The method used to reduce the loss. Options + are "none", "mean" and "sum". This parameter only works when + per_image is True. Default: 'mean'. + avg_factor (int, optional): Average factor that is used to average + the loss. This parameter only works when per_image is True. + Default: None. + ignore_index (int | None): The label index to be ignored. Default: 255. + + Returns: + torch.Tensor: The calculated loss. + """ + if per_image: + loss = [ + lovasz_hinge_flat(*flatten_binary_logits( + logit.unsqueeze(0), label.unsqueeze(0), ignore_index)) + for logit, label in zip(logits, labels) + ] + loss = weight_reduce_loss( + torch.stack(loss), None, reduction, avg_factor) + else: + loss = lovasz_hinge_flat( + *flatten_binary_logits(logits, labels, ignore_index)) + return loss + + +def lovasz_softmax_flat(probs, labels, classes='present', class_weight=None): + """Multi-class Lovasz-Softmax loss. + + Args: + probs (torch.Tensor): [P, C], class probabilities at each prediction + (between 0 and 1). + labels (torch.Tensor): [P], ground truth labels (between 0 and C - 1). + classes (str | list[int], optional): Classes chosen to calculate loss. + 'all' for all classes, 'present' for classes present in labels, or + a list of classes to average. Default: 'present'. + class_weight (list[float], optional): The weight for each class. + Default: None. + + Returns: + torch.Tensor: The calculated loss. + """ + if probs.numel() == 0: + # only void pixels, the gradients should be 0 + return probs * 0. 
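+    # For every class c below: take the absolute errors between the
+    # predicted probabilities and the binary foreground mask, sort them in
+    # decreasing order, and take the dot product with the Lovasz-extension
+    # gradient (see lovasz_grad). This yields a convex surrogate of the
+    # per-class Jaccard (IoU) loss; the per-class losses are then averaged.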
+ C = probs.size(1) + losses = [] + class_to_sum = list(range(C)) if classes in ['all', 'present'] else classes + for c in class_to_sum: + fg = (labels == c).float() # foreground for class c + if (classes == 'present' and fg.sum() == 0): + continue + if C == 1: + if len(classes) > 1: + raise ValueError('Sigmoid output possible only with 1 class') + class_pred = probs[:, 0] + else: + class_pred = probs[:, c] + errors = (fg - class_pred).abs() + errors_sorted, perm = torch.sort(errors, 0, descending=True) + perm = perm.data + fg_sorted = fg[perm] + loss = torch.dot(errors_sorted, lovasz_grad(fg_sorted)) + if class_weight is not None: + loss *= class_weight[c] + losses.append(loss) + return torch.stack(losses).mean() + + +def lovasz_softmax(probs, + labels, + classes='present', + per_image=False, + class_weight=None, + reduction='mean', + avg_factor=None, + ignore_index=255): + """Multi-class Lovasz-Softmax loss. + + Args: + probs (torch.Tensor): [B, C, H, W], class probabilities at each + prediction (between 0 and 1). + labels (torch.Tensor): [B, H, W], ground truth labels (between 0 and + C - 1). + classes (str | list[int], optional): Classes chosen to calculate loss. + 'all' for all classes, 'present' for classes present in labels, or + a list of classes to average. Default: 'present'. + per_image (bool, optional): If per_image is True, compute the loss per + image instead of per batch. Default: False. + class_weight (list[float], optional): The weight for each class. + Default: None. + reduction (str, optional): The method used to reduce the loss. Options + are "none", "mean" and "sum". This parameter only works when + per_image is True. Default: 'mean'. + avg_factor (int, optional): Average factor that is used to average + the loss. This parameter only works when per_image is True. + Default: None. + ignore_index (int | None): The label index to be ignored. Default: 255. + + Returns: + torch.Tensor: The calculated loss. + """ + + if per_image: + loss = [ + lovasz_softmax_flat( + *flatten_probs( + prob.unsqueeze(0), label.unsqueeze(0), ignore_index), + classes=classes, + class_weight=class_weight) + for prob, label in zip(probs, labels) + ] + loss = weight_reduce_loss( + torch.stack(loss), None, reduction, avg_factor) + else: + loss = lovasz_softmax_flat( + *flatten_probs(probs, labels, ignore_index), + classes=classes, + class_weight=class_weight) + return loss + + +@LOSSES.register_module() +class LovaszLoss(nn.Module): + """LovaszLoss. + + This loss is proposed in `The Lovasz-Softmax loss: A tractable surrogate + for the optimization of the intersection-over-union measure in neural + networks `_. + + Args: + loss_type (str, optional): Binary or multi-class loss. + Default: 'multi_class'. Options are "binary" and "multi_class". + classes (str | list[int], optional): Classes chosen to calculate loss. + 'all' for all classes, 'present' for classes present in labels, or + a list of classes to average. Default: 'present'. + per_image (bool, optional): If per_image is True, compute the loss per + image instead of per batch. Default: False. + reduction (str, optional): The method used to reduce the loss. Options + are "none", "mean" and "sum". This parameter only works when + per_image is True. Default: 'mean'. + class_weight (list[float] | str, optional): Weight of each class. If in + str format, read them from a file. Defaults to None. + loss_weight (float, optional): Weight of the loss. Defaults to 1.0. 
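+
+    Example (a minimal sketch; ``reduction`` must be 'none' when
+    ``per_image`` is False, as asserted in ``__init__``):
+        >>> import torch
+        >>> loss_fn = LovaszLoss(loss_type='multi_class', reduction='none')
+        >>> seg_logits = torch.rand(2, 19, 8, 8)
+        >>> label = torch.randint(0, 19, (2, 8, 8))
+        >>> loss = loss_fn(seg_logits, label)  # scalar tensor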
+ """ + + def __init__(self, + loss_type='multi_class', + classes='present', + per_image=False, + reduction='mean', + class_weight=None, + loss_weight=1.0): + super(LovaszLoss, self).__init__() + assert loss_type in ('binary', 'multi_class'), "loss_type should be \ + 'binary' or 'multi_class'." + + if loss_type == 'binary': + self.cls_criterion = lovasz_hinge + else: + self.cls_criterion = lovasz_softmax + assert classes in ('all', 'present') or mmcv.is_list_of(classes, int) + if not per_image: + assert reduction == 'none', "reduction should be 'none' when \ + per_image is False." + + self.classes = classes + self.per_image = per_image + self.reduction = reduction + self.loss_weight = loss_weight + self.class_weight = get_class_weight(class_weight) + + def forward(self, + cls_score, + label, + weight=None, + avg_factor=None, + reduction_override=None, + **kwargs): + """Forward function.""" + assert reduction_override in (None, 'none', 'mean', 'sum') + reduction = ( + reduction_override if reduction_override else self.reduction) + if self.class_weight is not None: + class_weight = cls_score.new_tensor(self.class_weight) + else: + class_weight = None + + # if multi-class loss, transform logits to probs + if self.cls_criterion == lovasz_softmax: + cls_score = F.softmax(cls_score, dim=1) + + loss_cls = self.loss_weight * self.cls_criterion( + cls_score, + label, + self.classes, + self.per_image, + class_weight=class_weight, + reduction=reduction, + avg_factor=avg_factor, + **kwargs) + return loss_cls diff --git a/mmseg/models/losses/utils.py b/mmseg/models/losses/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..ab5876603ed823e25e34aec05fc7ec6afd6cfd49 --- /dev/null +++ b/mmseg/models/losses/utils.py @@ -0,0 +1,121 @@ +import functools + +import mmcv +import numpy as np +import torch.nn.functional as F + + +def get_class_weight(class_weight): + """Get class weight for loss function. + + Args: + class_weight (list[float] | str | None): If class_weight is a str, + take it as a file name and read from it. + """ + if isinstance(class_weight, str): + # take it as a file path + if class_weight.endswith('.npy'): + class_weight = np.load(class_weight) + else: + # pkl, json or yaml + class_weight = mmcv.load(class_weight) + + return class_weight + + +def reduce_loss(loss, reduction): + """Reduce loss as specified. + + Args: + loss (Tensor): Elementwise loss tensor. + reduction (str): Options are "none", "mean" and "sum". + + Return: + Tensor: Reduced loss tensor. + """ + reduction_enum = F._Reduction.get_enum(reduction) + # none: 0, elementwise_mean:1, sum: 2 + if reduction_enum == 0: + return loss + elif reduction_enum == 1: + return loss.mean() + elif reduction_enum == 2: + return loss.sum() + + +def weight_reduce_loss(loss, weight=None, reduction='mean', avg_factor=None): + """Apply element-wise weight and reduce loss. + + Args: + loss (Tensor): Element-wise loss. + weight (Tensor): Element-wise weights. + reduction (str): Same as built-in losses of PyTorch. + avg_factor (float): Avarage factor when computing the mean of losses. + + Returns: + Tensor: Processed loss values. 
+ """ + # if weight is specified, apply element-wise weight + if weight is not None: + assert weight.dim() == loss.dim() + if weight.dim() > 1: + assert weight.size(1) == 1 or weight.size(1) == loss.size(1) + loss = loss * weight + + # if avg_factor is not specified, just reduce the loss + if avg_factor is None: + loss = reduce_loss(loss, reduction) + else: + # if reduction is mean, then average the loss by avg_factor + if reduction == 'mean': + loss = loss.sum() / avg_factor + # if reduction is 'none', then do nothing, otherwise raise an error + elif reduction != 'none': + raise ValueError('avg_factor can not be used with reduction="sum"') + return loss + + +def weighted_loss(loss_func): + """Create a weighted version of a given loss function. + + To use this decorator, the loss function must have the signature like + `loss_func(pred, target, **kwargs)`. The function only needs to compute + element-wise loss without any reduction. This decorator will add weight + and reduction arguments to the function. The decorated function will have + the signature like `loss_func(pred, target, weight=None, reduction='mean', + avg_factor=None, **kwargs)`. + + :Example: + + >>> import torch + >>> @weighted_loss + >>> def l1_loss(pred, target): + >>> return (pred - target).abs() + + >>> pred = torch.Tensor([0, 2, 3]) + >>> target = torch.Tensor([1, 1, 1]) + >>> weight = torch.Tensor([1, 0, 1]) + + >>> l1_loss(pred, target) + tensor(1.3333) + >>> l1_loss(pred, target, weight) + tensor(1.) + >>> l1_loss(pred, target, reduction='none') + tensor([1., 1., 2.]) + >>> l1_loss(pred, target, weight, avg_factor=2) + tensor(1.5000) + """ + + @functools.wraps(loss_func) + def wrapper(pred, + target, + weight=None, + reduction='mean', + avg_factor=None, + **kwargs): + # get element-wise loss + loss = loss_func(pred, target, **kwargs) + loss = weight_reduce_loss(loss, weight, reduction, avg_factor) + return loss + + return wrapper diff --git a/mmseg/models/necks/__init__.py b/mmseg/models/necks/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..9b9d3d5b3fe80247642d962edd6fb787537d01d6 --- /dev/null +++ b/mmseg/models/necks/__init__.py @@ -0,0 +1,4 @@ +from .fpn import FPN +from .multilevel_neck import MultiLevelNeck + +__all__ = ['FPN', 'MultiLevelNeck'] diff --git a/mmseg/models/necks/fpn.py b/mmseg/models/necks/fpn.py new file mode 100644 index 0000000000000000000000000000000000000000..f43d1e62f62b4cde0f181d3d44cef5383fec78b1 --- /dev/null +++ b/mmseg/models/necks/fpn.py @@ -0,0 +1,212 @@ +import torch.nn as nn +import torch.nn.functional as F +from mmcv.cnn import ConvModule, xavier_init + +from ..builder import NECKS + + +@NECKS.register_module() +class FPN(nn.Module): + """Feature Pyramid Network. + + This is an implementation of - Feature Pyramid Networks for Object + Detection (https://arxiv.org/abs/1612.03144) + + Args: + in_channels (List[int]): Number of input channels per scale. + out_channels (int): Number of output channels (used at each scale) + num_outs (int): Number of output scales. + start_level (int): Index of the start input backbone level used to + build the feature pyramid. Default: 0. + end_level (int): Index of the end input backbone level (exclusive) to + build the feature pyramid. Default: -1, which means the last level. + add_extra_convs (bool | str): If bool, it decides whether to add conv + layers on top of the original feature maps. Default to False. + If True, its actual mode is specified by `extra_convs_on_inputs`. 
+            If str, it specifies the source feature map of the extra convs.
+            Only the following options are allowed:
+
+            - 'on_input': Last feat map of neck inputs (i.e. backbone feature).
+            - 'on_lateral': Last feature map after lateral convs.
+            - 'on_output': The last output feature map after fpn convs.
+        extra_convs_on_inputs (bool, deprecated): Whether to apply extra convs
+            on the original feature from the backbone. If True,
+            it is equivalent to `add_extra_convs='on_input'`. If False, it is
+            equivalent to set `add_extra_convs='on_output'`. Default: False.
+        relu_before_extra_convs (bool): Whether to apply relu before the extra
+            conv. Default: False.
+        no_norm_on_lateral (bool): Whether to apply norm on lateral.
+            Default: False.
+        conv_cfg (dict): Config dict for convolution layer. Default: None.
+        norm_cfg (dict): Config dict for normalization layer. Default: None.
+        act_cfg (dict): Config dict for activation layer in ConvModule.
+            Default: None.
+        upsample_cfg (dict): Config dict for interpolate layer.
+            Default: `dict(mode='nearest')`
+
+    Example:
+        >>> import torch
+        >>> in_channels = [2, 3, 5, 7]
+        >>> scales = [340, 170, 84, 43]
+        >>> inputs = [torch.rand(1, c, s, s)
+        ...           for c, s in zip(in_channels, scales)]
+        >>> self = FPN(in_channels, 11, len(in_channels)).eval()
+        >>> outputs = self.forward(inputs)
+        >>> for i in range(len(outputs)):
+        ...     print(f'outputs[{i}].shape = {outputs[i].shape}')
+        outputs[0].shape = torch.Size([1, 11, 340, 340])
+        outputs[1].shape = torch.Size([1, 11, 170, 170])
+        outputs[2].shape = torch.Size([1, 11, 84, 84])
+        outputs[3].shape = torch.Size([1, 11, 43, 43])
+    """
+
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 num_outs,
+                 start_level=0,
+                 end_level=-1,
+                 add_extra_convs=False,
+                 extra_convs_on_inputs=False,
+                 relu_before_extra_convs=False,
+                 no_norm_on_lateral=False,
+                 conv_cfg=None,
+                 norm_cfg=None,
+                 act_cfg=None,
+                 upsample_cfg=dict(mode='nearest')):
+        super(FPN, self).__init__()
+        assert isinstance(in_channels, list)
+        self.in_channels = in_channels
+        self.out_channels = out_channels
+        self.num_ins = len(in_channels)
+        self.num_outs = num_outs
+        self.relu_before_extra_convs = relu_before_extra_convs
+        self.no_norm_on_lateral = no_norm_on_lateral
+        self.fp16_enabled = False
+        self.upsample_cfg = upsample_cfg.copy()
+
+        if end_level == -1:
+            self.backbone_end_level = self.num_ins
+            assert num_outs >= self.num_ins - start_level
+        else:
+            # if end_level < inputs, no extra level is allowed
+            self.backbone_end_level = end_level
+            assert end_level <= len(in_channels)
+            assert num_outs == end_level - start_level
+        self.start_level = start_level
+        self.end_level = end_level
+        self.add_extra_convs = add_extra_convs
+        assert isinstance(add_extra_convs, (str, bool))
+        if isinstance(add_extra_convs, str):
+            # Extra_convs_source choices: 'on_input', 'on_lateral', 'on_output'
+            assert add_extra_convs in ('on_input', 'on_lateral', 'on_output')
+        elif add_extra_convs:  # True
+            if extra_convs_on_inputs:
+                # For compatibility with previous release
+                # TODO: deprecate `extra_convs_on_inputs`
+                self.add_extra_convs = 'on_input'
+            else:
+                self.add_extra_convs = 'on_output'
+
+        self.lateral_convs = nn.ModuleList()
+        self.fpn_convs = nn.ModuleList()
+
+        for i in range(self.start_level, self.backbone_end_level):
+            l_conv = ConvModule(
+                in_channels[i],
+                out_channels,
+                1,
+                conv_cfg=conv_cfg,
+                norm_cfg=norm_cfg if not self.no_norm_on_lateral else None,
+                act_cfg=act_cfg,
+                inplace=False)
+            fpn_conv = ConvModule(
+                out_channels,
+                out_channels,
+                3,
+                padding=1,
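+                # 3x3 conv applied to each merged map to reduce the
+                # aliasing effect of upsampling (one per pyramid level)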
+                conv_cfg=conv_cfg,
+                norm_cfg=norm_cfg,
+                act_cfg=act_cfg,
+                inplace=False)
+
+            self.lateral_convs.append(l_conv)
+            self.fpn_convs.append(fpn_conv)
+
+        # add extra conv layers (e.g., RetinaNet)
+        extra_levels = num_outs - self.backbone_end_level + self.start_level
+        if self.add_extra_convs and extra_levels >= 1:
+            for i in range(extra_levels):
+                if i == 0 and self.add_extra_convs == 'on_input':
+                    in_channels = self.in_channels[self.backbone_end_level - 1]
+                else:
+                    in_channels = out_channels
+                extra_fpn_conv = ConvModule(
+                    in_channels,
+                    out_channels,
+                    3,
+                    stride=2,
+                    padding=1,
+                    conv_cfg=conv_cfg,
+                    norm_cfg=norm_cfg,
+                    act_cfg=act_cfg,
+                    inplace=False)
+                self.fpn_convs.append(extra_fpn_conv)
+
+    # default init_weights for conv (msra) and norm in ConvModule
+    def init_weights(self):
+        for m in self.modules():
+            if isinstance(m, nn.Conv2d):
+                xavier_init(m, distribution='uniform')
+
+    def forward(self, inputs):
+        assert len(inputs) == len(self.in_channels)
+
+        # build laterals
+        laterals = [
+            lateral_conv(inputs[i + self.start_level])
+            for i, lateral_conv in enumerate(self.lateral_convs)
+        ]
+
+        # build top-down path
+        used_backbone_levels = len(laterals)
+        for i in range(used_backbone_levels - 1, 0, -1):
+            # In some cases, fixing `scale_factor` (e.g. 2) is preferred, but
+            # it cannot co-exist with `size` in `F.interpolate`.
+            if 'scale_factor' in self.upsample_cfg:
+                laterals[i - 1] += F.interpolate(laterals[i],
+                                                 **self.upsample_cfg)
+            else:
+                prev_shape = laterals[i - 1].shape[2:]
+                laterals[i - 1] += F.interpolate(
+                    laterals[i], size=prev_shape, **self.upsample_cfg)
+
+        # build outputs
+        # part 1: from original levels
+        outs = [
+            self.fpn_convs[i](laterals[i]) for i in range(used_backbone_levels)
+        ]
+        # part 2: add extra levels
+        if self.num_outs > len(outs):
+            # use max pool to get more levels on top of outputs
+            # (e.g., Faster R-CNN, Mask R-CNN)
+            if not self.add_extra_convs:
+                for i in range(self.num_outs - used_backbone_levels):
+                    outs.append(F.max_pool2d(outs[-1], 1, stride=2))
+            # add conv layers on top of original feature maps (RetinaNet)
+            else:
+                if self.add_extra_convs == 'on_input':
+                    extra_source = inputs[self.backbone_end_level - 1]
+                elif self.add_extra_convs == 'on_lateral':
+                    extra_source = laterals[-1]
+                elif self.add_extra_convs == 'on_output':
+                    extra_source = outs[-1]
+                else:
+                    raise NotImplementedError
+                outs.append(self.fpn_convs[used_backbone_levels](extra_source))
+                for i in range(used_backbone_levels + 1, self.num_outs):
+                    if self.relu_before_extra_convs:
+                        outs.append(self.fpn_convs[i](F.relu(outs[-1])))
+                    else:
+                        outs.append(self.fpn_convs[i](outs[-1]))
+        return tuple(outs)
diff --git a/mmseg/models/necks/multilevel_neck.py b/mmseg/models/necks/multilevel_neck.py
new file mode 100644
index 0000000000000000000000000000000000000000..7e13813b16f9bd79518451036e2dc864e84b7240
--- /dev/null
+++ b/mmseg/models/necks/multilevel_neck.py
@@ -0,0 +1,70 @@
+import torch.nn as nn
+import torch.nn.functional as F
+from mmcv.cnn import ConvModule
+
+from ..builder import NECKS
+
+
+@NECKS.register_module()
+class MultiLevelNeck(nn.Module):
+    """MultiLevelNeck.
+
+    A neck structure to connect the ViT backbone and decoder heads.
+
+    Args:
+        in_channels (List[int]): Number of input channels per scale.
+        out_channels (int): Number of output channels (used at each scale).
+        scales (List[float]): Scale factors for each input feature map.
+        norm_cfg (dict): Config dict for normalization layer. Default: None.
+        act_cfg (dict): Config dict for activation layer in ConvModule.
+            Default: None.
+    """
+
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 scales=[0.5, 1, 2, 4],
+                 norm_cfg=None,
+                 act_cfg=None):
+        super(MultiLevelNeck, self).__init__()
+        assert isinstance(in_channels, list)
+        self.in_channels = in_channels
+        self.out_channels = out_channels
+        self.scales = scales
+        self.num_outs = len(scales)
+        self.lateral_convs = nn.ModuleList()
+        self.convs = nn.ModuleList()
+        for in_channel in in_channels:
+            self.lateral_convs.append(
+                ConvModule(
+                    in_channel,
+                    out_channels,
+                    kernel_size=1,
+                    norm_cfg=norm_cfg,
+                    act_cfg=act_cfg))
+        for _ in range(self.num_outs):
+            self.convs.append(
+                ConvModule(
+                    out_channels,
+                    out_channels,
+                    kernel_size=3,
+                    padding=1,
+                    stride=1,
+                    norm_cfg=norm_cfg,
+                    act_cfg=act_cfg))
+
+    def forward(self, inputs):
+        assert len(inputs) == len(self.in_channels)
+        inputs = [
+            lateral_conv(inputs[i])
+            for i, lateral_conv in enumerate(self.lateral_convs)
+        ]
+        # when len(inputs) is not equal to self.num_outs, replicate the
+        # single input feature map for every output level
+        if len(inputs) == 1:
+            inputs = [inputs[0] for _ in range(self.num_outs)]
+        outs = []
+        for i in range(self.num_outs):
+            x_resize = F.interpolate(
+                inputs[i], scale_factor=self.scales[i], mode='bilinear')
+            outs.append(self.convs[i](x_resize))
+        return tuple(outs)
diff --git a/mmseg/models/segmentors/__init__.py b/mmseg/models/segmentors/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..dca2f09405330743c476e190896bee39c45498ea
--- /dev/null
+++ b/mmseg/models/segmentors/__init__.py
@@ -0,0 +1,5 @@
+from .base import BaseSegmentor
+from .cascade_encoder_decoder import CascadeEncoderDecoder
+from .encoder_decoder import EncoderDecoder
+
+__all__ = ['BaseSegmentor', 'EncoderDecoder', 'CascadeEncoderDecoder']
diff --git a/mmseg/models/segmentors/base.py b/mmseg/models/segmentors/base.py
new file mode 100644
index 0000000000000000000000000000000000000000..7b53757537a129fb92795caa7f661738c7252fb4
--- /dev/null
+++ b/mmseg/models/segmentors/base.py
@@ -0,0 +1,273 @@
+import logging
+import warnings
+from abc import ABCMeta, abstractmethod
+from collections import OrderedDict
+
+import mmcv
+import numpy as np
+import torch
+import torch.distributed as dist
+import torch.nn as nn
+from mmcv.runner import auto_fp16
+
+
+class BaseSegmentor(nn.Module, metaclass=ABCMeta):
+    """Base class for segmentors."""
+
+    def __init__(self):
+        super(BaseSegmentor, self).__init__()
+        self.fp16_enabled = False
+
+    @property
+    def with_neck(self):
+        """bool: whether the segmentor has neck"""
+        return hasattr(self, 'neck') and self.neck is not None
+
+    @property
+    def with_auxiliary_head(self):
+        """bool: whether the segmentor has auxiliary head"""
+        return hasattr(self,
+                       'auxiliary_head') and self.auxiliary_head is not None
+
+    @property
+    def with_decode_head(self):
+        """bool: whether the segmentor has decode head"""
+        return hasattr(self, 'decode_head') and self.decode_head is not None
+
+    @abstractmethod
+    def extract_feat(self, imgs):
+        """Placeholder for extracting features from images."""
+        pass
+
+    @abstractmethod
+    def encode_decode(self, img, img_metas):
+        """Placeholder for encoding images with backbone and decoding into a
+        semantic segmentation map of the same size as input."""
+        pass
+
+    @abstractmethod
+    def forward_train(self, imgs, img_metas, **kwargs):
+        """Placeholder for the forward function for training."""
+        pass
+
+    @abstractmethod
+    def simple_test(self, img, img_meta, **kwargs):
+        """Placeholder for single image test."""
+        pass
+
+    @abstractmethod
+    def aug_test(self, imgs, img_metas, **kwargs):
+        """Placeholder for augmentation test."""
+        pass
+
+    def init_weights(self, pretrained=None):
+        """Initialize the weights in segmentor.
+
+        Args:
+            pretrained (str, optional): Path to pre-trained weights.
+                Defaults to None.
+        """
+        if pretrained is not None:
+            logger = logging.getLogger()
+            logger.info(f'load model from: {pretrained}')
+
+    def forward_test(self, imgs, img_metas, **kwargs):
+        """
+        Args:
+            imgs (List[Tensor]): the outer list indicates test-time
+                augmentations and inner Tensor should have a shape NxCxHxW,
+                which contains all images in the batch.
+            img_metas (List[List[dict]]): the outer list indicates test-time
+                augs (multiscale, flip, etc.) and the inner list indicates
+                images in a batch.
+        """
+        for var, name in [(imgs, 'imgs'), (img_metas, 'img_metas')]:
+            if not isinstance(var, list):
+                raise TypeError(f'{name} must be a list, but got '
+                                f'{type(var)}')
+
+        num_augs = len(imgs)
+        if num_augs != len(img_metas):
+            raise ValueError(f'num of augmentations ({len(imgs)}) != '
+                             f'num of image meta ({len(img_metas)})')
+        # all images in the same aug batch should have the same ori_shape
+        # and pad_shape
+        for img_meta in img_metas:
+            ori_shapes = [_['ori_shape'] for _ in img_meta]
+            assert all(shape == ori_shapes[0] for shape in ori_shapes)
+            img_shapes = [_['img_shape'] for _ in img_meta]
+            assert all(shape == img_shapes[0] for shape in img_shapes)
+            pad_shapes = [_['pad_shape'] for _ in img_meta]
+            assert all(shape == pad_shapes[0] for shape in pad_shapes)
+
+        if num_augs == 1:
+            return self.simple_test(imgs[0], img_metas[0], **kwargs)
+        else:
+            return self.aug_test(imgs, img_metas, **kwargs)
+
+    @auto_fp16(apply_to=('img', ))
+    def forward(self, img, img_metas, return_loss=True, **kwargs):
+        """Calls either :func:`forward_train` or :func:`forward_test` depending
+        on whether ``return_loss`` is ``True``.
+
+        Note this setting will change the expected inputs. When
+        ``return_loss=True``, img and img_meta are single-nested (i.e. Tensor
+        and List[dict]), and when ``return_loss=False``, img and img_meta
+        should be double nested (i.e. List[Tensor], List[List[dict]]), with
+        the outer list indicating test time augmentations.
+        """
+        if return_loss:
+            return self.forward_train(img, img_metas, **kwargs)
+        else:
+            return self.forward_test(img, img_metas, **kwargs)
+
+    def train_step(self, data_batch, optimizer, **kwargs):
+        """The iteration step during training.
+
+        This method defines an iteration step during training, except for the
+        back propagation and optimizer updating, which are done in an optimizer
+        hook. Note that in some complicated cases or models, the whole process
+        including back propagation and optimizer updating is also defined in
+        this method, such as GAN.
+
+        Args:
+            data_batch (dict): The output of dataloader.
+            optimizer (:obj:`torch.optim.Optimizer` | dict): The optimizer of
+                runner is passed to ``train_step()``. This argument is unused
+                and reserved.
+
+        Returns:
+            dict: It should contain at least 3 keys: ``loss``, ``log_vars``,
+                ``num_samples``.
+                ``loss`` is a tensor for back propagation, which can be a
+                weighted sum of multiple losses.
+                ``log_vars`` contains all the variables to be sent to the
+                logger.
+                ``num_samples`` indicates the batch size (when the model is
+                DDP, it means the batch size on each GPU), which is used for
+                averaging the logs.
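+
+            A sketch of the returned structure (the specific ``log_vars``
+            keys are illustrative)::
+
+                dict(
+                    loss=total_loss,  # scalar Tensor used for backprop
+                    log_vars={'decode.loss_seg': 0.9, 'loss': 0.9},
+                    num_samples=2)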
+ """ + losses = self(**data_batch) + loss, log_vars = self._parse_losses(losses) + + outputs = dict( + loss=loss, + log_vars=log_vars, + num_samples=len(data_batch['img_metas'])) + + return outputs + + def val_step(self, data_batch, **kwargs): + """The iteration step during validation. + + This method shares the same signature as :func:`train_step`, but used + during val epochs. Note that the evaluation after training epochs is + not implemented with this method, but an evaluation hook. + """ + output = self(**data_batch, **kwargs) + return output + + @staticmethod + def _parse_losses(losses): + """Parse the raw outputs (losses) of the network. + + Args: + losses (dict): Raw output of the network, which usually contain + losses and other necessary information. + + Returns: + tuple[Tensor, dict]: (loss, log_vars), loss is the loss tensor + which may be a weighted sum of all losses, log_vars contains + all the variables to be sent to the logger. + """ + log_vars = OrderedDict() + for loss_name, loss_value in losses.items(): + if isinstance(loss_value, torch.Tensor): + log_vars[loss_name] = loss_value.mean() + elif isinstance(loss_value, list): + log_vars[loss_name] = sum(_loss.mean() for _loss in loss_value) + else: + raise TypeError( + f'{loss_name} is not a tensor or list of tensors') + + loss = sum(_value for _key, _value in log_vars.items() + if 'loss' in _key) + + log_vars['loss'] = loss + for loss_name, loss_value in log_vars.items(): + # reduce loss when distributed training + if dist.is_available() and dist.is_initialized(): + loss_value = loss_value.data.clone() + dist.all_reduce(loss_value.div_(dist.get_world_size())) + log_vars[loss_name] = loss_value.item() + + return loss, log_vars + + def show_result(self, + img, + result, + palette=None, + win_name='', + show=False, + wait_time=0, + out_file=None, + opacity=0.5): + """Draw `result` over `img`. + + Args: + img (str or Tensor): The image to be displayed. + result (Tensor): The semantic segmentation results to draw over + `img`. + palette (list[list[int]]] | np.ndarray | None): The palette of + segmentation map. If None is given, random palette will be + generated. Default: None + win_name (str): The window name. + wait_time (int): Value of waitKey param. + Default: 0. + show (bool): Whether to show the image. + Default: False. + out_file (str or None): The filename to write the image. + Default: None. + opacity(float): Opacity of painted segmentation map. + Default 0.5. + Must be in (0, 1] range. 
+
+        Returns:
+            img (ndarray): The drawn image. Only returned when ``show`` is
+                False and ``out_file`` is not specified.
+        """
+        img = mmcv.imread(img)
+        img = img.copy()
+        seg = result[0]
+        if palette is None:
+            if self.PALETTE is None:
+                palette = np.random.randint(
+                    0, 255, size=(len(self.CLASSES), 3))
+            else:
+                palette = self.PALETTE
+        palette = np.array(palette)
+        assert palette.shape[0] == len(self.CLASSES)
+        assert palette.shape[1] == 3
+        assert len(palette.shape) == 2
+        assert 0 < opacity <= 1.0
+        color_seg = np.zeros((seg.shape[0], seg.shape[1], 3), dtype=np.uint8)
+        for label, color in enumerate(palette):
+            color_seg[seg == label, :] = color
+        # convert to BGR
+        color_seg = color_seg[..., ::-1]
+
+        img = img * (1 - opacity) + color_seg * opacity
+        img = img.astype(np.uint8)
+        # if out_file specified, do not show image in window
+        if out_file is not None:
+            show = False
+
+        if show:
+            mmcv.imshow(img, win_name, wait_time)
+        if out_file is not None:
+            mmcv.imwrite(img, out_file)
+
+        if not (show or out_file):
+            warnings.warn('show==False and out_file is not specified, only '
+                          'result image will be returned')
+            return img
diff --git a/mmseg/models/segmentors/cascade_encoder_decoder.py b/mmseg/models/segmentors/cascade_encoder_decoder.py
new file mode 100644
index 0000000000000000000000000000000000000000..220ab2bb365b61885482bdd86606e632f2af74ae
--- /dev/null
+++ b/mmseg/models/segmentors/cascade_encoder_decoder.py
@@ -0,0 +1,98 @@
+from torch import nn
+
+from mmseg.core import add_prefix
+from mmseg.ops import resize
+from .. import builder
+from ..builder import SEGMENTORS
+from .encoder_decoder import EncoderDecoder
+
+
+@SEGMENTORS.register_module()
+class CascadeEncoderDecoder(EncoderDecoder):
+    """Cascade Encoder Decoder segmentors.
+
+    CascadeEncoderDecoder is almost the same as EncoderDecoder, except that
+    its decode heads are cascaded: the output of the previous decode head is
+    used as the input of the next one.
+    """
+
+    def __init__(self,
+                 num_stages,
+                 backbone,
+                 decode_head,
+                 neck=None,
+                 auxiliary_head=None,
+                 train_cfg=None,
+                 test_cfg=None,
+                 pretrained=None):
+        self.num_stages = num_stages
+        super(CascadeEncoderDecoder, self).__init__(
+            backbone=backbone,
+            decode_head=decode_head,
+            neck=neck,
+            auxiliary_head=auxiliary_head,
+            train_cfg=train_cfg,
+            test_cfg=test_cfg,
+            pretrained=pretrained)
+
+    def _init_decode_head(self, decode_head):
+        """Initialize ``decode_head``"""
+        assert isinstance(decode_head, list)
+        assert len(decode_head) == self.num_stages
+        self.decode_head = nn.ModuleList()
+        for i in range(self.num_stages):
+            self.decode_head.append(builder.build_head(decode_head[i]))
+        self.align_corners = self.decode_head[-1].align_corners
+        self.num_classes = self.decode_head[-1].num_classes
+
+    def init_weights(self, pretrained=None):
+        """Initialize the weights in backbone and heads.
+
+        Args:
+            pretrained (str, optional): Path to pre-trained weights.
+                Defaults to None.
+ """ + self.backbone.init_weights(pretrained=pretrained) + for i in range(self.num_stages): + self.decode_head[i].init_weights() + if self.with_auxiliary_head: + if isinstance(self.auxiliary_head, nn.ModuleList): + for aux_head in self.auxiliary_head: + aux_head.init_weights() + else: + self.auxiliary_head.init_weights() + + def encode_decode(self, img, img_metas): + """Encode images with backbone and decode into a semantic segmentation + map of the same size as input.""" + x = self.extract_feat(img) + out = self.decode_head[0].forward_test(x, img_metas, self.test_cfg) + for i in range(1, self.num_stages): + out = self.decode_head[i].forward_test(x, out, img_metas, + self.test_cfg) + out = resize( + input=out, + size=img.shape[2:], + mode='bilinear', + align_corners=self.align_corners) + return out + + def _decode_head_forward_train(self, x, img_metas, gt_semantic_seg): + """Run forward function and calculate loss for decode head in + training.""" + losses = dict() + + loss_decode = self.decode_head[0].forward_train( + x, img_metas, gt_semantic_seg, self.train_cfg) + + losses.update(add_prefix(loss_decode, 'decode_0')) + + for i in range(1, self.num_stages): + # forward test again, maybe unnecessary for most methods. + prev_outputs = self.decode_head[i - 1].forward_test( + x, img_metas, self.test_cfg) + loss_decode = self.decode_head[i].forward_train( + x, prev_outputs, img_metas, gt_semantic_seg, self.train_cfg) + losses.update(add_prefix(loss_decode, f'decode_{i}')) + + return losses diff --git a/mmseg/models/segmentors/encoder_decoder.py b/mmseg/models/segmentors/encoder_decoder.py new file mode 100644 index 0000000000000000000000000000000000000000..b2d067dcbed0822562c9cd2e5e54ba42f0597938 --- /dev/null +++ b/mmseg/models/segmentors/encoder_decoder.py @@ -0,0 +1,298 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + +from mmseg.core import add_prefix +from mmseg.ops import resize +from .. import builder +from ..builder import SEGMENTORS +from .base import BaseSegmentor + + +@SEGMENTORS.register_module() +class EncoderDecoder(BaseSegmentor): + """Encoder Decoder segmentors. + + EncoderDecoder typically consists of backbone, decode_head, auxiliary_head. + Note that auxiliary_head is only used for deep supervision during training, + which could be dumped during inference. + """ + + def __init__(self, + backbone, + decode_head, + neck=None, + auxiliary_head=None, + train_cfg=None, + test_cfg=None, + pretrained=None): + super(EncoderDecoder, self).__init__() + self.backbone = builder.build_backbone(backbone) + if neck is not None: + self.neck = builder.build_neck(neck) + self._init_decode_head(decode_head) + self._init_auxiliary_head(auxiliary_head) + + self.train_cfg = train_cfg + self.test_cfg = test_cfg + + self.init_weights(pretrained=pretrained) + + assert self.with_decode_head + + def _init_decode_head(self, decode_head): + """Initialize ``decode_head``""" + self.decode_head = builder.build_head(decode_head) + self.align_corners = self.decode_head.align_corners + self.num_classes = self.decode_head.num_classes + + def _init_auxiliary_head(self, auxiliary_head): + """Initialize ``auxiliary_head``""" + if auxiliary_head is not None: + if isinstance(auxiliary_head, list): + self.auxiliary_head = nn.ModuleList() + for head_cfg in auxiliary_head: + self.auxiliary_head.append(builder.build_head(head_cfg)) + else: + self.auxiliary_head = builder.build_head(auxiliary_head) + + def init_weights(self, pretrained=None): + """Initialize the weights in backbone and heads. 
+ + Args: + pretrained (str, optional): Path to pre-trained weights. + Defaults to None. + """ + + super(EncoderDecoder, self).init_weights(pretrained) + self.backbone.init_weights(pretrained=pretrained) + self.decode_head.init_weights() + if self.with_auxiliary_head: + if isinstance(self.auxiliary_head, nn.ModuleList): + for aux_head in self.auxiliary_head: + aux_head.init_weights() + else: + self.auxiliary_head.init_weights() + + def extract_feat(self, img): + """Extract features from images.""" + x = self.backbone(img) + if self.with_neck: + x = self.neck(x) + return x + + def encode_decode(self, img, img_metas): + """Encode images with backbone and decode into a semantic segmentation + map of the same size as input.""" + x = self.extract_feat(img) + out = self._decode_head_forward_test(x, img_metas) + out = resize( + input=out, + size=img.shape[2:], + mode='bilinear', + align_corners=self.align_corners) + return out + + def _decode_head_forward_train(self, x, img_metas, gt_semantic_seg): + """Run forward function and calculate loss for decode head in + training.""" + losses = dict() + loss_decode = self.decode_head.forward_train(x, img_metas, + gt_semantic_seg, + self.train_cfg) + + losses.update(add_prefix(loss_decode, 'decode')) + return losses + + def _decode_head_forward_test(self, x, img_metas): + """Run forward function and calculate loss for decode head in + inference.""" + seg_logits = self.decode_head.forward_test(x, img_metas, self.test_cfg) + return seg_logits + + def _auxiliary_head_forward_train(self, x, img_metas, gt_semantic_seg): + """Run forward function and calculate loss for auxiliary head in + training.""" + losses = dict() + if isinstance(self.auxiliary_head, nn.ModuleList): + for idx, aux_head in enumerate(self.auxiliary_head): + loss_aux = aux_head.forward_train(x, img_metas, + gt_semantic_seg, + self.train_cfg) + losses.update(add_prefix(loss_aux, f'aux_{idx}')) + else: + loss_aux = self.auxiliary_head.forward_train( + x, img_metas, gt_semantic_seg, self.train_cfg) + losses.update(add_prefix(loss_aux, 'aux')) + + return losses + + def forward_dummy(self, img): + """Dummy forward function.""" + seg_logit = self.encode_decode(img, None) + + return seg_logit + + def forward_train(self, img, img_metas, gt_semantic_seg): + """Forward function for training. + + Args: + img (Tensor): Input images. + img_metas (list[dict]): List of image info dict where each dict + has: 'img_shape', 'scale_factor', 'flip', and may also contain + 'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'. + For details on the values of these keys see + `mmseg/datasets/pipelines/formatting.py:Collect`. + gt_semantic_seg (Tensor): Semantic segmentation masks + used if the architecture supports semantic segmentation task. + + Returns: + dict[str, Tensor]: a dictionary of loss components + """ + + x = self.extract_feat(img) + + losses = dict() + + loss_decode = self._decode_head_forward_train(x, img_metas, + gt_semantic_seg) + losses.update(loss_decode) + + if self.with_auxiliary_head: + loss_aux = self._auxiliary_head_forward_train( + x, img_metas, gt_semantic_seg) + losses.update(loss_aux) + + return losses + + # TODO refactor + def slide_inference(self, img, img_meta, rescale): + """Inference by sliding-window with overlap. + + If h_crop > h_img or w_crop > w_img, the small patch will be used to + decode without padding. 
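+
+        For example, with ``h_img=512``, ``h_crop=256`` and ``h_stride=171``:
+        ``h_grids = (512 - 256 + 170) // 171 + 1 = 3`` and the window tops
+        are 0, 171 and 256 (the last start is clamped so the crop stays
+        inside the image).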
+ """ + + h_stride, w_stride = self.test_cfg.stride + h_crop, w_crop = self.test_cfg.crop_size + batch_size, _, h_img, w_img = img.size() + num_classes = self.num_classes + h_grids = max(h_img - h_crop + h_stride - 1, 0) // h_stride + 1 + w_grids = max(w_img - w_crop + w_stride - 1, 0) // w_stride + 1 + preds = img.new_zeros((batch_size, num_classes, h_img, w_img)) + count_mat = img.new_zeros((batch_size, 1, h_img, w_img)) + for h_idx in range(h_grids): + for w_idx in range(w_grids): + y1 = h_idx * h_stride + x1 = w_idx * w_stride + y2 = min(y1 + h_crop, h_img) + x2 = min(x1 + w_crop, w_img) + y1 = max(y2 - h_crop, 0) + x1 = max(x2 - w_crop, 0) + crop_img = img[:, :, y1:y2, x1:x2] + crop_seg_logit = self.encode_decode(crop_img, img_meta) + preds += F.pad(crop_seg_logit, + (int(x1), int(preds.shape[3] - x2), int(y1), + int(preds.shape[2] - y2))) + + count_mat[:, :, y1:y2, x1:x2] += 1 + assert (count_mat == 0).sum() == 0 + if torch.onnx.is_in_onnx_export(): + # cast count_mat to constant while exporting to ONNX + count_mat = torch.from_numpy( + count_mat.cpu().detach().numpy()).to(device=img.device) + preds = preds / count_mat + if rescale: + preds = resize( + preds, + size=img_meta[0]['ori_shape'][:2], + mode='bilinear', + align_corners=self.align_corners, + warning=False) + return preds + + def whole_inference(self, img, img_meta, rescale): + """Inference with full image.""" + + seg_logit = self.encode_decode(img, img_meta) + if rescale: + # support dynamic shape for onnx + if torch.onnx.is_in_onnx_export(): + size = img.shape[2:] + else: + size = img_meta[0]['ori_shape'][:2] + seg_logit = resize( + seg_logit, + size=size, + mode='bilinear', + align_corners=self.align_corners, + warning=False) + + return seg_logit + + def inference(self, img, img_meta, rescale): + """Inference with slide/whole style. + + Args: + img (Tensor): The input image of shape (N, 3, H, W). + img_meta (dict): Image info dict where each dict has: 'img_shape', + 'scale_factor', 'flip', and may also contain + 'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'. + For details on the values of these keys see + `mmseg/datasets/pipelines/formatting.py:Collect`. + rescale (bool): Whether rescale back to original shape. + + Returns: + Tensor: The output segmentation map. + """ + + assert self.test_cfg.mode in ['slide', 'whole'] + ori_shape = img_meta[0]['ori_shape'] + assert all(_['ori_shape'] == ori_shape for _ in img_meta) + if self.test_cfg.mode == 'slide': + seg_logit = self.slide_inference(img, img_meta, rescale) + else: + seg_logit = self.whole_inference(img, img_meta, rescale) + output = F.softmax(seg_logit, dim=1) + flip = img_meta[0]['flip'] + if flip: + flip_direction = img_meta[0]['flip_direction'] + assert flip_direction in ['horizontal', 'vertical'] + if flip_direction == 'horizontal': + output = output.flip(dims=(3, )) + elif flip_direction == 'vertical': + output = output.flip(dims=(2, )) + + return output + + def simple_test(self, img, img_meta, rescale=True): + """Simple test with single image.""" + seg_logit = self.inference(img, img_meta, rescale) + seg_pred = seg_logit.argmax(dim=1) + if torch.onnx.is_in_onnx_export(): + # our inference backend only support 4D output + seg_pred = seg_pred.unsqueeze(0) + return seg_pred + seg_pred = seg_pred.cpu().numpy() + # unravel batch dim + seg_pred = list(seg_pred) + return seg_pred + + def aug_test(self, imgs, img_metas, rescale=True): + """Test with augmentations. + + Only rescale=True is supported. 
+ """ + # aug_test rescale all imgs back to ori_shape for now + assert rescale + # to save memory, we get augmented seg logit inplace + seg_logit = self.inference(imgs[0], img_metas[0], rescale) + for i in range(1, len(imgs)): + cur_seg_logit = self.inference(imgs[i], img_metas[i], rescale) + seg_logit += cur_seg_logit + seg_logit /= len(imgs) + seg_pred = seg_logit.argmax(dim=1) + seg_pred = seg_pred.cpu().numpy() + # unravel batch dim + seg_pred = list(seg_pred) + return seg_pred diff --git a/mmseg/models/utils/__init__.py b/mmseg/models/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..3d3bdd349b9f2ae499a2fcb2ac1d2e3c77befebe --- /dev/null +++ b/mmseg/models/utils/__init__.py @@ -0,0 +1,13 @@ +from .drop import DropPath +from .inverted_residual import InvertedResidual, InvertedResidualV3 +from .make_divisible import make_divisible +from .res_layer import ResLayer +from .se_layer import SELayer +from .self_attention_block import SelfAttentionBlock +from .up_conv_block import UpConvBlock +from .weight_init import trunc_normal_ + +__all__ = [ + 'ResLayer', 'SelfAttentionBlock', 'make_divisible', 'InvertedResidual', + 'UpConvBlock', 'InvertedResidualV3', 'SELayer', 'DropPath', 'trunc_normal_' +] diff --git a/mmseg/models/utils/drop.py b/mmseg/models/utils/drop.py new file mode 100644 index 0000000000000000000000000000000000000000..4520b0ff407d2a95a864086bdbca0065f222aa63 --- /dev/null +++ b/mmseg/models/utils/drop.py @@ -0,0 +1,31 @@ +"""Modified from https://github.com/rwightman/pytorch-image- +models/blob/master/timm/models/layers/drop.py.""" + +import torch +from torch import nn + + +class DropPath(nn.Module): + """Drop paths (Stochastic Depth) per sample (when applied in main path of + residual blocks). + + Args: + drop_prob (float): Drop rate for paths of model. Dropout rate has + to be between 0 and 1. Default: 0. + """ + + def __init__(self, drop_prob=0.): + super(DropPath, self).__init__() + self.drop_prob = drop_prob + self.keep_prob = 1 - drop_prob + + def forward(self, x): + if self.drop_prob == 0. or not self.training: + return x + shape = (x.shape[0], ) + (1, ) * ( + x.ndim - 1) # work with diff dim tensors, not just 2D ConvNets + random_tensor = self.keep_prob + torch.rand( + shape, dtype=x.dtype, device=x.device) + random_tensor.floor_() # binarize + output = x.div(self.keep_prob) * random_tensor + return output diff --git a/mmseg/models/utils/inverted_residual.py b/mmseg/models/utils/inverted_residual.py new file mode 100644 index 0000000000000000000000000000000000000000..ede71a291453d96f91b54ce12c609606f5b80c9e --- /dev/null +++ b/mmseg/models/utils/inverted_residual.py @@ -0,0 +1,208 @@ +from mmcv.cnn import ConvModule +from torch import nn +from torch.utils import checkpoint as cp + +from .se_layer import SELayer + + +class InvertedResidual(nn.Module): + """InvertedResidual block for MobileNetV2. + + Args: + in_channels (int): The input channels of the InvertedResidual block. + out_channels (int): The output channels of the InvertedResidual block. + stride (int): Stride of the middle (first) 3x3 convolution. + expand_ratio (int): Adjusts number of channels of the hidden layer + in InvertedResidual by this amount. + dilation (int): Dilation rate of depthwise conv. Default: 1 + conv_cfg (dict): Config dict for convolution layer. + Default: None, which means using conv2d. + norm_cfg (dict): Config dict for normalization layer. + Default: dict(type='BN'). + act_cfg (dict): Config dict for activation layer. + Default: dict(type='ReLU6'). 
+        with_cp (bool): Use checkpoint or not. Using checkpoint will save some
+            memory while slowing down the training speed. Default: False.
+
+    Returns:
+        Tensor: The output tensor.
+    """
+
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 stride,
+                 expand_ratio,
+                 dilation=1,
+                 conv_cfg=None,
+                 norm_cfg=dict(type='BN'),
+                 act_cfg=dict(type='ReLU6'),
+                 with_cp=False):
+        super(InvertedResidual, self).__init__()
+        self.stride = stride
+        assert stride in [1, 2], f'stride must be in [1, 2]. ' \
+            f'But received {stride}.'
+        self.with_cp = with_cp
+        self.use_res_connect = self.stride == 1 and in_channels == out_channels
+        hidden_dim = int(round(in_channels * expand_ratio))
+
+        layers = []
+        if expand_ratio != 1:
+            layers.append(
+                ConvModule(
+                    in_channels=in_channels,
+                    out_channels=hidden_dim,
+                    kernel_size=1,
+                    conv_cfg=conv_cfg,
+                    norm_cfg=norm_cfg,
+                    act_cfg=act_cfg))
+        layers.extend([
+            ConvModule(
+                in_channels=hidden_dim,
+                out_channels=hidden_dim,
+                kernel_size=3,
+                stride=stride,
+                padding=dilation,
+                dilation=dilation,
+                groups=hidden_dim,
+                conv_cfg=conv_cfg,
+                norm_cfg=norm_cfg,
+                act_cfg=act_cfg),
+            ConvModule(
+                in_channels=hidden_dim,
+                out_channels=out_channels,
+                kernel_size=1,
+                conv_cfg=conv_cfg,
+                norm_cfg=norm_cfg,
+                act_cfg=None)
+        ])
+        self.conv = nn.Sequential(*layers)
+
+    def forward(self, x):
+
+        def _inner_forward(x):
+            if self.use_res_connect:
+                return x + self.conv(x)
+            else:
+                return self.conv(x)
+
+        if self.with_cp and x.requires_grad:
+            out = cp.checkpoint(_inner_forward, x)
+        else:
+            out = _inner_forward(x)
+
+        return out
+
+
+class InvertedResidualV3(nn.Module):
+    """Inverted Residual Block for MobileNetV3.
+
+    Args:
+        in_channels (int): The input channels of this Module.
+        out_channels (int): The output channels of this Module.
+        mid_channels (int): The input channels of the depthwise convolution.
+        kernel_size (int): The kernel size of the depthwise convolution.
+            Default: 3.
+        stride (int): The stride of the depthwise convolution. Default: 1.
+        se_cfg (dict): Config dict for se layer. Default: None, which means no
+            se layer.
+        with_expand_conv (bool): Use expand conv or not. If set False,
+            mid_channels must be the same with in_channels. Default: True.
+        conv_cfg (dict): Config dict for convolution layer. Default: None,
+            which means using conv2d.
+        norm_cfg (dict): Config dict for normalization layer.
+            Default: dict(type='BN').
+        act_cfg (dict): Config dict for activation layer.
+            Default: dict(type='ReLU').
+        with_cp (bool): Use checkpoint or not. Using checkpoint will save some
+            memory while slowing down the training speed. Default: False.
+
+    Returns:
+        Tensor: The output tensor.
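+
+    Example (shapes are illustrative):
+        >>> import torch
+        >>> block = InvertedResidualV3(16, 16, mid_channels=64)
+        >>> x = torch.rand(1, 16, 32, 32)
+        >>> block(x).shape
+        torch.Size([1, 16, 32, 32])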
+ """ + + def __init__(self, + in_channels, + out_channels, + mid_channels, + kernel_size=3, + stride=1, + se_cfg=None, + with_expand_conv=True, + conv_cfg=None, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU'), + with_cp=False): + super(InvertedResidualV3, self).__init__() + self.with_res_shortcut = (stride == 1 and in_channels == out_channels) + assert stride in [1, 2] + self.with_cp = with_cp + self.with_se = se_cfg is not None + self.with_expand_conv = with_expand_conv + + if self.with_se: + assert isinstance(se_cfg, dict) + if not self.with_expand_conv: + assert mid_channels == in_channels + + if self.with_expand_conv: + self.expand_conv = ConvModule( + in_channels=in_channels, + out_channels=mid_channels, + kernel_size=1, + stride=1, + padding=0, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + self.depthwise_conv = ConvModule( + in_channels=mid_channels, + out_channels=mid_channels, + kernel_size=kernel_size, + stride=stride, + padding=kernel_size // 2, + groups=mid_channels, + conv_cfg=dict( + type='Conv2dAdaptivePadding') if stride == 2 else conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + + if self.with_se: + self.se = SELayer(**se_cfg) + + self.linear_conv = ConvModule( + in_channels=mid_channels, + out_channels=out_channels, + kernel_size=1, + stride=1, + padding=0, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=None) + + def forward(self, x): + + def _inner_forward(x): + out = x + + if self.with_expand_conv: + out = self.expand_conv(out) + + out = self.depthwise_conv(out) + + if self.with_se: + out = self.se(out) + + out = self.linear_conv(out) + + if self.with_res_shortcut: + return x + out + else: + return out + + if self.with_cp and x.requires_grad: + out = cp.checkpoint(_inner_forward, x) + else: + out = _inner_forward(x) + + return out diff --git a/mmseg/models/utils/make_divisible.py b/mmseg/models/utils/make_divisible.py new file mode 100644 index 0000000000000000000000000000000000000000..75ad756052529f52fe83bb95dd1f0ecfc9a13078 --- /dev/null +++ b/mmseg/models/utils/make_divisible.py @@ -0,0 +1,27 @@ +def make_divisible(value, divisor, min_value=None, min_ratio=0.9): + """Make divisible function. + + This function rounds the channel number to the nearest value that can be + divisible by the divisor. It is taken from the original tf repo. It ensures + that all layers have a channel number that is divisible by divisor. It can + be seen here: https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py # noqa + + Args: + value (int): The original channel number. + divisor (int): The divisor to fully divide the channel number. + min_value (int): The minimum value of the output channel. + Default: None, means that the minimum value equal to the divisor. + min_ratio (float): The minimum ratio of the rounded channel number to + the original channel number. Default: 0.9. + + Returns: + int: The modified output channel number. + """ + + if min_value is None: + min_value = divisor + new_value = max(min_value, int(value + divisor / 2) // divisor * divisor) + # Make sure that round down does not go down by more than (1-min_ratio). 
+ if new_value < min_ratio * value: + new_value += divisor + return new_value diff --git a/mmseg/models/utils/res_layer.py b/mmseg/models/utils/res_layer.py new file mode 100644 index 0000000000000000000000000000000000000000..2585ab551aea79252ef6b34b5faef476e9e1abaa --- /dev/null +++ b/mmseg/models/utils/res_layer.py @@ -0,0 +1,94 @@ +from mmcv.cnn import build_conv_layer, build_norm_layer +from torch import nn as nn + + +class ResLayer(nn.Sequential): + """ResLayer to build ResNet style backbone. + + Args: + block (nn.Module): block used to build ResLayer. + inplanes (int): inplanes of block. + planes (int): planes of block. + num_blocks (int): number of blocks. + stride (int): stride of the first block. Default: 1 + avg_down (bool): Use AvgPool instead of stride conv when + downsampling in the bottleneck. Default: False + conv_cfg (dict): dictionary to construct and config conv layer. + Default: None + norm_cfg (dict): dictionary to construct and config norm layer. + Default: dict(type='BN') + multi_grid (int | None): Multi grid dilation rates of last + stage. Default: None + contract_dilation (bool): Whether contract first dilation of each layer + Default: False + """ + + def __init__(self, + block, + inplanes, + planes, + num_blocks, + stride=1, + dilation=1, + avg_down=False, + conv_cfg=None, + norm_cfg=dict(type='BN'), + multi_grid=None, + contract_dilation=False, + **kwargs): + self.block = block + + downsample = None + if stride != 1 or inplanes != planes * block.expansion: + downsample = [] + conv_stride = stride + if avg_down: + conv_stride = 1 + downsample.append( + nn.AvgPool2d( + kernel_size=stride, + stride=stride, + ceil_mode=True, + count_include_pad=False)) + downsample.extend([ + build_conv_layer( + conv_cfg, + inplanes, + planes * block.expansion, + kernel_size=1, + stride=conv_stride, + bias=False), + build_norm_layer(norm_cfg, planes * block.expansion)[1] + ]) + downsample = nn.Sequential(*downsample) + + layers = [] + if multi_grid is None: + if dilation > 1 and contract_dilation: + first_dilation = dilation // 2 + else: + first_dilation = dilation + else: + first_dilation = multi_grid[0] + layers.append( + block( + inplanes=inplanes, + planes=planes, + stride=stride, + dilation=first_dilation, + downsample=downsample, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + **kwargs)) + inplanes = planes * block.expansion + for i in range(1, num_blocks): + layers.append( + block( + inplanes=inplanes, + planes=planes, + stride=1, + dilation=dilation if multi_grid is None else multi_grid[i], + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + **kwargs)) + super(ResLayer, self).__init__(*layers) diff --git a/mmseg/models/utils/se_layer.py b/mmseg/models/utils/se_layer.py new file mode 100644 index 0000000000000000000000000000000000000000..e08340457b22c13bd69831ef56921e268f49e813 --- /dev/null +++ b/mmseg/models/utils/se_layer.py @@ -0,0 +1,57 @@ +import mmcv +import torch.nn as nn +from mmcv.cnn import ConvModule + +from .make_divisible import make_divisible + + +class SELayer(nn.Module): + """Squeeze-and-Excitation Module. + + Args: + channels (int): The input (and output) channels of the SE layer. + ratio (int): Squeeze ratio in SELayer, the intermediate channel will be + ``int(channels/ratio)``. Default: 16. + conv_cfg (None or dict): Config dict for convolution layer. + Default: None, which means using conv2d. + act_cfg (dict or Sequence[dict]): Config dict for activation layer. + If act_cfg is a dict, two activation layers will be configured + by this dict. 
If act_cfg is a sequence of dicts, the first
+            activation layer will be configured by the first dict and the
+            second activation layer will be configured by the second dict.
+            Default: (dict(type='ReLU'), dict(type='HSigmoid', bias=3.0,
+            divisor=6.0)).
+    """
+
+    def __init__(self,
+                 channels,
+                 ratio=16,
+                 conv_cfg=None,
+                 act_cfg=(dict(type='ReLU'),
+                          dict(type='HSigmoid', bias=3.0, divisor=6.0))):
+        super(SELayer, self).__init__()
+        if isinstance(act_cfg, dict):
+            act_cfg = (act_cfg, act_cfg)
+        assert len(act_cfg) == 2
+        assert mmcv.is_tuple_of(act_cfg, dict)
+        self.global_avgpool = nn.AdaptiveAvgPool2d(1)
+        self.conv1 = ConvModule(
+            in_channels=channels,
+            out_channels=make_divisible(channels // ratio, 8),
+            kernel_size=1,
+            stride=1,
+            conv_cfg=conv_cfg,
+            act_cfg=act_cfg[0])
+        self.conv2 = ConvModule(
+            in_channels=make_divisible(channels // ratio, 8),
+            out_channels=channels,
+            kernel_size=1,
+            stride=1,
+            conv_cfg=conv_cfg,
+            act_cfg=act_cfg[1])
+
+    def forward(self, x):
+        out = self.global_avgpool(x)
+        out = self.conv1(out)
+        out = self.conv2(out)
+        return x * out
diff --git a/mmseg/models/utils/self_attention_block.py b/mmseg/models/utils/self_attention_block.py
new file mode 100644
index 0000000000000000000000000000000000000000..372fad2e000a157c7ef283a82388a7fea0158ee0
--- /dev/null
+++ b/mmseg/models/utils/self_attention_block.py
@@ -0,0 +1,159 @@
+import torch
+from mmcv.cnn import ConvModule, constant_init
+from torch import nn as nn
+from torch.nn import functional as F
+
+
+class SelfAttentionBlock(nn.Module):
+    """General self-attention block/non-local block.
+
+    Please refer to https://arxiv.org/abs/1706.03762 for details about key,
+    query and value.
+
+    Args:
+        key_in_channels (int): Input channels of key feature.
+        query_in_channels (int): Input channels of query feature.
+        channels (int): Output channels of key/query transform.
+        out_channels (int): Output channels.
+        share_key_query (bool): Whether share projection weight between key
+            and query projection.
+        query_downsample (nn.Module): Query downsample module.
+        key_downsample (nn.Module): Key downsample module.
+        key_query_num_convs (int): Number of convs for key/query projection.
+        value_out_num_convs (int): Number of convs for value/out projection.
+        key_query_norm (bool): Whether to use ConvModule (with norm) for the
+            key/query projection.
+        value_out_norm (bool): Whether to use ConvModule (with norm) for the
+            value/out projection.
+        matmul_norm (bool): Whether to normalize the attention map with the
+            square root of the channels.
+        with_out (bool): Whether to use an out projection.
+        conv_cfg (dict|None): Config of conv layers.
+        norm_cfg (dict|None): Config of norm layers.
+        act_cfg (dict|None): Config of activation layers.
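+
+    Example (a minimal sketch; all values are illustrative):
+        >>> import torch
+        >>> block = SelfAttentionBlock(
+        ...     key_in_channels=16, query_in_channels=16, channels=8,
+        ...     out_channels=16, share_key_query=False,
+        ...     query_downsample=None, key_downsample=None,
+        ...     key_query_num_convs=1, value_out_num_convs=1,
+        ...     key_query_norm=False, value_out_norm=False,
+        ...     matmul_norm=True, with_out=True, conv_cfg=None,
+        ...     norm_cfg=None, act_cfg=None)
+        >>> query = torch.rand(1, 16, 8, 8)
+        >>> key = torch.rand(1, 16, 8, 8)
+        >>> block(query, key).shape
+        torch.Size([1, 16, 8, 8])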
+ """ + + def __init__(self, key_in_channels, query_in_channels, channels, + out_channels, share_key_query, query_downsample, + key_downsample, key_query_num_convs, value_out_num_convs, + key_query_norm, value_out_norm, matmul_norm, with_out, + conv_cfg, norm_cfg, act_cfg): + super(SelfAttentionBlock, self).__init__() + if share_key_query: + assert key_in_channels == query_in_channels + self.key_in_channels = key_in_channels + self.query_in_channels = query_in_channels + self.out_channels = out_channels + self.channels = channels + self.share_key_query = share_key_query + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.act_cfg = act_cfg + self.key_project = self.build_project( + key_in_channels, + channels, + num_convs=key_query_num_convs, + use_conv_module=key_query_norm, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + if share_key_query: + self.query_project = self.key_project + else: + self.query_project = self.build_project( + query_in_channels, + channels, + num_convs=key_query_num_convs, + use_conv_module=key_query_norm, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + self.value_project = self.build_project( + key_in_channels, + channels if with_out else out_channels, + num_convs=value_out_num_convs, + use_conv_module=value_out_norm, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + if with_out: + self.out_project = self.build_project( + channels, + out_channels, + num_convs=value_out_num_convs, + use_conv_module=value_out_norm, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + else: + self.out_project = None + + self.query_downsample = query_downsample + self.key_downsample = key_downsample + self.matmul_norm = matmul_norm + + self.init_weights() + + def init_weights(self): + """Initialize weight of later layer.""" + if self.out_project is not None: + if not isinstance(self.out_project, ConvModule): + constant_init(self.out_project, 0) + + def build_project(self, in_channels, channels, num_convs, use_conv_module, + conv_cfg, norm_cfg, act_cfg): + """Build projection layer for key/query/value/out.""" + if use_conv_module: + convs = [ + ConvModule( + in_channels, + channels, + 1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + ] + for _ in range(num_convs - 1): + convs.append( + ConvModule( + channels, + channels, + 1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg)) + else: + convs = [nn.Conv2d(in_channels, channels, 1)] + for _ in range(num_convs - 1): + convs.append(nn.Conv2d(channels, channels, 1)) + if len(convs) > 1: + convs = nn.Sequential(*convs) + else: + convs = convs[0] + return convs + + def forward(self, query_feats, key_feats): + """Forward function.""" + batch_size = query_feats.size(0) + query = self.query_project(query_feats) + if self.query_downsample is not None: + query = self.query_downsample(query) + query = query.reshape(*query.shape[:2], -1) + query = query.permute(0, 2, 1).contiguous() + + key = self.key_project(key_feats) + value = self.value_project(key_feats) + if self.key_downsample is not None: + key = self.key_downsample(key) + value = self.key_downsample(value) + key = key.reshape(*key.shape[:2], -1) + value = value.reshape(*value.shape[:2], -1) + value = value.permute(0, 2, 1).contiguous() + + sim_map = torch.matmul(query, key) + if self.matmul_norm: + sim_map = (self.channels**-.5) * sim_map + sim_map = F.softmax(sim_map, dim=-1) + + context = torch.matmul(sim_map, value) + context = context.permute(0, 2, 1).contiguous() + context = 
context.reshape(batch_size, -1, *query_feats.shape[2:])
+        if self.out_project is not None:
+            context = self.out_project(context)
+        return context
diff --git a/mmseg/models/utils/up_conv_block.py b/mmseg/models/utils/up_conv_block.py
new file mode 100644
index 0000000000000000000000000000000000000000..6566b749dbcb014312c586d1e6a618ec49249456
--- /dev/null
+++ b/mmseg/models/utils/up_conv_block.py
@@ -0,0 +1,101 @@
+import torch
+import torch.nn as nn
+from mmcv.cnn import ConvModule, build_upsample_layer
+
+
+class UpConvBlock(nn.Module):
+    """Upsample convolution block in decoder for UNet.
+
+    This upsample convolution block consists of one upsample module
+    followed by one convolution block. The upsample module expands the
+    high-level low-resolution feature map and the convolution block fuses
+    the upsampled high-level low-resolution feature map and the low-level
+    high-resolution feature map from encoder.
+
+    Args:
+        conv_block (nn.Sequential): Sequential of convolutional layers.
+        in_channels (int): Number of input channels of the high-level
+            low-resolution feature map.
+        skip_channels (int): Number of input channels of the low-level
+            high-resolution feature map from encoder.
+        out_channels (int): Number of output channels.
+        num_convs (int): Number of convolutional layers in the conv_block.
+            Default: 2.
+        stride (int): Stride of convolutional layer in conv_block. Default: 1.
+        dilation (int): Dilation rate of convolutional layer in conv_block.
+            Default: 1.
+        with_cp (bool): Use checkpoint or not. Using checkpoint will save some
+            memory while slowing down the training speed. Default: False.
+        conv_cfg (dict | None): Config dict for convolution layer.
+            Default: None.
+        norm_cfg (dict | None): Config dict for normalization layer.
+            Default: dict(type='BN').
+        act_cfg (dict | None): Config dict for activation layer in ConvModule.
+            Default: dict(type='ReLU').
+        upsample_cfg (dict): The upsample config of the upsample module in
+            decoder. Default: dict(type='InterpConv'). If the size of the
+            high-level feature map is the same as that of the skip feature map
+            (low-level feature map from encoder), it does not need to upsample
+            the high-level feature map and the upsample_cfg is None.
+        dcn (bool): Use deformable convolution in convolutional layer or not.
+            Default: None.
+        plugins (dict): plugins for convolutional layers. Default: None.
+    """
+
+    def __init__(self,
+                 conv_block,
+                 in_channels,
+                 skip_channels,
+                 out_channels,
+                 num_convs=2,
+                 stride=1,
+                 dilation=1,
+                 with_cp=False,
+                 conv_cfg=None,
+                 norm_cfg=dict(type='BN'),
+                 act_cfg=dict(type='ReLU'),
+                 upsample_cfg=dict(type='InterpConv'),
+                 dcn=None,
+                 plugins=None):
+        super(UpConvBlock, self).__init__()
+        assert dcn is None, 'Not implemented yet.'
+        assert plugins is None, 'Not implemented yet.'
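+        # The conv block consumes the concatenation of the upsampled
+        # high-level map (projected to skip_channels channels) and the skip
+        # map, hence in_channels = 2 * skip_channels below.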
+ + self.conv_block = conv_block( + in_channels=2 * skip_channels, + out_channels=out_channels, + num_convs=num_convs, + stride=stride, + dilation=dilation, + with_cp=with_cp, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + dcn=None, + plugins=None) + if upsample_cfg is not None: + self.upsample = build_upsample_layer( + cfg=upsample_cfg, + in_channels=in_channels, + out_channels=skip_channels, + with_cp=with_cp, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + else: + self.upsample = ConvModule( + in_channels, + skip_channels, + kernel_size=1, + stride=1, + padding=0, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + + def forward(self, skip, x): + """Forward function.""" + + x = self.upsample(x) + out = torch.cat([skip, x], dim=1) + out = self.conv_block(out) + + return out diff --git a/mmseg/models/utils/weight_init.py b/mmseg/models/utils/weight_init.py new file mode 100644 index 0000000000000000000000000000000000000000..38141ba3d61f64ddfc0a31574b4648cbad96d7dd --- /dev/null +++ b/mmseg/models/utils/weight_init.py @@ -0,0 +1,62 @@ +"""Modified from https://github.com/rwightman/pytorch-image- +models/blob/master/timm/models/layers/drop.py.""" + +import math +import warnings + +import torch + + +def _no_grad_trunc_normal_(tensor, mean, std, a, b): + """Reference: https://people.sc.fsu.edu/~jburkardt/presentations + /truncated_normal.pdf""" + + def norm_cdf(x): + # Computes standard normal cumulative distribution function + return (1. + math.erf(x / math.sqrt(2.))) / 2. + + if (mean < a - 2 * std) or (mean > b + 2 * std): + warnings.warn( + 'mean is more than 2 std from [a, b] in nn.init.trunc_normal_. ' + 'The distribution of values may be incorrect.', + stacklevel=2) + + with torch.no_grad(): + # Values are generated by using a truncated uniform distribution and + # then using the inverse CDF for the normal distribution. + # Get upper and lower cdf values + lower_bound = norm_cdf((a - mean) / std) + upper_bound = norm_cdf((b - mean) / std) + + # Uniformly fill tensor with values from [l, u], then translate to + # [2l-1, 2u-1]. + tensor.uniform_(2 * lower_bound - 1, 2 * upper_bound - 1) + + # Use inverse cdf transform for normal distribution to get truncated + # standard normal + tensor.erfinv_() + + # Transform to proper mean, std + tensor.mul_(std * math.sqrt(2.)) + tensor.add_(mean) + + # Clamp to ensure it's in the proper range + tensor.clamp_(min=a, max=b) + return tensor + + +def trunc_normal_(tensor, mean=0., std=1., a=-2., b=2.): + r"""Fills the input Tensor with values drawn from a truncated + normal distribution. The values are effectively drawn from the + normal distribution :math:`\mathcal{N}(\text{mean}, \text{std}^2)` + with values outside :math:`[a, b]` redrawn until they are within + the bounds. The method used for generating the random values works + best when :math:`a \leq \text{mean} \leq b`. 
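A side note on `UpConvBlock` above: one decoder step upsamples the low-resolution input, concatenates the encoder skip, and fuses the pair with `conv_block` (hence `in_channels=2 * skip_channels`). A hedged, self-contained sketch with a toy stand-in for `conv_block` (the real one comes from the UNet backbone and takes extra cfg keyword arguments):

import torch
import torch.nn as nn

# Toy stand-in for the conv_block argument.
fuse = nn.Sequential(
    nn.Conv2d(2 * 64, 64, 3, padding=1), nn.ReLU(),
    nn.Conv2d(64, 64, 3, padding=1), nn.ReLU())
up = nn.Sequential(nn.Upsample(scale_factor=2), nn.Conv2d(128, 64, 1))

skip = torch.randn(1, 64, 32, 32)  # low-level, high-resolution (encoder)
x = torch.randn(1, 128, 16, 16)    # high-level, low-resolution (decoder)
out = fuse(torch.cat([skip, up(x)], dim=1))  # shape [1, 64, 32, 32]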
+    Args:
+        tensor (``torch.Tensor``): an n-dimensional `torch.Tensor`
+        mean (float): the mean of the normal distribution
+        std (float): the standard deviation of the normal distribution
+        a (float): the minimum cutoff value
+        b (float): the maximum cutoff value
+    """
+    return _no_grad_trunc_normal_(tensor, mean, std, a, b)
diff --git a/mmseg/ops/__init__.py b/mmseg/ops/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..bec51c75b9363a9a19e9fb5c35f4e7dbd6f7751c
--- /dev/null
+++ b/mmseg/ops/__init__.py
@@ -0,0 +1,4 @@
+from .encoding import Encoding
+from .wrappers import Upsample, resize
+
+__all__ = ['Upsample', 'resize', 'Encoding']
diff --git a/mmseg/ops/encoding.py b/mmseg/ops/encoding.py
new file mode 100644
index 0000000000000000000000000000000000000000..7eb3629a6426550b8e4c537ee1ff4341893e489e
--- /dev/null
+++ b/mmseg/ops/encoding.py
@@ -0,0 +1,74 @@
+import torch
+from torch import nn
+from torch.nn import functional as F
+
+
+class Encoding(nn.Module):
+    """Encoding Layer: a learnable residual encoder.
+
+    Input is of shape (batch_size, channels, height, width).
+    Output is of shape (batch_size, num_codes, channels).
+
+    Args:
+        channels: dimension of the features or feature channels
+        num_codes: number of code words
+    """
+
+    def __init__(self, channels, num_codes):
+        super(Encoding, self).__init__()
+        # init codewords and smoothing factor
+        self.channels, self.num_codes = channels, num_codes
+        std = 1. / ((num_codes * channels)**0.5)
+        # [num_codes, channels]
+        self.codewords = nn.Parameter(
+            torch.empty(num_codes, channels,
+                        dtype=torch.float).uniform_(-std, std),
+            requires_grad=True)
+        # [num_codes]
+        self.scale = nn.Parameter(
+            torch.empty(num_codes, dtype=torch.float).uniform_(-1, 0),
+            requires_grad=True)
+
+    @staticmethod
+    def scaled_l2(x, codewords, scale):
+        num_codes, channels = codewords.size()
+        batch_size = x.size(0)
+        reshaped_scale = scale.view((1, 1, num_codes))
+        expanded_x = x.unsqueeze(2).expand(
+            (batch_size, x.size(1), num_codes, channels))
+        reshaped_codewords = codewords.view((1, 1, num_codes, channels))
+
+        scaled_l2_norm = reshaped_scale * (
+            expanded_x - reshaped_codewords).pow(2).sum(dim=3)
+        return scaled_l2_norm
+
+    @staticmethod
+    def aggregate(assignment_weights, x, codewords):
+        num_codes, channels = codewords.size()
+        reshaped_codewords = codewords.view((1, 1, num_codes, channels))
+        batch_size = x.size(0)
+
+        expanded_x = x.unsqueeze(2).expand(
+            (batch_size, x.size(1), num_codes, channels))
+        encoded_feat = (assignment_weights.unsqueeze(3) *
+                        (expanded_x - reshaped_codewords)).sum(dim=1)
+        return encoded_feat
+
+    def forward(self, x):
+        assert x.dim() == 4 and x.size(1) == self.channels
+        # input: [batch_size, channels, height, width]
+        batch_size = x.size(0)
+        # [batch_size, height x width, channels]
+        x = x.view(batch_size, self.channels, -1).transpose(1, 2).contiguous()
+        # assignment_weights: [batch_size, height x width, num_codes]
+        assignment_weights = F.softmax(
+            self.scaled_l2(x, self.codewords, self.scale), dim=2)
+        # aggregate
+        encoded_feat = self.aggregate(assignment_weights, x, self.codewords)
+        return encoded_feat
+
+    def __repr__(self):
+        repr_str = self.__class__.__name__
+        repr_str += f'(Nx{self.channels}xHxW => Nx{self.num_codes}' \
+                    f'x{self.channels})'
+        return repr_str
diff --git a/mmseg/ops/wrappers.py b/mmseg/ops/wrappers.py
new file mode 100644
index 0000000000000000000000000000000000000000..0ed9a0cb8d7c0e0ec2748dd89c652756653cac78
--- /dev/null
+++ b/mmseg/ops/wrappers.py
@@ -0,0 +1,50 @@
+import warnings
+
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+def resize(input,
+           size=None,
+           scale_factor=None,
+           mode='nearest',
+           align_corners=None,
+           warning=True):
+    if warning:
+        if size is not None and align_corners:
+            input_h, input_w = tuple(int(x) for x in input.shape[2:])
+            output_h, output_w = tuple(int(x) for x in size)
+            if output_h > input_h or output_w > input_w:
+                if ((output_h > 1 and output_w > 1 and input_h > 1
+                     and input_w > 1) and (output_h - 1) % (input_h - 1)
+                        and (output_w - 1) % (input_w - 1)):
+                    warnings.warn(
+                        f'When align_corners={align_corners}, '
+                        'the output would be more aligned if '
+                        f'input size {(input_h, input_w)} is `x+1` and '
+                        f'out size {(output_h, output_w)} is `nx+1`')
+    return F.interpolate(input, size, scale_factor, mode, align_corners)
+
+
+class Upsample(nn.Module):
+
+    def __init__(self,
+                 size=None,
+                 scale_factor=None,
+                 mode='nearest',
+                 align_corners=None):
+        super(Upsample, self).__init__()
+        self.size = size
+        if isinstance(scale_factor, tuple):
+            self.scale_factor = tuple(
+                float(factor) for factor in scale_factor)
+        else:
+            self.scale_factor = float(scale_factor) if scale_factor else None
+        self.mode = mode
+        self.align_corners = align_corners
+
+    def forward(self, x):
+        if not self.size:
+            size = [int(t * self.scale_factor) for t in x.shape[-2:]]
+        else:
+            size = self.size
+        return resize(x, size, None, self.mode, self.align_corners)
diff --git a/mmseg/utils/__init__.py b/mmseg/utils/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..ac489e2dbbc0e6fa87f5088b4edcc20f8cadc1a6
--- /dev/null
+++ b/mmseg/utils/__init__.py
@@ -0,0 +1,4 @@
+from .collect_env import collect_env
+from .logger import get_root_logger
+
+__all__ = ['get_root_logger', 'collect_env']
diff --git a/mmseg/utils/collect_env.py b/mmseg/utils/collect_env.py
new file mode 100644
index 0000000000000000000000000000000000000000..8293a05fb3422249485c4d877749fc9c92a51e36
--- /dev/null
+++ b/mmseg/utils/collect_env.py
@@ -0,0 +1,17 @@
+from mmcv.utils import collect_env as collect_base_env
+from mmcv.utils import get_git_hash
+
+import mmseg
+
+
+def collect_env():
+    """Collect the information of the running environments."""
+    env_info = collect_base_env()
+    env_info['MMSegmentation'] = f'{mmseg.__version__}+{get_git_hash()[:7]}'
+
+    return env_info
+
+
+if __name__ == '__main__':
+    for name, val in collect_env().items():
+        print('{}: {}'.format(name, val))
diff --git a/mmseg/utils/logger.py b/mmseg/utils/logger.py
new file mode 100644
index 0000000000000000000000000000000000000000..05d2f13439ff501aa51b248ce7396ea5d41a38fb
--- /dev/null
+++ b/mmseg/utils/logger.py
@@ -0,0 +1,27 @@
+import logging
+
+from mmcv.utils import get_logger
+
+
+def get_root_logger(log_file=None, log_level=logging.INFO):
+    """Get the root logger.
+
+    The logger will be initialized if it has not been initialized. By default
+    a StreamHandler will be added. If `log_file` is specified, a FileHandler
+    will also be added. The name of the root logger is the top-level package
+    name, e.g., "mmseg".
+
+    Args:
+        log_file (str | None): The log filename. If specified, a FileHandler
+            will be added to the root logger.
+        log_level (int): The root logger level. Note that only the process of
+            rank 0 is affected, while other processes will set the level to
+            "Error" and be silent most of the time.
+
+    Returns:
+        logging.Logger: The root logger.
+    """
+
+    logger = get_logger(name='mmseg', log_file=log_file, log_level=log_level)
+
+    return logger
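For context on the warning branch fixed in `resize` above (the original compared `output_w` against `output_h`; it should compare against `input_w`): with `align_corners=True`, the output grid lines up exactly with the input grid only when `(output - 1)` is a multiple of `(input - 1)` in each dimension. A small demonstration with example sizes:

import torch
import torch.nn.functional as F

x = torch.arange(9, dtype=torch.float32).reshape(1, 1, 3, 3)
# (9 - 1) % (3 - 1) == 0: input samples fall exactly on output grid points
aligned = F.interpolate(x, size=(9, 9), mode='bilinear', align_corners=True)
# (8 - 1) % (3 - 1) != 0: corners still match, interior samples do not,
# which is exactly the case the warning above flags
off_grid = F.interpolate(x, size=(8, 8), mode='bilinear', align_corners=True)
print(aligned[0, 0, 0, 0].item(), aligned[0, 0, -1, -1].item())  # 0.0 8.0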
diff --git a/mmseg/version.py b/mmseg/version.py
new file mode 100644
index 0000000000000000000000000000000000000000..e090d9f31aae3ce0a8fd6392d519163130f437dc
--- /dev/null
+++ b/mmseg/version.py
@@ -0,0 +1,18 @@
+# Copyright (c) Open-MMLab. All rights reserved.
+
+__version__ = '0.13.0'
+
+
+def parse_version_info(version_str):
+    version_info = []
+    for x in version_str.split('.'):
+        if x.isdigit():
+            version_info.append(int(x))
+        elif x.find('rc') != -1:
+            patch_version = x.split('rc')
+            version_info.append(int(patch_version[0]))
+            version_info.append(f'rc{patch_version[1]}')
+    return tuple(version_info)
+
+
+version_info = parse_version_info(__version__)
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..d92c6b20384ea33c465e96d4c9b38e1e25540952
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,19 @@
+--find-links https://download.openmmlab.com/mmcv/dist/cpu/torch1.10.0/index.html
+mmcv-full==1.3.17
+torch==1.10.0
+torchvision
+einops
+timm
+Pillow
+cityscapesscripts
+matplotlib
+numpy
+prettytable
+codecov
+flake8
+interrogate
+isort==4.3.21
+pytest
+xdoctest>=0.10.0
+yapf
+
diff --git a/requirements/docs.txt b/requirements/docs.txt
new file mode 100644
index 0000000000000000000000000000000000000000..89fbf86c01cb29f10f7e99c910248c4d5229da58
--- /dev/null
+++ b/requirements/docs.txt
@@ -0,0 +1,4 @@
+recommonmark
+sphinx
+sphinx_markdown_tables
+sphinx_rtd_theme
diff --git a/requirements/optional.txt b/requirements/optional.txt
new file mode 100644
index 0000000000000000000000000000000000000000..47fa5933159eb068640f6b45281d36ab4af294cb
--- /dev/null
+++ b/requirements/optional.txt
@@ -0,0 +1 @@
+cityscapesscripts
diff --git a/requirements/readthedocs.txt b/requirements/readthedocs.txt
new file mode 100644
index 0000000000000000000000000000000000000000..0542bfce6dff3b002a1d33e53c0be975e7feed4a
--- /dev/null
+++ b/requirements/readthedocs.txt
@@ -0,0 +1,3 @@
+mmcv
+torch
+torchvision
diff --git a/requirements/runtime.txt b/requirements/runtime.txt
new file mode 100644
index 0000000000000000000000000000000000000000..47048d029ad52d28d41ac24162ec918b1bbdef93
--- /dev/null
+++ b/requirements/runtime.txt
@@ -0,0 +1,3 @@
+matplotlib
+numpy
+prettytable
diff --git a/requirements/tests.txt b/requirements/tests.txt
new file mode 100644
index 0000000000000000000000000000000000000000..991fd711d4eed749f8f3440f9c4b448e3973575f
--- /dev/null
+++ b/requirements/tests.txt
@@ -0,0 +1,7 @@
+codecov
+flake8
+interrogate
+isort==4.3.21
+pytest
+xdoctest>=0.10.0
+yapf
diff --git a/tools/align_resize.py b/tools/align_resize.py
new file mode 100644
index 0000000000000000000000000000000000000000..3819df8d6b78d88eb53aa1323387a7425dbd8a86
--- /dev/null
+++ b/tools/align_resize.py
@@ -0,0 +1,218 @@
+import mmcv
+import numpy as np
+from mmcv.utils import deprecated_api_warning, is_tuple_of
+from numpy import random
+
+from mmseg.datasets.builder import PIPELINES
+
+
+@PIPELINES.register_module()
+class AlignResize(object):
+    """Resize images & seg. Align the resized height and width to a
+    multiple of ``size_divisor``.
+    """
+
+    def __init__(self,
+                 img_scale=None,
+                 multiscale_mode='range',
+                 ratio_range=None,
+                 keep_ratio=True,
+                 size_divisor=32):
+        if img_scale is None:
+            self.img_scale = None
+        else:
+            if isinstance(img_scale, list):
+                self.img_scale = img_scale
+            else:
+                self.img_scale = [img_scale]
+            assert mmcv.is_list_of(self.img_scale, tuple)
+
+        if ratio_range is not None:
+            # mode 1: given img_scale=None and a range of image ratio
+            # mode 2: given a scale and a range of image ratio
+            assert self.img_scale is None or len(self.img_scale) == 1
+        else:
+            # mode 3 and 4: given multiple scales or a range of scales
+            assert multiscale_mode in ['value', 'range']
+
+        self.multiscale_mode = multiscale_mode
+        self.ratio_range = ratio_range
+        self.keep_ratio = keep_ratio
+        self.size_divisor = size_divisor
+
+    @staticmethod
+    def random_select(img_scales):
+        """Randomly select an img_scale from given candidates.
+
+        Args:
+            img_scales (list[tuple]): Images scales for selection.
+
+        Returns:
+            (tuple, int): Returns a tuple ``(img_scale, scale_idx)``,
+                where ``img_scale`` is the selected image scale and
+                ``scale_idx`` is the selected index in the given candidates.
+        """
+
+        assert mmcv.is_list_of(img_scales, tuple)
+        scale_idx = np.random.randint(len(img_scales))
+        img_scale = img_scales[scale_idx]
+        return img_scale, scale_idx
+
+    @staticmethod
+    def random_sample(img_scales):
+        """Randomly sample an img_scale when ``multiscale_mode=='range'``.
+
+        Args:
+            img_scales (list[tuple]): Images scale range for sampling.
+                There must be two tuples in img_scales, which specify the
+                lower and upper bound of image scales.
+
+        Returns:
+            (tuple, None): Returns a tuple ``(img_scale, None)``, where
+                ``img_scale`` is sampled scale and None is just a placeholder
+                to be consistent with :func:`random_select`.
+        """
+
+        assert mmcv.is_list_of(img_scales, tuple) and len(img_scales) == 2
+        img_scale_long = [max(s) for s in img_scales]
+        img_scale_short = [min(s) for s in img_scales]
+        long_edge = np.random.randint(
+            min(img_scale_long),
+            max(img_scale_long) + 1)
+        short_edge = np.random.randint(
+            min(img_scale_short),
+            max(img_scale_short) + 1)
+        img_scale = (long_edge, short_edge)
+        return img_scale, None
+
+    @staticmethod
+    def random_sample_ratio(img_scale, ratio_range):
+        """Randomly sample an img_scale when ``ratio_range`` is specified.
+
+        A ratio will be randomly sampled from the range specified by
+        ``ratio_range``. Then it would be multiplied with ``img_scale`` to
+        generate sampled scale.
+
+        Args:
+            img_scale (tuple): Images scale base to multiply with ratio.
+            ratio_range (tuple[float]): The minimum and maximum ratio to scale
+                the ``img_scale``.
+
+        Returns:
+            (tuple, None): Returns a tuple ``(scale, None)``, where
+                ``scale`` is sampled ratio multiplied with ``img_scale`` and
+                None is just a placeholder to be consistent with
+                :func:`random_select`.
+        """
+
+        assert isinstance(img_scale, tuple) and len(img_scale) == 2
+        min_ratio, max_ratio = ratio_range
+        assert min_ratio <= max_ratio
+        ratio = np.random.random_sample() * (max_ratio - min_ratio) + min_ratio
+        scale = int(img_scale[0] * ratio), int(img_scale[1] * ratio)
+        return scale, None
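A quick sanity check of `random_sample_ratio` above (numbers are illustrative): with `img_scale=(2048, 512)` and `ratio_range=(0.5, 2.0)`, sampled scales land between `(1024, 256)` and `(4096, 1024)`:

import numpy as np

def random_sample_ratio(img_scale, ratio_range):
    min_ratio, max_ratio = ratio_range
    ratio = np.random.random_sample() * (max_ratio - min_ratio) + min_ratio
    return int(img_scale[0] * ratio), int(img_scale[1] * ratio)

np.random.seed(0)
print(random_sample_ratio((2048, 512), (0.5, 2.0)))  # (2709, 677) here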
+
+    def _random_scale(self, results):
+        """Randomly sample an img_scale according to ``ratio_range`` and
+        ``multiscale_mode``.
+
+        If ``ratio_range`` is specified, a ratio will be sampled and be
+        multiplied with ``img_scale``.
+        If multiple scales are specified by ``img_scale``, a scale will be
+        sampled according to ``multiscale_mode``.
+        Otherwise, a single scale will be used.
+
+        Args:
+            results (dict): Result dict from :obj:`dataset`.
+
+        Returns:
+            dict: Two new keys 'scale' and 'scale_idx' are added into
+                ``results``, which would be used by subsequent pipelines.
+        """
+
+        if self.ratio_range is not None:
+            if self.img_scale is None:
+                h, w = results['img'].shape[:2]
+                scale, scale_idx = self.random_sample_ratio((w, h),
+                                                            self.ratio_range)
+            else:
+                scale, scale_idx = self.random_sample_ratio(
+                    self.img_scale[0], self.ratio_range)
+        elif len(self.img_scale) == 1:
+            scale, scale_idx = self.img_scale[0], 0
+        elif self.multiscale_mode == 'range':
+            scale, scale_idx = self.random_sample(self.img_scale)
+        elif self.multiscale_mode == 'value':
+            scale, scale_idx = self.random_select(self.img_scale)
+        else:
+            raise NotImplementedError
+
+        results['scale'] = scale
+        results['scale_idx'] = scale_idx
+
+    def _align(self, img, size_divisor, interpolation=None):
+        align_h = int(np.ceil(img.shape[0] / size_divisor)) * size_divisor
+        align_w = int(np.ceil(img.shape[1] / size_divisor)) * size_divisor
+        if interpolation is None:
+            img = mmcv.imresize(img, (align_w, align_h))
+        else:
+            img = mmcv.imresize(
+                img, (align_w, align_h), interpolation=interpolation)
+        return img
+
+    def _resize_img(self, results):
+        """Resize images with ``results['scale']``."""
+        if self.keep_ratio:
+            img, scale_factor = mmcv.imrescale(
+                results['img'], results['scale'], return_scale=True)
+            # align the size to a multiple of size_divisor
+            img = self._align(img, self.size_divisor)
+            # the w_scale and h_scale have a minor difference;
+            # a real fix should be done in mmcv.imrescale in the future
+            new_h, new_w = img.shape[:2]
+            h, w = results['img'].shape[:2]
+            w_scale = new_w / w
+            h_scale = new_h / h
+        else:
+            img, w_scale, h_scale = mmcv.imresize(
+                results['img'], results['scale'], return_scale=True)
+
+            h, w = img.shape[:2]
+            assert int(np.ceil(h / self.size_divisor)) * self.size_divisor == h and \
+                int(np.ceil(w / self.size_divisor)) * self.size_divisor == w, \
+                'img size not aligned. h:{} w:{}'.format(h, w)
+        scale_factor = np.array([w_scale, h_scale, w_scale, h_scale],
+                                dtype=np.float32)
+        results['img'] = img
+        results['img_shape'] = img.shape
+        results['pad_shape'] = img.shape  # in case that there is no padding
+        results['scale_factor'] = scale_factor
+        results['keep_ratio'] = self.keep_ratio
+
+    def _resize_seg(self, results):
+        """Resize semantic segmentation map with ``results['scale']``."""
+        for key in results.get('seg_fields', []):
+            if self.keep_ratio:
+                gt_seg = mmcv.imrescale(
+                    results[key], results['scale'], interpolation='nearest')
+                gt_seg = self._align(
+                    gt_seg, self.size_divisor, interpolation='nearest')
+            else:
+                gt_seg = mmcv.imresize(
+                    results[key], results['scale'], interpolation='nearest')
+                h, w = gt_seg.shape[:2]
+                assert int(np.ceil(h / self.size_divisor)) * self.size_divisor == h and \
+                    int(np.ceil(w / self.size_divisor)) * self.size_divisor == w, \
+                    'gt_seg size not aligned. h:{} w:{}'.format(h, w)
+            results[key] = gt_seg
+
+    def __call__(self, results):
+        """Call function to resize the image and the semantic segmentation
+        map.
+
+        Args:
+            results (dict): Result dict from loading pipeline.
+
+        Returns:
+            dict: Resized results; 'img_shape', 'pad_shape', 'scale_factor'
+                and 'keep_ratio' keys are added into the result dict.
+ """ + + if 'scale' not in results: + self._random_scale(results) + self._resize_img(results) + self._resize_seg(results) + return results + + def __repr__(self): + repr_str = self.__class__.__name__ + repr_str += (f'(img_scale={self.img_scale}, ' + f'multiscale_mode={self.multiscale_mode}, ' + f'ratio_range={self.ratio_range}, ' + f'keep_ratio={self.keep_ratio})') + return repr_str \ No newline at end of file diff --git a/tools/analyze_logs.py b/tools/analyze_logs.py new file mode 100644 index 0000000000000000000000000000000000000000..c3a468b5542e3f041c255a1ab7368698416b2788 --- /dev/null +++ b/tools/analyze_logs.py @@ -0,0 +1,123 @@ +"""Modified from https://github.com/open- +mmlab/mmdetection/blob/master/tools/analysis_tools/analyze_logs.py.""" +import argparse +import json +from collections import defaultdict + +import matplotlib.pyplot as plt +import seaborn as sns + + +def plot_curve(log_dicts, args): + if args.backend is not None: + plt.switch_backend(args.backend) + sns.set_style(args.style) + # if legend is None, use {filename}_{key} as legend + legend = args.legend + if legend is None: + legend = [] + for json_log in args.json_logs: + for metric in args.keys: + legend.append(f'{json_log}_{metric}') + assert len(legend) == (len(args.json_logs) * len(args.keys)) + metrics = args.keys + + num_metrics = len(metrics) + for i, log_dict in enumerate(log_dicts): + epochs = list(log_dict.keys()) + for j, metric in enumerate(metrics): + print(f'plot curve of {args.json_logs[i]}, metric is {metric}') + plot_epochs = [] + plot_iters = [] + plot_values = [] + for epoch in epochs: + epoch_logs = log_dict[epoch] + if metric not in epoch_logs.keys(): + continue + if metric in ['mIoU', 'mAcc', 'aAcc']: + plot_epochs.append(epoch) + plot_values.append(epoch_logs[metric][0]) + else: + for idx in range(len(epoch_logs[metric])): + plot_iters.append(epoch_logs['iter'][idx]) + plot_values.append(epoch_logs[metric][idx]) + ax = plt.gca() + label = legend[i * num_metrics + j] + if metric in ['mIoU', 'mAcc', 'aAcc']: + ax.set_xticks(plot_epochs) + plt.xlabel('epoch') + plt.plot(plot_epochs, plot_values, label=label, marker='o') + else: + plt.xlabel('iter') + plt.plot(plot_iters, plot_values, label=label, linewidth=0.5) + plt.legend() + if args.title is not None: + plt.title(args.title) + if args.out is None: + plt.show() + else: + print(f'save curve to: {args.out}') + plt.savefig(args.out) + plt.cla() + + +def parse_args(): + parser = argparse.ArgumentParser(description='Analyze Json Log') + parser.add_argument( + 'json_logs', + type=str, + nargs='+', + help='path of train log in json format') + parser.add_argument( + '--keys', + type=str, + nargs='+', + default=['mIoU'], + help='the metric that you want to plot') + parser.add_argument('--title', type=str, help='title of figure') + parser.add_argument( + '--legend', + type=str, + nargs='+', + default=None, + help='legend of each plot') + parser.add_argument( + '--backend', type=str, default=None, help='backend of plt') + parser.add_argument( + '--style', type=str, default='dark', help='style of plt') + parser.add_argument('--out', type=str, default=None) + args = parser.parse_args() + return args + + +def load_json_logs(json_logs): + # load and convert json_logs to log_dict, key is epoch, value is a sub dict + # keys of sub dict is different metrics + # value of sub dict is a list of corresponding values of all iterations + log_dicts = [dict() for _ in json_logs] + for json_log, log_dict in zip(json_logs, log_dicts): + with open(json_log, 'r') as 
log_file: + for line in log_file: + log = json.loads(line.strip()) + # skip lines without `epoch` field + if 'epoch' not in log: + continue + epoch = log.pop('epoch') + if epoch not in log_dict: + log_dict[epoch] = defaultdict(list) + for k, v in log.items(): + log_dict[epoch][k].append(v) + return log_dicts + + +def main(): + args = parse_args() + json_logs = args.json_logs + for json_log in json_logs: + assert json_log.endswith('.json') + log_dicts = load_json_logs(json_logs) + plot_curve(log_dicts, args) + + +if __name__ == '__main__': + main() diff --git a/tools/benchmark.py b/tools/benchmark.py new file mode 100644 index 0000000000000000000000000000000000000000..0a6179358518eafbd04a190fa4f856e10cb66b7b --- /dev/null +++ b/tools/benchmark.py @@ -0,0 +1,85 @@ +import argparse +import time + +import torch +from mmcv import Config +from mmcv.parallel import MMDataParallel +from mmcv.runner import load_checkpoint, wrap_fp16_model + +from mmseg.datasets import build_dataloader, build_dataset +from mmseg.models import build_segmentor + + +def parse_args(): + parser = argparse.ArgumentParser(description='MMSeg benchmark a model') + parser.add_argument('config', help='test config file path') + parser.add_argument('checkpoint', help='checkpoint file') + parser.add_argument( + '--log-interval', type=int, default=50, help='interval of logging') + args = parser.parse_args() + return args + + +def main(): + args = parse_args() + + cfg = Config.fromfile(args.config) + # set cudnn_benchmark + torch.backends.cudnn.benchmark = False + cfg.model.pretrained = None + cfg.data.test.test_mode = True + + # build the dataloader + # TODO: support multiple images per gpu (only minor changes are needed) + dataset = build_dataset(cfg.data.test) + data_loader = build_dataloader( + dataset, + samples_per_gpu=1, + workers_per_gpu=cfg.data.workers_per_gpu, + dist=False, + shuffle=False) + + # build the model and load checkpoint + cfg.model.train_cfg = None + model = build_segmentor(cfg.model, test_cfg=cfg.get('test_cfg')) + fp16_cfg = cfg.get('fp16', None) + if fp16_cfg is not None: + wrap_fp16_model(model) + load_checkpoint(model, args.checkpoint, map_location='cpu') + + model = MMDataParallel(model, device_ids=[0]) + + model.eval() + + # the first several iterations may be very slow so skip them + num_warmup = 5 + pure_inf_time = 0 + total_iters = 200 + + # benchmark with 200 image and take the average + for i, data in enumerate(data_loader): + + torch.cuda.synchronize() + start_time = time.perf_counter() + + with torch.no_grad(): + model(return_loss=False, rescale=True, **data) + + torch.cuda.synchronize() + elapsed = time.perf_counter() - start_time + + if i >= num_warmup: + pure_inf_time += elapsed + if (i + 1) % args.log_interval == 0: + fps = (i + 1 - num_warmup) / pure_inf_time + print(f'Done image [{i + 1:<3}/ {total_iters}], ' + f'fps: {fps:.2f} img / s') + + if (i + 1) == total_iters: + fps = (i + 1 - num_warmup) / pure_inf_time + print(f'Overall fps: {fps:.2f} img / s') + break + + +if __name__ == '__main__': + main() diff --git a/tools/convert_datasets/chase_db1.py b/tools/convert_datasets/chase_db1.py new file mode 100644 index 0000000000000000000000000000000000000000..56bb210edbb359caf57b6048b8e09b94ef295152 --- /dev/null +++ b/tools/convert_datasets/chase_db1.py @@ -0,0 +1,87 @@ +import argparse +import os +import os.path as osp +import tempfile +import zipfile + +import mmcv + +CHASE_DB1_LEN = 28 * 3 +TRAINING_LEN = 60 + + +def parse_args(): + parser = argparse.ArgumentParser( + description='Convert 
CHASE_DB1 dataset to mmsegmentation format') + parser.add_argument('dataset_path', help='path of CHASEDB1.zip') + parser.add_argument('--tmp_dir', help='path of the temporary directory') + parser.add_argument('-o', '--out_dir', help='output path') + args = parser.parse_args() + return args + + +def main(): + args = parse_args() + dataset_path = args.dataset_path + if args.out_dir is None: + out_dir = osp.join('data', 'CHASE_DB1') + else: + out_dir = args.out_dir + + print('Making directories...') + mmcv.mkdir_or_exist(out_dir) + mmcv.mkdir_or_exist(osp.join(out_dir, 'images')) + mmcv.mkdir_or_exist(osp.join(out_dir, 'images', 'training')) + mmcv.mkdir_or_exist(osp.join(out_dir, 'images', 'validation')) + mmcv.mkdir_or_exist(osp.join(out_dir, 'annotations')) + mmcv.mkdir_or_exist(osp.join(out_dir, 'annotations', 'training')) + mmcv.mkdir_or_exist(osp.join(out_dir, 'annotations', 'validation')) + + with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir: + print('Extracting CHASEDB1.zip...') + zip_file = zipfile.ZipFile(dataset_path) + zip_file.extractall(tmp_dir) + + print('Generating training dataset...') + + assert len(os.listdir(tmp_dir)) == CHASE_DB1_LEN, \ + 'len(os.listdir(tmp_dir)) != {}'.format(CHASE_DB1_LEN) + + for img_name in sorted(os.listdir(tmp_dir))[:TRAINING_LEN]: + img = mmcv.imread(osp.join(tmp_dir, img_name)) + if osp.splitext(img_name)[1] == '.jpg': + mmcv.imwrite( + img, + osp.join(out_dir, 'images', 'training', + osp.splitext(img_name)[0] + '.png')) + else: + # The annotation img should be divided by 128, because some of + # the annotation imgs are not standard. We should set a + # threshold to convert the nonstandard annotation imgs. The + # value divided by 128 is equivalent to '1 if value >= 128 + # else 0' + mmcv.imwrite( + img[:, :, 0] // 128, + osp.join(out_dir, 'annotations', 'training', + osp.splitext(img_name)[0] + '.png')) + + for img_name in sorted(os.listdir(tmp_dir))[TRAINING_LEN:]: + img = mmcv.imread(osp.join(tmp_dir, img_name)) + if osp.splitext(img_name)[1] == '.jpg': + mmcv.imwrite( + img, + osp.join(out_dir, 'images', 'validation', + osp.splitext(img_name)[0] + '.png')) + else: + mmcv.imwrite( + img[:, :, 0] // 128, + osp.join(out_dir, 'annotations', 'validation', + osp.splitext(img_name)[0] + '.png')) + + print('Removing the temporary files...') + + print('Done!') + + +if __name__ == '__main__': + main() diff --git a/tools/convert_datasets/cityscapes.py b/tools/convert_datasets/cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..99d05b41f58c1d2b92a6cbb4f5859c6d696d0ac8 --- /dev/null +++ b/tools/convert_datasets/cityscapes.py @@ -0,0 +1,55 @@ +import argparse +import os.path as osp + +import mmcv +from cityscapesscripts.preparation.json2labelImg import json2labelImg + + +def convert_json_to_label(json_file): + label_file = json_file.replace('_polygons.json', '_labelTrainIds.png') + json2labelImg(json_file, label_file, 'trainIds') + + +def parse_args(): + parser = argparse.ArgumentParser( + description='Convert Cityscapes annotations to TrainIds') + parser.add_argument('cityscapes_path', help='cityscapes data path') + parser.add_argument('--gt-dir', default='gtFine', type=str) + parser.add_argument('-o', '--out-dir', help='output path') + parser.add_argument( + '--nproc', default=1, type=int, help='number of process') + args = parser.parse_args() + return args + + +def main(): + args = parse_args() + cityscapes_path = args.cityscapes_path + out_dir = args.out_dir if args.out_dir else cityscapes_path + 
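On the `// 128` comment above: for uint8 annotation images, integer division by 128 is a vectorized threshold, mapping values >= 128 to 1 and everything else to 0. A quick check:

import numpy as np

values = np.array([0, 64, 127, 128, 200, 255], dtype=np.uint8)
print(values // 128)                     # [0 0 0 1 1 1]
print((values >= 128).astype(np.uint8))  # identical result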
mmcv.mkdir_or_exist(out_dir) + + gt_dir = osp.join(cityscapes_path, args.gt_dir) + + poly_files = [] + for poly in mmcv.scandir(gt_dir, '_polygons.json', recursive=True): + poly_file = osp.join(gt_dir, poly) + poly_files.append(poly_file) + if args.nproc > 1: + mmcv.track_parallel_progress(convert_json_to_label, poly_files, + args.nproc) + else: + mmcv.track_progress(convert_json_to_label, poly_files) + + split_names = ['train', 'val', 'test'] + + for split in split_names: + filenames = [] + for poly in mmcv.scandir( + osp.join(gt_dir, split), '_polygons.json', recursive=True): + filenames.append(poly.replace('_gtFine_polygons.json', '')) + with open(osp.join(out_dir, f'{split}.txt'), 'w') as f: + f.writelines(f + '\n' for f in filenames) + + +if __name__ == '__main__': + main() diff --git a/tools/convert_datasets/drive.py b/tools/convert_datasets/drive.py new file mode 100644 index 0000000000000000000000000000000000000000..891f06f725cc7be9da8c65bc0dc56008b8313e30 --- /dev/null +++ b/tools/convert_datasets/drive.py @@ -0,0 +1,112 @@ +import argparse +import os +import os.path as osp +import tempfile +import zipfile + +import cv2 +import mmcv + + +def parse_args(): + parser = argparse.ArgumentParser( + description='Convert DRIVE dataset to mmsegmentation format') + parser.add_argument( + 'training_path', help='the training part of DRIVE dataset') + parser.add_argument( + 'testing_path', help='the testing part of DRIVE dataset') + parser.add_argument('--tmp_dir', help='path of the temporary directory') + parser.add_argument('-o', '--out_dir', help='output path') + args = parser.parse_args() + return args + + +def main(): + args = parse_args() + training_path = args.training_path + testing_path = args.testing_path + if args.out_dir is None: + out_dir = osp.join('data', 'DRIVE') + else: + out_dir = args.out_dir + + print('Making directories...') + mmcv.mkdir_or_exist(out_dir) + mmcv.mkdir_or_exist(osp.join(out_dir, 'images')) + mmcv.mkdir_or_exist(osp.join(out_dir, 'images', 'training')) + mmcv.mkdir_or_exist(osp.join(out_dir, 'images', 'validation')) + mmcv.mkdir_or_exist(osp.join(out_dir, 'annotations')) + mmcv.mkdir_or_exist(osp.join(out_dir, 'annotations', 'training')) + mmcv.mkdir_or_exist(osp.join(out_dir, 'annotations', 'validation')) + + with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir: + print('Extracting training.zip...') + zip_file = zipfile.ZipFile(training_path) + zip_file.extractall(tmp_dir) + + print('Generating training dataset...') + now_dir = osp.join(tmp_dir, 'training', 'images') + for img_name in os.listdir(now_dir): + img = mmcv.imread(osp.join(now_dir, img_name)) + mmcv.imwrite( + img, + osp.join( + out_dir, 'images', 'training', + osp.splitext(img_name)[0].replace('_training', '') + + '.png')) + + now_dir = osp.join(tmp_dir, 'training', '1st_manual') + for img_name in os.listdir(now_dir): + cap = cv2.VideoCapture(osp.join(now_dir, img_name)) + ret, img = cap.read() + mmcv.imwrite( + img[:, :, 0] // 128, + osp.join(out_dir, 'annotations', 'training', + osp.splitext(img_name)[0] + '.png')) + + print('Extracting test.zip...') + zip_file = zipfile.ZipFile(testing_path) + zip_file.extractall(tmp_dir) + + print('Generating validation dataset...') + now_dir = osp.join(tmp_dir, 'test', 'images') + for img_name in os.listdir(now_dir): + img = mmcv.imread(osp.join(now_dir, img_name)) + mmcv.imwrite( + img, + osp.join( + out_dir, 'images', 'validation', + osp.splitext(img_name)[0].replace('_test', '') + '.png')) + + now_dir = osp.join(tmp_dir, 'test', '1st_manual') + 
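An aside on the `cv2.VideoCapture` calls above: the DRIVE manual annotations ship as GIF files, which `mmcv.imread` (backed by OpenCV's still-image reader) does not decode, so the script grabs the first frame through the video API instead. A minimal sketch of the same trick (the file name is hypothetical):

import cv2

cap = cv2.VideoCapture('21_manual1.gif')  # hypothetical DRIVE label file
ret, img = cap.read()                     # first frame as a BGR array
if ret:
    binary = img[:, :, 0] // 128          # same binarization as above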
if osp.exists(now_dir): + for img_name in os.listdir(now_dir): + cap = cv2.VideoCapture(osp.join(now_dir, img_name)) + ret, img = cap.read() + # The annotation img should be divided by 128, because some of + # the annotation imgs are not standard. We should set a + # threshold to convert the nonstandard annotation imgs. The + # value divided by 128 is equivalent to '1 if value >= 128 + # else 0' + mmcv.imwrite( + img[:, :, 0] // 128, + osp.join(out_dir, 'annotations', 'validation', + osp.splitext(img_name)[0] + '.png')) + + now_dir = osp.join(tmp_dir, 'test', '2nd_manual') + if osp.exists(now_dir): + for img_name in os.listdir(now_dir): + cap = cv2.VideoCapture(osp.join(now_dir, img_name)) + ret, img = cap.read() + mmcv.imwrite( + img[:, :, 0] // 128, + osp.join(out_dir, 'annotations', 'validation', + osp.splitext(img_name)[0] + '.png')) + + print('Removing the temporary files...') + + print('Done!') + + +if __name__ == '__main__': + main() diff --git a/tools/convert_datasets/hrf.py b/tools/convert_datasets/hrf.py new file mode 100644 index 0000000000000000000000000000000000000000..bdeb6e7e5668e097f30bc019c88f9eab6c7fcf07 --- /dev/null +++ b/tools/convert_datasets/hrf.py @@ -0,0 +1,110 @@ +import argparse +import os +import os.path as osp +import tempfile +import zipfile + +import mmcv + +HRF_LEN = 15 +TRAINING_LEN = 5 + + +def parse_args(): + parser = argparse.ArgumentParser( + description='Convert HRF dataset to mmsegmentation format') + parser.add_argument('healthy_path', help='the path of healthy.zip') + parser.add_argument( + 'healthy_manualsegm_path', help='the path of healthy_manualsegm.zip') + parser.add_argument('glaucoma_path', help='the path of glaucoma.zip') + parser.add_argument( + 'glaucoma_manualsegm_path', help='the path of glaucoma_manualsegm.zip') + parser.add_argument( + 'diabetic_retinopathy_path', + help='the path of diabetic_retinopathy.zip') + parser.add_argument( + 'diabetic_retinopathy_manualsegm_path', + help='the path of diabetic_retinopathy_manualsegm.zip') + parser.add_argument('--tmp_dir', help='path of the temporary directory') + parser.add_argument('-o', '--out_dir', help='output path') + args = parser.parse_args() + return args + + +def main(): + args = parse_args() + images_path = [ + args.healthy_path, args.glaucoma_path, args.diabetic_retinopathy_path + ] + annotations_path = [ + args.healthy_manualsegm_path, args.glaucoma_manualsegm_path, + args.diabetic_retinopathy_manualsegm_path + ] + if args.out_dir is None: + out_dir = osp.join('data', 'HRF') + else: + out_dir = args.out_dir + + print('Making directories...') + mmcv.mkdir_or_exist(out_dir) + mmcv.mkdir_or_exist(osp.join(out_dir, 'images')) + mmcv.mkdir_or_exist(osp.join(out_dir, 'images', 'training')) + mmcv.mkdir_or_exist(osp.join(out_dir, 'images', 'validation')) + mmcv.mkdir_or_exist(osp.join(out_dir, 'annotations')) + mmcv.mkdir_or_exist(osp.join(out_dir, 'annotations', 'training')) + mmcv.mkdir_or_exist(osp.join(out_dir, 'annotations', 'validation')) + + print('Generating images...') + for now_path in images_path: + with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir: + zip_file = zipfile.ZipFile(now_path) + zip_file.extractall(tmp_dir) + + assert len(os.listdir(tmp_dir)) == HRF_LEN, \ + 'len(os.listdir(tmp_dir)) != {}'.format(HRF_LEN) + + for filename in sorted(os.listdir(tmp_dir))[:TRAINING_LEN]: + img = mmcv.imread(osp.join(tmp_dir, filename)) + mmcv.imwrite( + img, + osp.join(out_dir, 'images', 'training', + osp.splitext(filename)[0] + '.png')) + for filename in 
sorted(os.listdir(tmp_dir))[TRAINING_LEN:]:
+                img = mmcv.imread(osp.join(tmp_dir, filename))
+                mmcv.imwrite(
+                    img,
+                    osp.join(out_dir, 'images', 'validation',
+                             osp.splitext(filename)[0] + '.png'))
+
+    print('Generating annotations...')
+    for now_path in annotations_path:
+        with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir:
+            zip_file = zipfile.ZipFile(now_path)
+            zip_file.extractall(tmp_dir)
+
+            assert len(os.listdir(tmp_dir)) == HRF_LEN, \
+                'len(os.listdir(tmp_dir)) != {}'.format(HRF_LEN)
+
+            for filename in sorted(os.listdir(tmp_dir))[:TRAINING_LEN]:
+                img = mmcv.imread(osp.join(tmp_dir, filename))
+                # The annotation img should be divided by 128, because some of
+                # the annotation imgs are not standard. We should set a
+                # threshold to convert the nonstandard annotation imgs. The
+                # value divided by 128 is equivalent to '1 if value >= 128
+                # else 0'
+                mmcv.imwrite(
+                    img[:, :, 0] // 128,
+                    osp.join(out_dir, 'annotations', 'training',
+                             osp.splitext(filename)[0] + '.png'))
+            for filename in sorted(os.listdir(tmp_dir))[TRAINING_LEN:]:
+                img = mmcv.imread(osp.join(tmp_dir, filename))
+                mmcv.imwrite(
+                    img[:, :, 0] // 128,
+                    osp.join(out_dir, 'annotations', 'validation',
+                             osp.splitext(filename)[0] + '.png'))
+
+    print('Done!')
+
+
+if __name__ == '__main__':
+    main()
diff --git a/tools/convert_datasets/pascal_context.py b/tools/convert_datasets/pascal_context.py
new file mode 100644
index 0000000000000000000000000000000000000000..dc49ab7ad8fd359c458ec4b6190ed61851426031
--- /dev/null
+++ b/tools/convert_datasets/pascal_context.py
@@ -0,0 +1,86 @@
+import argparse
+import os.path as osp
+from functools import partial
+
+import mmcv
+import numpy as np
+from detail import Detail
+from PIL import Image
+
+_mapping = np.sort(
+    np.array([
+        0, 2, 259, 260, 415, 324, 9, 258, 144, 18, 19, 22, 23, 397, 25, 284,
+        158, 159, 416, 33, 162, 420, 454, 295, 296, 427, 44, 45, 46, 308, 59,
+        440, 445, 31, 232, 65, 354, 424, 68, 326, 72, 458, 34, 207, 80, 355,
+        85, 347, 220, 349, 360, 98, 187, 104, 105, 366, 189, 368, 113, 115
+    ]))
+_key = np.array(range(len(_mapping))).astype('uint8')
+
+
+def generate_labels(img_id, detail, out_dir):
+
+    def _class_to_index(mask, _mapping, _key):
+        # assert the values
+        values = np.unique(mask)
+        for i in range(len(values)):
+            assert (values[i] in _mapping)
+        index = np.digitize(mask.ravel(), _mapping, right=True)
+        return _key[index].reshape(mask.shape)
+
+    mask = Image.fromarray(
+        _class_to_index(detail.getMask(img_id), _mapping=_mapping, _key=_key))
+    filename = img_id['file_name']
+    mask.save(osp.join(out_dir, filename.replace('jpg', 'png')))
+    return osp.splitext(osp.basename(filename))[0]
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(
+        description='Convert PASCAL VOC annotations to mmsegmentation format')
+    parser.add_argument('devkit_path', help='pascal voc devkit path')
+    parser.add_argument('json_path', help='annotation json filepath')
+    parser.add_argument('-o', '--out_dir', help='output path')
+    args = parser.parse_args()
+    return args
+
+
+def main():
+    args = parse_args()
+    devkit_path = args.devkit_path
+    if args.out_dir is None:
+        out_dir = osp.join(devkit_path, 'VOC2010', 'SegmentationClassContext')
+    else:
+        out_dir = args.out_dir
+    json_path = args.json_path
+    mmcv.mkdir_or_exist(out_dir)
+    img_dir = osp.join(devkit_path, 'VOC2010', 'JPEGImages')
+
+    train_detail = Detail(json_path, img_dir, 'train')
+    train_ids = train_detail.getImgs()
+
+    val_detail = Detail(json_path, img_dir, 'val')
+    val_ids = val_detail.getImgs()
+
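On `_class_to_index` above: because `_mapping` is sorted, `np.digitize(..., right=True)` acts as a vectorized lookup that maps each sparse PASCAL-Context label id to its dense index. A toy version with a made-up mapping:

import numpy as np

mapping = np.sort(np.array([0, 9, 18, 33]))    # made-up sparse label ids
key = np.arange(len(mapping)).astype('uint8')  # dense ids 0..3

mask = np.array([[0, 9], [33, 18]])
index = np.digitize(mask.ravel(), mapping, right=True)
print(key[index].reshape(mask.shape))  # [[0 1] [3 2]]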
+ mmcv.mkdir_or_exist( + osp.join(devkit_path, 'VOC2010/ImageSets/SegmentationContext')) + + train_list = mmcv.track_progress( + partial(generate_labels, detail=train_detail, out_dir=out_dir), + train_ids) + with open( + osp.join(devkit_path, 'VOC2010/ImageSets/SegmentationContext', + 'train.txt'), 'w') as f: + f.writelines(line + '\n' for line in sorted(train_list)) + + val_list = mmcv.track_progress( + partial(generate_labels, detail=val_detail, out_dir=out_dir), val_ids) + with open( + osp.join(devkit_path, 'VOC2010/ImageSets/SegmentationContext', + 'val.txt'), 'w') as f: + f.writelines(line + '\n' for line in sorted(val_list)) + + print('Done!') + + +if __name__ == '__main__': + main() diff --git a/tools/convert_datasets/stare.py b/tools/convert_datasets/stare.py new file mode 100644 index 0000000000000000000000000000000000000000..6238d62f64de9406ef84ebb4667d7c0e1ce8a8c5 --- /dev/null +++ b/tools/convert_datasets/stare.py @@ -0,0 +1,165 @@ +import argparse +import gzip +import os +import os.path as osp +import tarfile +import tempfile + +import mmcv + +STARE_LEN = 20 +TRAINING_LEN = 10 + + +def un_gz(src, dst): + g_file = gzip.GzipFile(src) + with open(dst, 'wb+') as f: + f.write(g_file.read()) + g_file.close() + + +def parse_args(): + parser = argparse.ArgumentParser( + description='Convert STARE dataset to mmsegmentation format') + parser.add_argument('image_path', help='the path of stare-images.tar') + parser.add_argument('labels_ah', help='the path of labels-ah.tar') + parser.add_argument('labels_vk', help='the path of labels-vk.tar') + parser.add_argument('--tmp_dir', help='path of the temporary directory') + parser.add_argument('-o', '--out_dir', help='output path') + args = parser.parse_args() + return args + + +def main(): + args = parse_args() + image_path = args.image_path + labels_ah = args.labels_ah + labels_vk = args.labels_vk + if args.out_dir is None: + out_dir = osp.join('data', 'STARE') + else: + out_dir = args.out_dir + + print('Making directories...') + mmcv.mkdir_or_exist(out_dir) + mmcv.mkdir_or_exist(osp.join(out_dir, 'images')) + mmcv.mkdir_or_exist(osp.join(out_dir, 'images', 'training')) + mmcv.mkdir_or_exist(osp.join(out_dir, 'images', 'validation')) + mmcv.mkdir_or_exist(osp.join(out_dir, 'annotations')) + mmcv.mkdir_or_exist(osp.join(out_dir, 'annotations', 'training')) + mmcv.mkdir_or_exist(osp.join(out_dir, 'annotations', 'validation')) + + with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir: + mmcv.mkdir_or_exist(osp.join(tmp_dir, 'gz')) + mmcv.mkdir_or_exist(osp.join(tmp_dir, 'files')) + + print('Extracting stare-images.tar...') + with tarfile.open(image_path) as f: + f.extractall(osp.join(tmp_dir, 'gz')) + + for filename in os.listdir(osp.join(tmp_dir, 'gz')): + un_gz( + osp.join(tmp_dir, 'gz', filename), + osp.join(tmp_dir, 'files', + osp.splitext(filename)[0])) + + now_dir = osp.join(tmp_dir, 'files') + + assert len(os.listdir(now_dir)) == STARE_LEN, \ + 'len(os.listdir(now_dir)) != {}'.format(STARE_LEN) + + for filename in sorted(os.listdir(now_dir))[:TRAINING_LEN]: + img = mmcv.imread(osp.join(now_dir, filename)) + mmcv.imwrite( + img, + osp.join(out_dir, 'images', 'training', + osp.splitext(filename)[0] + '.png')) + + for filename in sorted(os.listdir(now_dir))[TRAINING_LEN:]: + img = mmcv.imread(osp.join(now_dir, filename)) + mmcv.imwrite( + img, + osp.join(out_dir, 'images', 'validation', + osp.splitext(filename)[0] + '.png')) + + print('Removing the temporary files...') + + with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir: 
+ mmcv.mkdir_or_exist(osp.join(tmp_dir, 'gz')) + mmcv.mkdir_or_exist(osp.join(tmp_dir, 'files')) + + print('Extracting labels-ah.tar...') + with tarfile.open(labels_ah) as f: + f.extractall(osp.join(tmp_dir, 'gz')) + + for filename in os.listdir(osp.join(tmp_dir, 'gz')): + un_gz( + osp.join(tmp_dir, 'gz', filename), + osp.join(tmp_dir, 'files', + osp.splitext(filename)[0])) + + now_dir = osp.join(tmp_dir, 'files') + + assert len(os.listdir(now_dir)) == STARE_LEN, \ + 'len(os.listdir(now_dir)) != {}'.format(STARE_LEN) + + for filename in sorted(os.listdir(now_dir))[:TRAINING_LEN]: + img = mmcv.imread(osp.join(now_dir, filename)) + # The annotation img should be divided by 128, because some of + # the annotation imgs are not standard. We should set a threshold + # to convert the nonstandard annotation imgs. The value divided by + # 128 equivalent to '1 if value >= 128 else 0' + mmcv.imwrite( + img[:, :, 0] // 128, + osp.join(out_dir, 'annotations', 'training', + osp.splitext(filename)[0] + '.png')) + + for filename in sorted(os.listdir(now_dir))[TRAINING_LEN:]: + img = mmcv.imread(osp.join(now_dir, filename)) + mmcv.imwrite( + img[:, :, 0] // 128, + osp.join(out_dir, 'annotations', 'validation', + osp.splitext(filename)[0] + '.png')) + + print('Removing the temporary files...') + + with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir: + mmcv.mkdir_or_exist(osp.join(tmp_dir, 'gz')) + mmcv.mkdir_or_exist(osp.join(tmp_dir, 'files')) + + print('Extracting labels-vk.tar...') + with tarfile.open(labels_vk) as f: + f.extractall(osp.join(tmp_dir, 'gz')) + + for filename in os.listdir(osp.join(tmp_dir, 'gz')): + un_gz( + osp.join(tmp_dir, 'gz', filename), + osp.join(tmp_dir, 'files', + osp.splitext(filename)[0])) + + now_dir = osp.join(tmp_dir, 'files') + + assert len(os.listdir(now_dir)) == STARE_LEN, \ + 'len(os.listdir(now_dir)) != {}'.format(STARE_LEN) + + for filename in sorted(os.listdir(now_dir))[:TRAINING_LEN]: + img = mmcv.imread(osp.join(now_dir, filename)) + mmcv.imwrite( + img[:, :, 0] // 128, + osp.join(out_dir, 'annotations', 'training', + osp.splitext(filename)[0] + '.png')) + + for filename in sorted(os.listdir(now_dir))[TRAINING_LEN:]: + img = mmcv.imread(osp.join(now_dir, filename)) + mmcv.imwrite( + img[:, :, 0] // 128, + osp.join(out_dir, 'annotations', 'validation', + osp.splitext(filename)[0] + '.png')) + + print('Removing the temporary files...') + + print('Done!') + + +if __name__ == '__main__': + main() diff --git a/tools/convert_datasets/voc_aug.py b/tools/convert_datasets/voc_aug.py new file mode 100644 index 0000000000000000000000000000000000000000..942746351b64b2e931cb18ce684a1f3ccf7e3866 --- /dev/null +++ b/tools/convert_datasets/voc_aug.py @@ -0,0 +1,91 @@ +import argparse +import os.path as osp +from functools import partial + +import mmcv +import numpy as np +from PIL import Image +from scipy.io import loadmat + +AUG_LEN = 10582 + + +def convert_mat(mat_file, in_dir, out_dir): + data = loadmat(osp.join(in_dir, mat_file)) + mask = data['GTcls'][0]['Segmentation'][0].astype(np.uint8) + seg_filename = osp.join(out_dir, mat_file.replace('.mat', '.png')) + Image.fromarray(mask).save(seg_filename, 'PNG') + + +def generate_aug_list(merged_list, excluded_list): + return list(set(merged_list) - set(excluded_list)) + + +def parse_args(): + parser = argparse.ArgumentParser( + description='Convert PASCAL VOC annotations to mmsegmentation format') + parser.add_argument('devkit_path', help='pascal voc devkit path') + parser.add_argument('aug_path', help='pascal voc aug 
path') + parser.add_argument('-o', '--out_dir', help='output path') + parser.add_argument( + '--nproc', default=1, type=int, help='number of process') + args = parser.parse_args() + return args + + +def main(): + args = parse_args() + devkit_path = args.devkit_path + aug_path = args.aug_path + nproc = args.nproc + if args.out_dir is None: + out_dir = osp.join(devkit_path, 'VOC2012', 'SegmentationClassAug') + else: + out_dir = args.out_dir + mmcv.mkdir_or_exist(out_dir) + in_dir = osp.join(aug_path, 'dataset', 'cls') + + mmcv.track_parallel_progress( + partial(convert_mat, in_dir=in_dir, out_dir=out_dir), + list(mmcv.scandir(in_dir, suffix='.mat')), + nproc=nproc) + + full_aug_list = [] + with open(osp.join(aug_path, 'dataset', 'train.txt')) as f: + full_aug_list += [line.strip() for line in f] + with open(osp.join(aug_path, 'dataset', 'val.txt')) as f: + full_aug_list += [line.strip() for line in f] + + with open( + osp.join(devkit_path, 'VOC2012/ImageSets/Segmentation', + 'train.txt')) as f: + ori_train_list = [line.strip() for line in f] + with open( + osp.join(devkit_path, 'VOC2012/ImageSets/Segmentation', + 'val.txt')) as f: + val_list = [line.strip() for line in f] + + aug_train_list = generate_aug_list(ori_train_list + full_aug_list, + val_list) + assert len(aug_train_list) == AUG_LEN, 'len(aug_train_list) != {}'.format( + AUG_LEN) + + with open( + osp.join(devkit_path, 'VOC2012/ImageSets/Segmentation', + 'trainaug.txt'), 'w') as f: + f.writelines(line + '\n' for line in aug_train_list) + + aug_list = generate_aug_list(full_aug_list, ori_train_list + val_list) + assert len(aug_list) == AUG_LEN - len( + ori_train_list), 'len(aug_list) != {}'.format(AUG_LEN - + len(ori_train_list)) + with open( + osp.join(devkit_path, 'VOC2012/ImageSets/Segmentation', 'aug.txt'), + 'w') as f: + f.writelines(line + '\n' for line in aug_list) + + print('Done!') + + +if __name__ == '__main__': + main() diff --git a/tools/dist_test.sh b/tools/dist_test.sh new file mode 100755 index 0000000000000000000000000000000000000000..34fb46541d447d923bc3561bbba09bd5c94d7bfe --- /dev/null +++ b/tools/dist_test.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash + +CONFIG=$1 +CHECKPOINT=$2 +GPUS=$3 +PORT=${PORT:-29500} +PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ +python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ + $(dirname "$0")/test.py $CONFIG $CHECKPOINT --launcher pytorch ${@:4} diff --git a/tools/dist_train.sh b/tools/dist_train.sh new file mode 100755 index 0000000000000000000000000000000000000000..5b43fffbf28fc9b8ba7c14efcd5e4f8b19279470 --- /dev/null +++ b/tools/dist_train.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash + +CONFIG=$1 +GPUS=$2 +PORT=${PORT:-29500} + +PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ +python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ + $(dirname "$0")/train.py $CONFIG --launcher pytorch ${@:3} diff --git a/tools/get_flops.py b/tools/get_flops.py new file mode 100644 index 0000000000000000000000000000000000000000..bc98c5252591b0c9ec218144c0652ea695a5b96e --- /dev/null +++ b/tools/get_flops.py @@ -0,0 +1,58 @@ +import argparse + +from mmcv import Config +from mmcv.cnn import get_model_complexity_info + +from mmseg.models import build_segmentor + + +def parse_args(): + parser = argparse.ArgumentParser(description='Train a segmentor') + parser.add_argument('config', help='train config file path') + parser.add_argument( + '--shape', + type=int, + nargs='+', + default=[2048, 1024], + help='input image size') + args = parser.parse_args() + return args + 
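Before the `main()` that follows: it feeds the segmentor to mmcv's complexity counter. A hedged usage sketch on a toy module rather than the real model (the module and input shape here are illustrative only):

import torch.nn as nn
from mmcv.cnn import get_model_complexity_info

model = nn.Sequential(
    nn.Conv2d(3, 16, 3, padding=1), nn.ReLU(),
    nn.Conv2d(16, 8, 3, padding=1))
flops, params = get_model_complexity_info(model, (3, 224, 224))
print(flops, params)  # human-readable strings by default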
+
+def main():
+
+    args = parse_args()
+
+    if len(args.shape) == 1:
+        input_shape = (3, args.shape[0], args.shape[0])
+    elif len(args.shape) == 2:
+        input_shape = (3, ) + tuple(args.shape)
+    else:
+        raise ValueError('invalid input shape')
+
+    cfg = Config.fromfile(args.config)
+    cfg.model.pretrained = None
+    model = build_segmentor(
+        cfg.model,
+        train_cfg=cfg.get('train_cfg'),
+        test_cfg=cfg.get('test_cfg')).cuda()
+    model.eval()
+
+    if hasattr(model, 'forward_dummy'):
+        model.forward = model.forward_dummy
+    else:
+        raise NotImplementedError(
+            'FLOPs counter is currently not supported with {}'.format(
+                model.__class__.__name__))
+
+    flops, params = get_model_complexity_info(model, input_shape)
+    split_line = '=' * 30
+    print('{0}\nInput shape: {1}\nFlops: {2}\nParams: {3}\n{0}'.format(
+        split_line, input_shape, flops, params))
+    print('!!!Please be cautious if you use the results in papers. '
+          'You may need to check if all ops are supported and verify that the '
+          'flops computation is correct.')
+
+
+if __name__ == '__main__':
+    main()
diff --git a/tools/ort_test.py b/tools/ort_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..807b21272a04c86176c19de45fb2407b71e33319
--- /dev/null
+++ b/tools/ort_test.py
@@ -0,0 +1,191 @@
+import argparse
+import os
+import os.path as osp
+import warnings
+
+import mmcv
+import numpy as np
+import onnxruntime as ort
+import torch
+from mmcv.parallel import MMDataParallel
+from mmcv.runner import get_dist_info
+from mmcv.utils import DictAction
+
+from mmseg.apis import single_gpu_test
+from mmseg.datasets import build_dataloader, build_dataset
+from mmseg.models.segmentors.base import BaseSegmentor
+
+
+class ONNXRuntimeSegmentor(BaseSegmentor):
+
+    def __init__(self, onnx_file, cfg, device_id):
+        super(ONNXRuntimeSegmentor, self).__init__()
+        # get the custom op path
+        ort_custom_op_path = ''
+        try:
+            from mmcv.ops import get_onnxruntime_op_path
+            ort_custom_op_path = get_onnxruntime_op_path()
+        except (ImportError, ModuleNotFoundError):
+            warnings.warn('If input model has custom op from mmcv, '
+                          'you may have to build mmcv with ONNXRuntime from '
+                          'source.')
+        session_options = ort.SessionOptions()
+        # register custom op for onnxruntime
+        if osp.exists(ort_custom_op_path):
+            session_options.register_custom_ops_library(ort_custom_op_path)
+        sess = ort.InferenceSession(onnx_file, session_options)
+        providers = ['CPUExecutionProvider']
+        options = [{}]
+        is_cuda_available = ort.get_device() == 'GPU'
+        if is_cuda_available:
+            providers.insert(0, 'CUDAExecutionProvider')
+            options.insert(0, {'device_id': device_id})
+
+        sess.set_providers(providers, options)
+
+        self.sess = sess
+        self.device_id = device_id
+        self.io_binding = sess.io_binding()
+        self.output_names = [_.name for _ in sess.get_outputs()]
+        for name in self.output_names:
+            self.io_binding.bind_output(name)
+        self.cfg = cfg
+        self.test_mode = cfg.model.test_cfg.mode
+
+    def extract_feat(self, imgs):
+        raise NotImplementedError('This method is not implemented.')
+
+    def encode_decode(self, img, img_metas):
+        raise NotImplementedError('This method is not implemented.')
+
+    def forward_train(self, imgs, img_metas, **kwargs):
+        raise NotImplementedError('This method is not implemented.')
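For orientation before `simple_test` below: the io_binding path keeps input and output buffers bound across calls to avoid copies; the plain `run` API is the simpler equivalent. A hedged sketch (the model path and the input name are assumptions for illustration):

import numpy as np
import onnxruntime as ort

sess = ort.InferenceSession('model.onnx')  # hypothetical exported model
dummy = np.random.rand(1, 3, 512, 512).astype(np.float32)
outputs = sess.run(None, {'input': dummy})  # list of output arrays
print(outputs[0].shape)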
+
+    def simple_test(self, img, img_meta, **kwargs):
+        device_type = img.device.type
+        self.io_binding.bind_input(
+            name='input',
+            device_type=device_type,
+            device_id=self.device_id,
+            element_type=np.float32,
+            shape=img.shape,
+            buffer_ptr=img.data_ptr())
+        self.sess.run_with_iobinding(self.io_binding)
+        seg_pred = self.io_binding.copy_outputs_to_cpu()[0]
+        # the 'whole' test mode might support dynamic reshape
+        ori_shape = img_meta[0]['ori_shape']
+        if not (ori_shape[0] == seg_pred.shape[-2]
+                and ori_shape[1] == seg_pred.shape[-1]):
+            seg_pred = torch.from_numpy(seg_pred).float()
+            seg_pred = torch.nn.functional.interpolate(
+                seg_pred, size=tuple(ori_shape[:2]), mode='nearest')
+            seg_pred = seg_pred.long().detach().cpu().numpy()
+        seg_pred = seg_pred[0]
+        seg_pred = list(seg_pred)
+        return seg_pred
+
+    def aug_test(self, imgs, img_metas, **kwargs):
+        raise NotImplementedError('This method is not implemented.')
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(
+        description='mmseg onnxruntime backend test (and eval) a model')
+    parser.add_argument('config', help='test config file path')
+    parser.add_argument('model', help='Input model file')
+    parser.add_argument('--out', help='output result file in pickle format')
+    parser.add_argument(
+        '--format-only',
+        action='store_true',
+        help='Format the output results without performing evaluation. It is '
+        'useful when you want to format the result to a specific format and '
+        'submit it to the test server')
+    parser.add_argument(
+        '--eval',
+        type=str,
+        nargs='+',
+        help='evaluation metrics, which depends on the dataset, e.g., "mIoU"'
+        ' for generic datasets, and "cityscapes" for Cityscapes')
+    parser.add_argument('--show', action='store_true', help='show results')
+    parser.add_argument(
+        '--show-dir', help='directory where painted images will be saved')
+    parser.add_argument(
+        '--options', nargs='+', action=DictAction, help='custom options')
+    parser.add_argument(
+        '--eval-options',
+        nargs='+',
+        action=DictAction,
+        help='custom options for evaluation')
+    parser.add_argument(
+        '--opacity',
+        type=float,
+        default=0.5,
+        help='Opacity of painted segmentation map. In (0, 1] range.')
+    parser.add_argument('--local_rank', type=int, default=0)
+    args = parser.parse_args()
+    if 'LOCAL_RANK' not in os.environ:
+        os.environ['LOCAL_RANK'] = str(args.local_rank)
+    return args
+
+
+def main():
+    args = parse_args()
+
+    assert args.out or args.eval or args.format_only or args.show \
+        or args.show_dir, \
+        ('Please specify at least one operation (save/eval/format/show the '
+         'results / save the results) with the argument "--out", "--eval"'
+         ', "--format-only", "--show" or "--show-dir"')
+
+    if args.eval and args.format_only:
+        raise ValueError('--eval and --format_only cannot both be specified')
+
+    if args.out is not None and not args.out.endswith(('.pkl', '.pickle')):
+        raise ValueError('The output file must be a pkl file.')
+
+    cfg = mmcv.Config.fromfile(args.config)
+    if args.options is not None:
+        cfg.merge_from_dict(args.options)
+    cfg.model.pretrained = None
+    cfg.data.test.test_mode = True
+
+    # init distributed env first, since logger depends on the dist info.
+    distributed = False
+
+    # build the dataloader
+    # TODO: support multiple images per gpu (only minor changes are needed)
+    dataset = build_dataset(cfg.data.test)
+    data_loader = build_dataloader(
+        dataset,
+        samples_per_gpu=1,
+        workers_per_gpu=cfg.data.workers_per_gpu,
+        dist=distributed,
+        shuffle=False)
+
+    # load onnx config and meta
+    cfg.model.train_cfg = None
+    model = ONNXRuntimeSegmentor(args.model, cfg=cfg, device_id=0)
+    model.CLASSES = dataset.CLASSES
+    model.PALETTE = dataset.PALETTE
+
+    efficient_test = False
+    if args.eval_options is not None:
+        efficient_test = args.eval_options.get('efficient_test', False)
+
+    model = MMDataParallel(model, device_ids=[0])
+    outputs = single_gpu_test(model, data_loader, args.show, args.show_dir,
+                              efficient_test, args.opacity)
+
+    rank, _ = get_dist_info()
+    if rank == 0:
+        if args.out:
+            print(f'\nwriting results to {args.out}')
+            mmcv.dump(outputs, args.out)
+        kwargs = {} if args.eval_options is None else args.eval_options
+        if args.format_only:
+            dataset.format_results(outputs, **kwargs)
+        if args.eval:
+            dataset.evaluate(outputs, args.eval, **kwargs)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/tools/print_config.py b/tools/print_config.py
new file mode 100644
index 0000000000000000000000000000000000000000..88984e420b72598e17220b3c1cdf4347f8eee0d3
--- /dev/null
+++ b/tools/print_config.py
@@ -0,0 +1,38 @@
+import argparse
+
+from mmcv import Config, DictAction
+
+from mmseg.apis import init_segmentor
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(description='Print the whole config')
+    parser.add_argument('config', help='config file path')
+    parser.add_argument(
+        '--graph', action='store_true', help="print the model's graph")
+    parser.add_argument(
+        '--options', nargs='+', action=DictAction, help='arguments in dict')
+    args = parser.parse_args()
+
+    return args
+
+
+def main():
+    args = parse_args()
+
+    cfg = Config.fromfile(args.config)
+    if args.options is not None:
+        cfg.merge_from_dict(args.options)
+    print(f'Config:\n{cfg.pretty_text}')
+    # dump config
+    cfg.dump('example.py')
+    # dump the model's graph
+    if args.graph:
+        model = init_segmentor(args.config, device='cpu')
+        print(f'Model graph:\n{str(model)}')
+        with open('example-graph.txt', 'w') as f:
+            f.writelines(str(model))
+
+
+if __name__ == '__main__':
+    main()
diff --git a/tools/publish_model.py b/tools/publish_model.py
new file mode 100644
index 0000000000000000000000000000000000000000..a049f17674b7a1aa730057e5cc294d2368fe707c
--- /dev/null
+++ b/tools/publish_model.py
@@ -0,0 +1,35 @@
+import argparse
+import subprocess
+
+import torch
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(
+        description='Process a checkpoint to be published')
+    parser.add_argument('in_file', help='input checkpoint filename')
+    parser.add_argument('out_file', help='output checkpoint filename')
+    args = parser.parse_args()
+    return args
+
+
+def process_checkpoint(in_file, out_file):
+    checkpoint = torch.load(in_file, map_location='cpu')
+    # remove optimizer for smaller file size
+    if 'optimizer' in checkpoint:
+        del checkpoint['optimizer']
+    # if it is necessary to remove some sensitive data in checkpoint['meta'],
+    # add the code here.
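+    # e.g., something like the following (hypothetical keys -- adjust to
+    # whatever your checkpoints actually store):
+    #     for key in ('hook_msgs', 'time'):
+    #         checkpoint['meta'].pop(key, None)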
+    torch.save(checkpoint, out_file)
+    sha = subprocess.check_output(['sha256sum', out_file]).decode()
+    # strip the '.pth' suffix explicitly; str.rstrip('.pth') would strip
+    # characters, not the suffix, mangling names such as 'depth.pth'
+    if out_file.endswith('.pth'):
+        out_file_name = out_file[:-4]
+    else:
+        out_file_name = out_file
+    final_file = out_file_name + '-{}.pth'.format(sha[:8])
+    subprocess.Popen(['mv', out_file, final_file])
+
+
+def main():
+    args = parse_args()
+    process_checkpoint(args.in_file, args.out_file)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/tools/pytorch2onnx.py b/tools/pytorch2onnx.py
new file mode 100644
index 0000000000000000000000000000000000000000..5660ed9004b9a75ea5e1206a4de0b152f13f66f0
--- /dev/null
+++ b/tools/pytorch2onnx.py
@@ -0,0 +1,389 @@
+import argparse
+from functools import partial
+
+import mmcv
+import numpy as np
+import onnxruntime as rt
+import torch
+import torch._C
+import torch.serialization
+from mmcv import DictAction
+from mmcv.onnx import register_extra_symbolics
+from mmcv.runner import load_checkpoint
+from torch import nn
+
+from mmseg.apis import show_result_pyplot
+from mmseg.apis.inference import LoadImage
+from mmseg.datasets.pipelines import Compose
+from mmseg.models import build_segmentor
+
+torch.manual_seed(3)
+
+
+def _convert_batchnorm(module):
+    module_output = module
+    if isinstance(module, torch.nn.SyncBatchNorm):
+        module_output = torch.nn.BatchNorm2d(module.num_features, module.eps,
+                                             module.momentum, module.affine,
+                                             module.track_running_stats)
+        if module.affine:
+            module_output.weight.data = module.weight.data.clone().detach()
+            module_output.bias.data = module.bias.data.clone().detach()
+            # keep requires_grad unchanged
+            module_output.weight.requires_grad = module.weight.requires_grad
+            module_output.bias.requires_grad = module.bias.requires_grad
+        module_output.running_mean = module.running_mean
+        module_output.running_var = module.running_var
+        module_output.num_batches_tracked = module.num_batches_tracked
+    for name, child in module.named_children():
+        module_output.add_module(name, _convert_batchnorm(child))
+    del module
+    return module_output
+
+
+def _demo_mm_inputs(input_shape, num_classes):
+    """Create a superset of inputs needed to run test or train batches.
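+
+    The dummy data is drawn from a seeded RandomState, so repeated calls (and
+    therefore repeated exports) see identical inputs.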
+ + Args: + input_shape (tuple): + input batch dimensions + num_classes (int): + number of semantic classes + """ + (N, C, H, W) = input_shape + rng = np.random.RandomState(0) + imgs = rng.rand(*input_shape) + segs = rng.randint( + low=0, high=num_classes - 1, size=(N, 1, H, W)).astype(np.uint8) + img_metas = [{ + 'img_shape': (H, W, C), + 'ori_shape': (H, W, C), + 'pad_shape': (H, W, C), + 'filename': '.png', + 'scale_factor': 1.0, + 'flip': False, + } for _ in range(N)] + mm_inputs = { + 'imgs': torch.FloatTensor(imgs).requires_grad_(True), + 'img_metas': img_metas, + 'gt_semantic_seg': torch.LongTensor(segs) + } + return mm_inputs + + +def _prepare_input_img(img_path, + test_pipeline, + shape=None, + rescale_shape=None): + # build the data pipeline + if shape is not None: + test_pipeline[1]['img_scale'] = (shape[1], shape[0]) + test_pipeline[1]['transforms'][0]['keep_ratio'] = False + test_pipeline = [LoadImage()] + test_pipeline[1:] + test_pipeline = Compose(test_pipeline) + # prepare data + data = dict(img=img_path) + data = test_pipeline(data) + imgs = data['img'] + img_metas = [i.data for i in data['img_metas']] + + if rescale_shape is not None: + for img_meta in img_metas: + img_meta['ori_shape'] = tuple(rescale_shape) + (3, ) + + mm_inputs = {'imgs': imgs, 'img_metas': img_metas} + + return mm_inputs + + +def _update_input_img(img_list, img_meta_list): + # update img and its meta list + N = img_list[0].size(0) + img_meta = img_meta_list[0][0] + img_shape = img_meta['img_shape'] + ori_shape = img_meta['ori_shape'] + pad_shape = img_meta['pad_shape'] + new_img_meta_list = [[{ + 'img_shape': + img_shape, + 'ori_shape': + ori_shape, + 'pad_shape': + pad_shape, + 'filename': + img_meta['filename'], + 'scale_factor': + (img_shape[1] / ori_shape[1], img_shape[0] / ori_shape[0]) * 2, + 'flip': + False, + } for _ in range(N)]] + + return img_list, new_img_meta_list + + +def pytorch2onnx(model, + mm_inputs, + opset_version=11, + show=False, + output_file='tmp.onnx', + verify=False, + dynamic_export=False): + """Export Pytorch model to ONNX model and verify the outputs are same + between Pytorch and ONNX. + + Args: + model (nn.Module): Pytorch model we want to export. + mm_inputs (dict): Contain the input tensors and img_metas information. + opset_version (int): The onnx op version. Default: 11. + show (bool): Whether print the computation graph. Default: False. + output_file (string): The path to where we store the output ONNX model. + Default: `tmp.onnx`. + verify (bool): Whether compare the outputs between Pytorch and ONNX. + Default: False. + dynamic_export (bool): Whether to export ONNX with dynamic axis. + Default: False. 
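+
+    Example:
+        A minimal sketch; assumes ``segmentor`` was built and converted as in
+        the ``__main__`` block below (all values are illustrative):
+
+        >>> mm_inputs = _demo_mm_inputs((1, 3, 512, 512), num_classes=19)
+        >>> pytorch2onnx(segmentor, mm_inputs, output_file='tmp.onnx',
+        ...              verify=True)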
+ """ + model.cpu().eval() + test_mode = model.test_cfg.mode + + if isinstance(model.decode_head, nn.ModuleList): + num_classes = model.decode_head[-1].num_classes + else: + num_classes = model.decode_head.num_classes + + imgs = mm_inputs.pop('imgs') + img_metas = mm_inputs.pop('img_metas') + + img_list = [img[None, :] for img in imgs] + img_meta_list = [[img_meta] for img_meta in img_metas] + # update img_meta + img_list, img_meta_list = _update_input_img(img_list, img_meta_list) + + # replace original forward function + origin_forward = model.forward + model.forward = partial( + model.forward, + img_metas=img_meta_list, + return_loss=False, + rescale=True) + dynamic_axes = None + if dynamic_export: + if test_mode == 'slide': + dynamic_axes = {'input': {0: 'batch'}, 'output': {1: 'batch'}} + else: + dynamic_axes = { + 'input': { + 0: 'batch', + 2: 'height', + 3: 'width' + }, + 'output': { + 1: 'batch', + 2: 'height', + 3: 'width' + } + } + + register_extra_symbolics(opset_version) + with torch.no_grad(): + torch.onnx.export( + model, (img_list, ), + output_file, + input_names=['input'], + output_names=['output'], + export_params=True, + keep_initializers_as_inputs=False, + verbose=show, + opset_version=opset_version, + dynamic_axes=dynamic_axes) + print(f'Successfully exported ONNX model: {output_file}') + model.forward = origin_forward + + if verify: + # check by onnx + import onnx + onnx_model = onnx.load(output_file) + onnx.checker.check_model(onnx_model) + + if dynamic_export and test_mode == 'whole': + # scale image for dynamic shape test + img_list = [ + nn.functional.interpolate(_, scale_factor=1.5) + for _ in img_list + ] + # concate flip image for batch test + flip_img_list = [_.flip(-1) for _ in img_list] + img_list = [ + torch.cat((ori_img, flip_img), 0) + for ori_img, flip_img in zip(img_list, flip_img_list) + ] + + # update img_meta + img_list, img_meta_list = _update_input_img( + img_list, img_meta_list) + + # check the numerical value + # get pytorch output + with torch.no_grad(): + pytorch_result = model(img_list, img_meta_list, return_loss=False) + pytorch_result = np.stack(pytorch_result, 0) + + # get onnx output + input_all = [node.name for node in onnx_model.graph.input] + input_initializer = [ + node.name for node in onnx_model.graph.initializer + ] + net_feed_input = list(set(input_all) - set(input_initializer)) + assert (len(net_feed_input) == 1) + sess = rt.InferenceSession(output_file) + onnx_result = sess.run( + None, {net_feed_input[0]: img_list[0].detach().numpy()})[0][0] + # show segmentation results + if show: + import cv2 + import os.path as osp + img = img_meta_list[0][0]['filename'] + if not osp.exists(img): + img = imgs[0][:3, ...].permute(1, 2, 0) * 255 + img = img.detach().numpy().astype(np.uint8) + ori_shape = img.shape[:2] + else: + ori_shape = LoadImage()({'img': img})['ori_shape'] + + # resize onnx_result to ori_shape + onnx_result_ = cv2.resize(onnx_result[0].astype(np.uint8), + (ori_shape[1], ori_shape[0])) + show_result_pyplot( + model, + img, (onnx_result_, ), + palette=model.PALETTE, + block=False, + title='ONNXRuntime', + opacity=0.5) + + # resize pytorch_result to ori_shape + pytorch_result_ = cv2.resize(pytorch_result[0].astype(np.uint8), + (ori_shape[1], ori_shape[0])) + show_result_pyplot( + model, + img, (pytorch_result_, ), + title='PyTorch', + palette=model.PALETTE, + opacity=0.5) + # compare results + np.testing.assert_allclose( + pytorch_result.astype(np.float32) / num_classes, + onnx_result.astype(np.float32) / num_classes, + 
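+                # the class maps are normalized by num_classes above, so even
+                # a single flipped label (a gap of 1/num_classes) far exceeds
+                # these tolerances and fails the check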
rtol=1e-5, + atol=1e-5, + err_msg='The outputs are different between Pytorch and ONNX') + print('The outputs are same between Pytorch and ONNX') + + +def parse_args(): + parser = argparse.ArgumentParser(description='Convert MMSeg to ONNX') + parser.add_argument('config', help='test config file path') + parser.add_argument('--checkpoint', help='checkpoint file', default=None) + parser.add_argument( + '--input-img', type=str, help='Images for input', default=None) + parser.add_argument( + '--show', + action='store_true', + help='show onnx graph and segmentation results') + parser.add_argument( + '--verify', action='store_true', help='verify the onnx model') + parser.add_argument('--output-file', type=str, default='tmp.onnx') + parser.add_argument('--opset-version', type=int, default=11) + parser.add_argument( + '--shape', + type=int, + nargs='+', + default=None, + help='input image height and width.') + parser.add_argument( + '--rescale_shape', + type=int, + nargs='+', + default=None, + help='output image rescale height and width, work for slide mode.') + parser.add_argument( + '--cfg-options', + nargs='+', + action=DictAction, + help='Override some settings in the used config, the key-value pair ' + 'in xxx=yyy format will be merged into config file. If the value to ' + 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' + 'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" ' + 'Note that the quotation marks are necessary and that no white space ' + 'is allowed.') + parser.add_argument( + '--dynamic-export', + action='store_true', + help='Whether to export onnx with dynamic axis.') + args = parser.parse_args() + return args + + +if __name__ == '__main__': + args = parse_args() + + cfg = mmcv.Config.fromfile(args.config) + if args.cfg_options is not None: + cfg.merge_from_dict(args.cfg_options) + cfg.model.pretrained = None + + if args.shape is None: + img_scale = cfg.test_pipeline[1]['img_scale'] + input_shape = (1, 3, img_scale[1], img_scale[0]) + elif len(args.shape) == 1: + input_shape = (1, 3, args.shape[0], args.shape[0]) + elif len(args.shape) == 2: + input_shape = ( + 1, + 3, + ) + tuple(args.shape) + else: + raise ValueError('invalid input shape') + + test_mode = cfg.model.test_cfg.mode + + # build the model and load checkpoint + cfg.model.train_cfg = None + segmentor = build_segmentor( + cfg.model, train_cfg=None, test_cfg=cfg.get('test_cfg')) + # convert SyncBN to BN + segmentor = _convert_batchnorm(segmentor) + + if args.checkpoint: + checkpoint = load_checkpoint( + segmentor, args.checkpoint, map_location='cpu') + segmentor.CLASSES = checkpoint['meta']['CLASSES'] + segmentor.PALETTE = checkpoint['meta']['PALETTE'] + + # read input or create dummpy input + if args.input_img is not None: + preprocess_shape = (input_shape[2], input_shape[3]) + rescale_shape = None + if args.rescale_shape is not None: + rescale_shape = [args.rescale_shape[0], args.rescale_shape[1]] + mm_inputs = _prepare_input_img( + args.input_img, + cfg.data.test.pipeline, + shape=preprocess_shape, + rescale_shape=rescale_shape) + else: + if isinstance(segmentor.decode_head, nn.ModuleList): + num_classes = segmentor.decode_head[-1].num_classes + else: + num_classes = segmentor.decode_head.num_classes + mm_inputs = _demo_mm_inputs(input_shape, num_classes) + + # convert model to onnx file + pytorch2onnx( + segmentor, + mm_inputs, + opset_version=args.opset_version, + show=args.show, + output_file=args.output_file, + verify=args.verify, + dynamic_export=args.dynamic_export) diff 
--git a/tools/pytorch2torchscript.py b/tools/pytorch2torchscript.py
new file mode 100644
index 0000000000000000000000000000000000000000..206c4bb457ece3901fbd536db1c666290f217aa4
--- /dev/null
+++ b/tools/pytorch2torchscript.py
@@ -0,0 +1,184 @@
+import argparse
+
+import mmcv
+import numpy as np
+import torch
+import torch._C
+import torch.serialization
+from mmcv.runner import load_checkpoint
+from torch import nn
+
+from mmseg.models import build_segmentor
+
+torch.manual_seed(3)
+
+
+def digit_version(version_str):
+    digit_version = []
+    for x in version_str.split('.'):
+        if x.isdigit():
+            digit_version.append(int(x))
+        elif x.find('rc') != -1:
+            patch_version = x.split('rc')
+            digit_version.append(int(patch_version[0]) - 1)
+            digit_version.append(int(patch_version[1]))
+    return digit_version
+
+
+def check_torch_version():
+    torch_minimum_version = '1.8.0'
+    torch_version = digit_version(torch.__version__)
+
+    assert (torch_version >= digit_version(torch_minimum_version)), \
+        f'Torch=={torch.__version__} is not supported for conversion to ' \
+        f'torchscript. Please install pytorch>={torch_minimum_version}.'
+
+
+def _convert_batchnorm(module):
+    module_output = module
+    if isinstance(module, torch.nn.SyncBatchNorm):
+        module_output = torch.nn.BatchNorm2d(module.num_features, module.eps,
+                                             module.momentum, module.affine,
+                                             module.track_running_stats)
+        if module.affine:
+            module_output.weight.data = module.weight.data.clone().detach()
+            module_output.bias.data = module.bias.data.clone().detach()
+            # keep requires_grad unchanged
+            module_output.weight.requires_grad = module.weight.requires_grad
+            module_output.bias.requires_grad = module.bias.requires_grad
+        module_output.running_mean = module.running_mean
+        module_output.running_var = module.running_var
+        module_output.num_batches_tracked = module.num_batches_tracked
+    for name, child in module.named_children():
+        module_output.add_module(name, _convert_batchnorm(child))
+    del module
+    return module_output
+
+
+def _demo_mm_inputs(input_shape, num_classes):
+    """Create a superset of inputs needed to run test or train batches.
+
+    Args:
+        input_shape (tuple):
+            input batch dimensions
+        num_classes (int):
+            number of semantic classes
+    """
+    (N, C, H, W) = input_shape
+    rng = np.random.RandomState(0)
+    imgs = rng.rand(*input_shape)
+    segs = rng.randint(
+        low=0, high=num_classes - 1, size=(N, 1, H, W)).astype(np.uint8)
+    img_metas = [{
+        'img_shape': (H, W, C),
+        'ori_shape': (H, W, C),
+        'pad_shape': (H, W, C),
+        'filename': '.png',
+        'scale_factor': 1.0,
+        'flip': False,
+    } for _ in range(N)]
+    mm_inputs = {
+        'imgs': torch.FloatTensor(imgs).requires_grad_(True),
+        'img_metas': img_metas,
+        'gt_semantic_seg': torch.LongTensor(segs)
+    }
+    return mm_inputs
+
+
+def pytorch2libtorch(model,
+                     input_shape,
+                     show=False,
+                     output_file='tmp.pt',
+                     verify=False):
+    """Export Pytorch model to TorchScript model and verify that the outputs
+    are the same between Pytorch and TorchScript.
+
+    Args:
+        model (nn.Module): Pytorch model we want to export.
+        input_shape (tuple): Use this input shape to construct
+            the corresponding dummy input and execute the model.
+        show (bool): Whether to print the computation graph. Default: False.
+        output_file (string): The path to where we store the
+            output TorchScript model. Default: `tmp.pt`.
+        verify (bool): Whether to compare the outputs between
+            Pytorch and TorchScript. Default: False.
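+
+    Example:
+        A minimal sketch; ``segmentor`` stands for a model built as in the
+        ``__main__`` block below:
+
+        >>> pytorch2libtorch(segmentor, (1, 3, 512, 512),
+        ...                  output_file='tmp.pt')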
+    """
+    if isinstance(model.decode_head, nn.ModuleList):
+        num_classes = model.decode_head[-1].num_classes
+    else:
+        num_classes = model.decode_head.num_classes
+
+    mm_inputs = _demo_mm_inputs(input_shape, num_classes)
+
+    imgs = mm_inputs.pop('imgs')
+
+    # replace the original forward with forward_dummy
+    model.forward = model.forward_dummy
+    model.eval()
+    traced_model = torch.jit.trace(
+        model,
+        example_inputs=imgs,
+        check_trace=verify,
+    )
+
+    if show:
+        print(traced_model.graph)
+
+    traced_model.save(output_file)
+    print('Successfully exported TorchScript model: {}'.format(output_file))
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(
+        description='Convert MMSeg to TorchScript')
+    parser.add_argument('config', help='test config file path')
+    parser.add_argument('--checkpoint', help='checkpoint file', default=None)
+    parser.add_argument(
+        '--show', action='store_true', help='show TorchScript graph')
+    parser.add_argument(
+        '--verify', action='store_true', help='verify the TorchScript model')
+    parser.add_argument('--output-file', type=str, default='tmp.pt')
+    parser.add_argument(
+        '--shape',
+        type=int,
+        nargs='+',
+        default=[512, 512],
+        help='input image size (height, width)')
+    args = parser.parse_args()
+    return args
+
+
+if __name__ == '__main__':
+    args = parse_args()
+    check_torch_version()
+
+    if len(args.shape) == 1:
+        input_shape = (1, 3, args.shape[0], args.shape[0])
+    elif len(args.shape) == 2:
+        input_shape = (
+            1,
+            3,
+        ) + tuple(args.shape)
+    else:
+        raise ValueError('invalid input shape')
+
+    cfg = mmcv.Config.fromfile(args.config)
+    cfg.model.pretrained = None
+
+    # build the model and load checkpoint
+    cfg.model.train_cfg = None
+    segmentor = build_segmentor(
+        cfg.model, train_cfg=None, test_cfg=cfg.get('test_cfg'))
+    # convert SyncBN to BN
+    segmentor = _convert_batchnorm(segmentor)
+
+    if args.checkpoint:
+        load_checkpoint(segmentor, args.checkpoint, map_location='cpu')
+
+    # convert the PyTorch model to LibTorch model
+    pytorch2libtorch(
+        segmentor,
+        input_shape,
+        show=args.show,
+        output_file=args.output_file,
+        verify=args.verify)
diff --git a/tools/slurm_test.sh b/tools/slurm_test.sh
new file mode 100755
index 0000000000000000000000000000000000000000..4e6f7bf4e33267f269cf0f455924cb70166ccd4b
--- /dev/null
+++ b/tools/slurm_test.sh
@@ -0,0 +1,24 @@
+#!/usr/bin/env bash
+
+set -x
+
+PARTITION=$1
+JOB_NAME=$2
+CONFIG=$3
+CHECKPOINT=$4
+GPUS=${GPUS:-4}
+GPUS_PER_NODE=${GPUS_PER_NODE:-4}
+CPUS_PER_TASK=${CPUS_PER_TASK:-5}
+PY_ARGS=${@:5}
+SRUN_ARGS=${SRUN_ARGS:-""}
+
+PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
+srun -p ${PARTITION} \
+    --job-name=${JOB_NAME} \
+    --gres=gpu:${GPUS_PER_NODE} \
+    --ntasks=${GPUS} \
+    --ntasks-per-node=${GPUS_PER_NODE} \
+    --cpus-per-task=${CPUS_PER_TASK} \
+    --kill-on-bad-exit=1 \
+    ${SRUN_ARGS} \
+    python -u tools/test.py ${CONFIG} ${CHECKPOINT} --launcher="slurm" ${PY_ARGS}
diff --git a/tools/slurm_train.sh b/tools/slurm_train.sh
new file mode 100755
index 0000000000000000000000000000000000000000..ab232105f0309c720ed81a522eca14b6fbd64afd
--- /dev/null
+++ b/tools/slurm_train.sh
@@ -0,0 +1,23 @@
+#!/usr/bin/env bash
+
+set -x
+
+PARTITION=$1
+JOB_NAME=$2
+CONFIG=$3
+GPUS=${GPUS:-4}
+GPUS_PER_NODE=${GPUS_PER_NODE:-4}
+CPUS_PER_TASK=${CPUS_PER_TASK:-5}
+SRUN_ARGS=${SRUN_ARGS:-""}
+PY_ARGS=${@:4}
+
+PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
+srun -p ${PARTITION} \
+    --job-name=${JOB_NAME} \
+    --gres=gpu:${GPUS_PER_NODE} \
+    --ntasks=${GPUS} \
+    --ntasks-per-node=${GPUS_PER_NODE} \
+    --cpus-per-task=${CPUS_PER_TASK} \
+    --kill-on-bad-exit=1 \
+    ${SRUN_ARGS} \
+    python -u tools/train.py ${CONFIG} --launcher="slurm" ${PY_ARGS}
diff --git a/tools/test.py b/tools/test.py
new file mode 100644
index 0000000000000000000000000000000000000000..fd8589c02957c1702446dfa4f1ed6010e9adf93b
--- /dev/null
+++ b/tools/test.py
@@ -0,0 +1,157 @@
+import argparse
+import os
+
+import mmcv
+import torch
+from mmcv.parallel import MMDataParallel, MMDistributedDataParallel
+from mmcv.runner import (get_dist_info, init_dist, load_checkpoint,
+                         wrap_fp16_model)
+from mmcv.utils import DictAction
+
+from mmseg.apis import multi_gpu_test, single_gpu_test
+from mmseg.datasets import build_dataloader, build_dataset
+from mmseg.models import build_segmentor
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(
+        description='mmseg test (and eval) a model')
+    parser.add_argument('config', help='test config file path')
+    parser.add_argument('checkpoint', help='checkpoint file')
+    parser.add_argument(
+        '--aug-test', action='store_true', help='Use Flip and Multi scale aug')
+    parser.add_argument('--out', help='output result file in pickle format')
+    parser.add_argument(
+        '--format-only',
+        action='store_true',
+        help='Format the output results without performing evaluation. It is '
+        'useful when you want to format the result to a specific format and '
+        'submit it to the test server')
+    parser.add_argument(
+        '--eval',
+        type=str,
+        nargs='+',
+        help='evaluation metrics, which depends on the dataset, e.g., "mIoU"'
+        ' for generic datasets, and "cityscapes" for Cityscapes')
+    parser.add_argument('--show', action='store_true', help='show results')
+    parser.add_argument(
+        '--show-dir', help='directory where painted images will be saved')
+    parser.add_argument(
+        '--gpu-collect',
+        action='store_true',
+        help='whether to use gpu to collect results.')
+    parser.add_argument(
+        '--tmpdir',
+        help='tmp directory used for collecting results from multiple '
+        'workers, available when gpu_collect is not specified')
+    parser.add_argument(
+        '--options', nargs='+', action=DictAction, help='custom options')
+    parser.add_argument(
+        '--eval-options',
+        nargs='+',
+        action=DictAction,
+        help='custom options for evaluation')
+    parser.add_argument(
+        '--launcher',
+        choices=['none', 'pytorch', 'slurm', 'mpi'],
+        default='none',
+        help='job launcher')
+    parser.add_argument(
+        '--opacity',
+        type=float,
+        default=0.5,
+        help='Opacity of painted segmentation map. In (0, 1] range.')
+    parser.add_argument('--local_rank', type=int, default=0)
+    args = parser.parse_args()
+    if 'LOCAL_RANK' not in os.environ:
+        os.environ['LOCAL_RANK'] = str(args.local_rank)
+    return args
+
+
+def main():
+    args = parse_args()
+
+    assert args.out or args.eval or args.format_only or args.show \
+        or args.show_dir, \
+        ('Please specify at least one operation (save, eval, format or show '
+         'the results) with the argument "--out", "--eval", "--format-only", '
+         '"--show" or "--show-dir"')
+
+    if args.eval and args.format_only:
+        raise ValueError('--eval and --format-only cannot both be specified')
+
+    if args.out is not None and not args.out.endswith(('.pkl', '.pickle')):
+        raise ValueError('The output file must be a pkl file.')
+
+    cfg = mmcv.Config.fromfile(args.config)
+    if args.options is not None:
+        cfg.merge_from_dict(args.options)
+    # set cudnn_benchmark
+    if cfg.get('cudnn_benchmark', False):
+        torch.backends.cudnn.benchmark = True
+    if args.aug_test:
+        # hard code index
+        cfg.data.test.pipeline[1].img_ratios = [
+            0.5, 0.75, 1.0, 1.25, 1.5, 1.75
+        ]
+        cfg.data.test.pipeline[1].flip = True
+    cfg.model.pretrained = None
+    cfg.data.test.test_mode = True
+
+    # init distributed env first, since logger depends on the dist info.
+    if args.launcher == 'none':
+        distributed = False
+    else:
+        distributed = True
+        init_dist(args.launcher, **cfg.dist_params)
+
+    # build the dataloader
+    # TODO: support multiple images per gpu (only minor changes are needed)
+    dataset = build_dataset(cfg.data.test)
+    data_loader = build_dataloader(
+        dataset,
+        samples_per_gpu=1,
+        workers_per_gpu=cfg.data.workers_per_gpu,
+        dist=distributed,
+        shuffle=False)
+
+    # build the model and load checkpoint
+    cfg.model.train_cfg = None
+    model = build_segmentor(cfg.model, test_cfg=cfg.get('test_cfg'))
+    fp16_cfg = cfg.get('fp16', None)
+    if fp16_cfg is not None:
+        wrap_fp16_model(model)
+    checkpoint = load_checkpoint(model, args.checkpoint, map_location='cpu')
+    model.CLASSES = checkpoint['meta']['CLASSES']
+    model.PALETTE = checkpoint['meta']['PALETTE']
+
+    efficient_test = False
+    if args.eval_options is not None:
+        efficient_test = args.eval_options.get('efficient_test', False)
+
+    if not distributed:
+        model = MMDataParallel(model, device_ids=[0])
+        outputs = single_gpu_test(model, data_loader, args.show,
+                                  args.show_dir, efficient_test, args.opacity)
+    else:
+        model = MMDistributedDataParallel(
+            model.cuda(),
+            device_ids=[torch.cuda.current_device()],
+            broadcast_buffers=False)
+        outputs = multi_gpu_test(model, data_loader, args.tmpdir,
+                                 args.gpu_collect, efficient_test)
+
+    rank, _ = get_dist_info()
+    if rank == 0:
+        if args.out:
+            print(f'\nwriting results to {args.out}')
+            mmcv.dump(outputs, args.out)
+        kwargs = {} if args.eval_options is None else args.eval_options
+        if args.format_only:
+            dataset.format_results(outputs, **kwargs)
+        if args.eval:
+            dataset.evaluate(outputs, args.eval, **kwargs)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/tools/train.py b/tools/train.py
new file mode 100644
index 0000000000000000000000000000000000000000..3ea053d706f2cdfae545c55b8719dd181cd7c1e3
--- /dev/null
+++ b/tools/train.py
@@ -0,0 +1,166 @@
+import argparse
+import copy
+import os
+import os.path as osp
+import time
+
+import mmcv
+import torch
+from mmcv.runner import init_dist
+from mmcv.utils import Config, DictAction, get_git_hash
+
+from mmseg import __version__
+from mmseg.apis import set_random_seed, train_segmentor
+from mmseg.datasets import build_dataset
+from mmseg.models import build_segmentor
+from mmseg.utils import collect_env, get_root_logger
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(description='Train a segmentor')
+    parser.add_argument('config', help='train config file path')
+    parser.add_argument('--work-dir', help='the dir to save logs and models')
+    parser.add_argument(
+        '--load-from', help='the checkpoint file to load weights from')
+    parser.add_argument(
+        '--resume-from', help='the checkpoint file to resume from')
+    parser.add_argument(
+        '--no-validate',
+        action='store_true',
+        help='whether not to evaluate the checkpoint during training')
+    group_gpus = parser.add_mutually_exclusive_group()
+    group_gpus.add_argument(
+        '--gpus',
+        type=int,
+        help='number of gpus to use '
+        '(only applicable to non-distributed training)')
+    group_gpus.add_argument(
+        '--gpu-ids',
+        type=int,
+        nargs='+',
+        help='ids of gpus to use '
+        '(only applicable to non-distributed training)')
+    parser.add_argument('--seed', type=int, default=None, help='random seed')
+    parser.add_argument(
+        '--deterministic',
+        action='store_true',
+        help='whether to set deterministic options for CUDNN backend.')
+    parser.add_argument(
+        '--options', nargs='+', action=DictAction, help='custom options')
+    parser.add_argument(
+        '--launcher',
+        choices=['none', 'pytorch', 'slurm', 'mpi'],
+        default='none',
+        help='job launcher')
+    parser.add_argument('--local_rank', type=int, default=0)
+    args = parser.parse_args()
+    if 'LOCAL_RANK' not in os.environ:
+        os.environ['LOCAL_RANK'] = str(args.local_rank)
+
+    return args
+
+
+def main():
+    args = parse_args()
+
+    # silently drop a stale --resume-from path; guard against None, since
+    # os.path.exists(None) would raise a TypeError
+    if args.resume_from is not None and not os.path.exists(args.resume_from):
+        args.resume_from = None
+
+    cfg = Config.fromfile(args.config)
+    if args.options is not None:
+        cfg.merge_from_dict(args.options)
+    # set cudnn_benchmark
+    if cfg.get('cudnn_benchmark', False):
+        torch.backends.cudnn.benchmark = True
+
+    # work_dir is determined in this priority: CLI > segment in file > filename
+    if args.work_dir is not None:
+        # update configs according to CLI args if args.work_dir is not None
+        cfg.work_dir = args.work_dir
+    elif cfg.get('work_dir', None) is None:
+        # use config filename as default work_dir if cfg.work_dir is None
+        cfg.work_dir = osp.join('./work_dirs',
+                                osp.splitext(osp.basename(args.config))[0])
+    if args.load_from is not None:
+        cfg.load_from = args.load_from
+    if args.resume_from is not None:
+        cfg.resume_from = args.resume_from
+    if args.gpu_ids is not None:
+        cfg.gpu_ids = args.gpu_ids
+    else:
+        cfg.gpu_ids = range(1) if args.gpus is None else range(args.gpus)
+
+    # init distributed env first, since logger depends on the dist info.
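+    # (torch.distributed.launch and slurm export the env vars that
+    # init_dist reads to set up the process group)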
+ if args.launcher == 'none': + distributed = False + else: + distributed = True + init_dist(args.launcher, **cfg.dist_params) + + # create work_dir + mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir)) + # dump config + cfg.dump(osp.join(cfg.work_dir, osp.basename(args.config))) + # init the logger before other steps + timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime()) + log_file = osp.join(cfg.work_dir, f'{timestamp}.log') + logger = get_root_logger(log_file=log_file, log_level=cfg.log_level) + + # init the meta dict to record some important information such as + # environment info and seed, which will be logged + meta = dict() + # log env info + env_info_dict = collect_env() + env_info = '\n'.join([f'{k}: {v}' for k, v in env_info_dict.items()]) + dash_line = '-' * 60 + '\n' + logger.info('Environment info:\n' + dash_line + env_info + '\n' + + dash_line) + meta['env_info'] = env_info + + # log some basic info + logger.info(f'Distributed training: {distributed}') + logger.info(f'Config:\n{cfg.pretty_text}') + + # set random seeds + if args.seed is not None: + logger.info(f'Set random seed to {args.seed}, deterministic: ' + f'{args.deterministic}') + set_random_seed(args.seed, deterministic=args.deterministic) + cfg.seed = args.seed + meta['seed'] = args.seed + meta['exp_name'] = osp.basename(args.config) + + model = build_segmentor( + cfg.model, + train_cfg=cfg.get('train_cfg'), + test_cfg=cfg.get('test_cfg')) + + logger.info(model) + + datasets = [build_dataset(cfg.data.train)] + if len(cfg.workflow) == 2: + val_dataset = copy.deepcopy(cfg.data.val) + val_dataset.pipeline = cfg.data.train.pipeline + datasets.append(build_dataset(val_dataset)) + if cfg.checkpoint_config is not None: + # save mmseg version, config file content and class names in + # checkpoints as meta data + cfg.checkpoint_config.meta = dict( + mmseg_version=f'{__version__}+{get_git_hash()[:7]}', + config=cfg.pretty_text, + CLASSES=datasets[0].CLASSES, + PALETTE=datasets[0].PALETTE) + # add an attribute for visualization convenience + model.CLASSES = datasets[0].CLASSES + train_segmentor( + model, + datasets, + cfg, + distributed=distributed, + validate=(not args.no_validate), + timestamp=timestamp, + meta=meta) + + +if __name__ == '__main__': + main()
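+
+# Typical invocations (illustrative paths, not part of the original script):
+#   single GPU:  python tools/train.py configs/my_config.py
+#   slurm:       bash tools/slurm_train.sh PARTITION JOB_NAME configs/my_config.py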