diff --git a/app.py b/app.py new file mode 100644 index 0000000000000000000000000000000000000000..8524e66469ef3356a648c11cddfd7f4e1ecdfffa --- /dev/null +++ b/app.py @@ -0,0 +1,75 @@ +#try: +# import detectron2 +#except: +import os +os.system('pip install git+https://github.com/SysCV/transfiner.git') + +from matplotlib.pyplot import axis +import gradio as gr +import requests +import numpy as np +from torch import nn +import requests + +import torch + +from detectron2 import model_zoo +from detectron2.engine import DefaultPredictor +from detectron2.config import get_cfg +from detectron2.utils.visualizer import Visualizer +from detectron2.data import MetadataCatalog + +''' +url1 = 'https://cdn.pixabay.com/photo/2014/09/07/21/52/city-438393_1280.jpg' +r = requests.get(url1, allow_redirects=True) +open("city1.jpg", 'wb').write(r.content) +url2 = 'https://cdn.pixabay.com/photo/2016/02/19/11/36/canal-1209808_1280.jpg' +r = requests.get(url2, allow_redirects=True) +open("city2.jpg", 'wb').write(r.content) +''' + +model_name='./configs/COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x.yaml' + +# model = model_zoo.get(model_name, trained=True) + +cfg = get_cfg() +# add project-specific config (e.g., TensorMask) here if you're not running a model in detectron2's core library +cfg.merge_from_file(model_zoo.get_config_file(model_name)) +cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5 # set threshold for this model +# Find a model from detectron2's model zoo. You can use the https://dl.fbaipublicfiles... 
url as w ell +cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(model_name) + +if not torch.cuda.is_available(): + cfg.MODEL.DEVICE='cpu' + +predictor = DefaultPredictor(cfg) + + +def inference(image): + img = np.array(image.resize((1024,1024))) + outputs = predictor(img) + + v = Visualizer(img, MetadataCatalog.get(cfg.DATASETS.TRAIN[0]), scale=1.2) + out = v.draw_instance_predictions(outputs["instances"].to("cpu")) + + return out.get_image() + + + +title = "Detectron2-MaskRCNN X101" +description = "demo for Detectron2. To use it, simply upload your image, or click one of the examples to load them. Read more at the links below.\ +
Model: COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x.yaml" +article = "

Simple Copy-Paste is a Strong Data Augmentation Method for Instance Segmentation | Detectron model ZOO

" + +gr.Interface( + inference, + [gr.inputs.Image(type="pil", label="Input")], + gr.outputs.Image(type="numpy", label="Output"), + title=title, + description=description, + article=article, + examples=[ + ["demo/sample_imgs/000000224200.jpg"], + ["demo/sample_imgs/000000344909.jpg"] + ]).launch() + diff --git a/configs/Base-RCNN-C4.yaml b/configs/Base-RCNN-C4.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fbf34a0ea57a587e09997edd94c4012d69d0b6ad --- /dev/null +++ b/configs/Base-RCNN-C4.yaml @@ -0,0 +1,18 @@ +MODEL: + META_ARCHITECTURE: "GeneralizedRCNN" + RPN: + PRE_NMS_TOPK_TEST: 6000 + POST_NMS_TOPK_TEST: 1000 + ROI_HEADS: + NAME: "Res5ROIHeads" +DATASETS: + TRAIN: ("coco_2017_train",) + TEST: ("coco_2017_val",) +SOLVER: + IMS_PER_BATCH: 16 + BASE_LR: 0.02 + STEPS: (60000, 80000) + MAX_ITER: 90000 +INPUT: + MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) +VERSION: 2 diff --git a/configs/Base-RCNN-DilatedC5.yaml b/configs/Base-RCNN-DilatedC5.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c0d6d16bdaf532f09e4976f0aa240a49e748da27 --- /dev/null +++ b/configs/Base-RCNN-DilatedC5.yaml @@ -0,0 +1,31 @@ +MODEL: + META_ARCHITECTURE: "GeneralizedRCNN" + RESNETS: + OUT_FEATURES: ["res5"] + RES5_DILATION: 2 + RPN: + IN_FEATURES: ["res5"] + PRE_NMS_TOPK_TEST: 6000 + POST_NMS_TOPK_TEST: 1000 + ROI_HEADS: + NAME: "StandardROIHeads" + IN_FEATURES: ["res5"] + ROI_BOX_HEAD: + NAME: "FastRCNNConvFCHead" + NUM_FC: 2 + POOLER_RESOLUTION: 7 + ROI_MASK_HEAD: + NAME: "MaskRCNNConvUpsampleHead" + NUM_CONV: 4 + POOLER_RESOLUTION: 14 +DATASETS: + TRAIN: ("coco_2017_train",) + TEST: ("coco_2017_val",) +SOLVER: + IMS_PER_BATCH: 16 + BASE_LR: 0.02 + STEPS: (60000, 80000) + MAX_ITER: 90000 +INPUT: + MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) +VERSION: 2 diff --git a/configs/Base-RCNN-FPN-4gpu.yaml b/configs/Base-RCNN-FPN-4gpu.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..628542c2c1d06783b53aa8f68720f58181fc7744 --- /dev/null +++ b/configs/Base-RCNN-FPN-4gpu.yaml @@ -0,0 +1,44 @@ +MODEL: + META_ARCHITECTURE: "GeneralizedRCNN" + BACKBONE: + NAME: "build_resnet_fpn_backbone" + RESNETS: + OUT_FEATURES: ["res2", "res3", "res4", "res5"] + FPN: + IN_FEATURES: ["res2", "res3", "res4", "res5"] + ANCHOR_GENERATOR: + SIZES: [[32], [64], [128], [256], [512]] # One size for each in feature map + ASPECT_RATIOS: [[0.5, 1.0, 2.0]] # Three aspect ratios (same for all in feature maps) + RPN: + IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"] + PRE_NMS_TOPK_TRAIN: 2000 # Per FPN level + PRE_NMS_TOPK_TEST: 1000 # Per FPN level + # Detectron1 uses 2000 proposals per-batch, + # (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue) + # which is approximately 1000 proposals per-image since the default batch size for FPN is 2. + POST_NMS_TOPK_TRAIN: 1000 + POST_NMS_TOPK_TEST: 1000 + ROI_HEADS: + NAME: "StandardROIHeads" + IN_FEATURES: ["p2", "p3", "p4", "p5"] + ROI_BOX_HEAD: + NAME: "FastRCNNConvFCHead" + NUM_FC: 2 + POOLER_RESOLUTION: 7 + ROI_MASK_HEAD: + NAME: "MaskRCNNConvUpsampleHead" + NUM_CONV: 4 + POOLER_RESOLUTION: 14 +DATASETS: + TRAIN: ("coco_2017_train",) + #TEST: ("coco_2017_val",) + #TEST: ("lvis_v0.5_val_cocofied",) + TEST: ("coco_2017_test-dev",) +SOLVER: + IMS_PER_BATCH: 16 #8 #16 + BASE_LR: 0.02 # 0.02 + STEPS: (60000, 80000) + MAX_ITER: 90000 +INPUT: + MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) +VERSION: 2 diff --git a/configs/Base-RCNN-FPN.yaml b/configs/Base-RCNN-FPN.yaml new file mode 100644 index 0000000000000000000000000000000000000000..45f3f0bb5761f5162b4d9c180f3222aeeb79b1b3 --- /dev/null +++ b/configs/Base-RCNN-FPN.yaml @@ -0,0 +1,42 @@ +MODEL: + META_ARCHITECTURE: "GeneralizedRCNN" + BACKBONE: + NAME: "build_resnet_fpn_backbone" + RESNETS: + OUT_FEATURES: ["res2", "res3", "res4", "res5"] + FPN: + IN_FEATURES: ["res2", "res3", "res4", "res5"] + ANCHOR_GENERATOR: + SIZES: 
[[32], [64], [128], [256], [512]] # One size for each in feature map + ASPECT_RATIOS: [[0.5, 1.0, 2.0]] # Three aspect ratios (same for all in feature maps) + RPN: + IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"] + PRE_NMS_TOPK_TRAIN: 2000 # Per FPN level + PRE_NMS_TOPK_TEST: 1000 # Per FPN level + # Detectron1 uses 2000 proposals per-batch, + # (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue) + # which is approximately 1000 proposals per-image since the default batch size for FPN is 2. + POST_NMS_TOPK_TRAIN: 1000 + POST_NMS_TOPK_TEST: 1000 + ROI_HEADS: + NAME: "StandardROIHeads" + IN_FEATURES: ["p2", "p3", "p4", "p5"] + ROI_BOX_HEAD: + NAME: "FastRCNNConvFCHead" + NUM_FC: 2 + POOLER_RESOLUTION: 7 + ROI_MASK_HEAD: + NAME: "MaskRCNNConvUpsampleHead" + NUM_CONV: 4 + POOLER_RESOLUTION: 14 +DATASETS: + TRAIN: ("coco_2017_train",) + TEST: ("coco_2017_val",) +SOLVER: + IMS_PER_BATCH: 16 #16 + BASE_LR: 0.02 + STEPS: (60000, 80000) + MAX_ITER: 90000 +INPUT: + MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) +VERSION: 2 diff --git a/configs/Base-RetinaNet.yaml b/configs/Base-RetinaNet.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8b45b982bbf84b34d2a6a172ab0a946b1029f7c8 --- /dev/null +++ b/configs/Base-RetinaNet.yaml @@ -0,0 +1,25 @@ +MODEL: + META_ARCHITECTURE: "RetinaNet" + BACKBONE: + NAME: "build_retinanet_resnet_fpn_backbone" + RESNETS: + OUT_FEATURES: ["res3", "res4", "res5"] + ANCHOR_GENERATOR: + SIZES: !!python/object/apply:eval ["[[x, x * 2**(1.0/3), x * 2**(2.0/3) ] for x in [32, 64, 128, 256, 512 ]]"] + FPN: + IN_FEATURES: ["res3", "res4", "res5"] + RETINANET: + IOU_THRESHOLDS: [0.4, 0.5] + IOU_LABELS: [0, -1, 1] + SMOOTH_L1_LOSS_BETA: 0.0 +DATASETS: + TRAIN: ("coco_2017_train",) + TEST: ("coco_2017_val",) +SOLVER: + IMS_PER_BATCH: 16 + BASE_LR: 0.01 # Note that RetinaNet uses a different default learning rate + STEPS: (60000, 80000) + MAX_ITER: 90000 +INPUT: + MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) +VERSION: 2 
diff --git a/configs/COCO-Detection/fast_rcnn_R_50_FPN_1x.yaml b/configs/COCO-Detection/fast_rcnn_R_50_FPN_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..773ac10e87c626760d00d831bf664ce9ff073c49 --- /dev/null +++ b/configs/COCO-Detection/fast_rcnn_R_50_FPN_1x.yaml @@ -0,0 +1,17 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: False + LOAD_PROPOSALS: True + RESNETS: + DEPTH: 50 + PROPOSAL_GENERATOR: + NAME: "PrecomputedProposals" +DATASETS: + TRAIN: ("coco_2017_train",) + PROPOSAL_FILES_TRAIN: ("detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/coco_2017_train_box_proposals_21bc3a.pkl", ) + TEST: ("coco_2017_val",) + PROPOSAL_FILES_TEST: ("detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/coco_2017_val_box_proposals_ee0dad.pkl", ) +DATALOADER: + # proposals are part of the dataset_dicts, and take a lot of RAM + NUM_WORKERS: 2 diff --git a/configs/COCO-Detection/faster_rcnn_R_101_C4_3x.yaml b/configs/COCO-Detection/faster_rcnn_R_101_C4_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..db142cd671c1841b4f64cf130bee7f7954ecdd28 --- /dev/null +++ b/configs/COCO-Detection/faster_rcnn_R_101_C4_3x.yaml @@ -0,0 +1,9 @@ +_BASE_: "../Base-RCNN-C4.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" + MASK_ON: False + RESNETS: + DEPTH: 101 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 diff --git a/configs/COCO-Detection/faster_rcnn_R_101_DC5_3x.yaml b/configs/COCO-Detection/faster_rcnn_R_101_DC5_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bceb6b343618d8cd9a6c414ff9eb86ab31cc230a --- /dev/null +++ b/configs/COCO-Detection/faster_rcnn_R_101_DC5_3x.yaml @@ -0,0 +1,9 @@ +_BASE_: "../Base-RCNN-DilatedC5.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" + MASK_ON: False + RESNETS: + DEPTH: 101 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 diff --git 
a/configs/COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml b/configs/COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..57a098f53ee8c54ecfa354cc96efefd890dc1b72 --- /dev/null +++ b/configs/COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml @@ -0,0 +1,9 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" + MASK_ON: False + RESNETS: + DEPTH: 101 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 diff --git a/configs/COCO-Detection/faster_rcnn_R_50_C4_1x.yaml b/configs/COCO-Detection/faster_rcnn_R_50_C4_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f96130105c3ba6ab393e0932870903875f5cb732 --- /dev/null +++ b/configs/COCO-Detection/faster_rcnn_R_50_C4_1x.yaml @@ -0,0 +1,6 @@ +_BASE_: "../Base-RCNN-C4.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: False + RESNETS: + DEPTH: 50 diff --git a/configs/COCO-Detection/faster_rcnn_R_50_C4_3x.yaml b/configs/COCO-Detection/faster_rcnn_R_50_C4_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bc51bce390a85ee3529ffdcebde05748e1646be0 --- /dev/null +++ b/configs/COCO-Detection/faster_rcnn_R_50_C4_3x.yaml @@ -0,0 +1,9 @@ +_BASE_: "../Base-RCNN-C4.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: False + RESNETS: + DEPTH: 50 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 diff --git a/configs/COCO-Detection/faster_rcnn_R_50_DC5_1x.yaml b/configs/COCO-Detection/faster_rcnn_R_50_DC5_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0fe96f57febdac5790ea4cec168fa4b97ac4807a --- /dev/null +++ b/configs/COCO-Detection/faster_rcnn_R_50_DC5_1x.yaml @@ -0,0 +1,6 @@ +_BASE_: "../Base-RCNN-DilatedC5.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: False + RESNETS: + DEPTH: 50 diff --git a/configs/COCO-Detection/faster_rcnn_R_50_DC5_3x.yaml 
b/configs/COCO-Detection/faster_rcnn_R_50_DC5_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..33fadeb87d1ef67ab2b55926b9a652ab4ac4a27d --- /dev/null +++ b/configs/COCO-Detection/faster_rcnn_R_50_DC5_3x.yaml @@ -0,0 +1,9 @@ +_BASE_: "../Base-RCNN-DilatedC5.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: False + RESNETS: + DEPTH: 50 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 diff --git a/configs/COCO-Detection/faster_rcnn_R_50_FPN_1x.yaml b/configs/COCO-Detection/faster_rcnn_R_50_FPN_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3262019a1211b910d3b371569199ed1afaacf6a4 --- /dev/null +++ b/configs/COCO-Detection/faster_rcnn_R_50_FPN_1x.yaml @@ -0,0 +1,6 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: False + RESNETS: + DEPTH: 50 diff --git a/configs/COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml b/configs/COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..41395182bf5c9dd8ab1241c4414068817298d554 --- /dev/null +++ b/configs/COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml @@ -0,0 +1,9 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: False + RESNETS: + DEPTH: 50 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 diff --git a/configs/COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml b/configs/COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9c9b5ab77157baa581d90d9847c045c19ed6ffa3 --- /dev/null +++ b/configs/COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml @@ -0,0 +1,13 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + MASK_ON: False + WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl" + PIXEL_STD: [57.375, 57.120, 58.395] + RESNETS: + STRIDE_IN_1X1: False # this is a C2 model + NUM_GROUPS: 32 + 
WIDTH_PER_GROUP: 8 + DEPTH: 101 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 diff --git a/configs/COCO-Detection/retinanet_R_101_FPN_3x.yaml b/configs/COCO-Detection/retinanet_R_101_FPN_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4abb1b9a547957aa6afc0b29129e00f89cf98d59 --- /dev/null +++ b/configs/COCO-Detection/retinanet_R_101_FPN_3x.yaml @@ -0,0 +1,8 @@ +_BASE_: "../Base-RetinaNet.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" + RESNETS: + DEPTH: 101 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 diff --git a/configs/COCO-Detection/retinanet_R_50_FPN_1x.py b/configs/COCO-Detection/retinanet_R_50_FPN_1x.py new file mode 100644 index 0000000000000000000000000000000000000000..db86b18a9ee03789f5bc0066d470609d3515d524 --- /dev/null +++ b/configs/COCO-Detection/retinanet_R_50_FPN_1x.py @@ -0,0 +1,9 @@ +from ..common.optim import SGD as optimizer +from ..common.coco_schedule import lr_multiplier_1x as lr_multiplier +from ..common.data.coco import dataloader +from ..common.models.retinanet import model +from ..common.train import train + +dataloader.train.mapper.use_instance_mask = False +model.backbone.bottom_up.freeze_at = 2 +optimizer.lr = 0.01 diff --git a/configs/COCO-Detection/retinanet_R_50_FPN_1x.yaml b/configs/COCO-Detection/retinanet_R_50_FPN_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4a24ce3a9a108a8792e18c8aabfb7b712f0d3725 --- /dev/null +++ b/configs/COCO-Detection/retinanet_R_50_FPN_1x.yaml @@ -0,0 +1,5 @@ +_BASE_: "../Base-RetinaNet.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + RESNETS: + DEPTH: 50 diff --git a/configs/COCO-Detection/retinanet_R_50_FPN_3x.yaml b/configs/COCO-Detection/retinanet_R_50_FPN_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3b5412d4a7aef1d6c3f7c1e34f94007de639b833 --- /dev/null +++ b/configs/COCO-Detection/retinanet_R_50_FPN_3x.yaml @@ -0,0 +1,8 @@ +_BASE_: 
"../Base-RetinaNet.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + RESNETS: + DEPTH: 50 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 diff --git a/configs/COCO-Detection/rpn_R_50_C4_1x.yaml b/configs/COCO-Detection/rpn_R_50_C4_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e04821156b0376ba5215d5ce5b7010a36b43e6a1 --- /dev/null +++ b/configs/COCO-Detection/rpn_R_50_C4_1x.yaml @@ -0,0 +1,10 @@ +_BASE_: "../Base-RCNN-C4.yaml" +MODEL: + META_ARCHITECTURE: "ProposalNetwork" + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: False + RESNETS: + DEPTH: 50 + RPN: + PRE_NMS_TOPK_TEST: 12000 + POST_NMS_TOPK_TEST: 2000 diff --git a/configs/COCO-Detection/rpn_R_50_FPN_1x.yaml b/configs/COCO-Detection/rpn_R_50_FPN_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..dc9c95203b1c3c9cd9bb9876bb8d9a5dd9b31d9a --- /dev/null +++ b/configs/COCO-Detection/rpn_R_50_FPN_1x.yaml @@ -0,0 +1,9 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + META_ARCHITECTURE: "ProposalNetwork" + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: False + RESNETS: + DEPTH: 50 + RPN: + POST_NMS_TOPK_TEST: 2000 diff --git a/configs/COCO-InstanceSegmentation/.mask_rcnn_R_50_FPN_1x_4gpu.yaml.swp b/configs/COCO-InstanceSegmentation/.mask_rcnn_R_50_FPN_1x_4gpu.yaml.swp new file mode 100644 index 0000000000000000000000000000000000000000..d3cd42dd54ff5bad2b489ac7aa2e5d54aa651484 Binary files /dev/null and b/configs/COCO-InstanceSegmentation/.mask_rcnn_R_50_FPN_1x_4gpu.yaml.swp differ diff --git a/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_C4_3x.yaml b/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_C4_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1a94cc45a0f2aaa8c92e14871c553b736545e327 --- /dev/null +++ b/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_C4_3x.yaml @@ -0,0 +1,9 @@ +_BASE_: "../Base-RCNN-C4.yaml" +MODEL: + WEIGHTS: 
"detectron2://ImageNetPretrained/MSRA/R-101.pkl" + MASK_ON: True + RESNETS: + DEPTH: 101 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 diff --git a/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_DC5_3x.yaml b/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_DC5_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..67b70cf4be8c19f5dc735b6f55a8690698f34b69 --- /dev/null +++ b/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_DC5_3x.yaml @@ -0,0 +1,9 @@ +_BASE_: "../Base-RCNN-DilatedC5.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" + MASK_ON: True + RESNETS: + DEPTH: 101 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 diff --git a/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml b/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1935a302d2d0fa7f69553b3fd50b5a7082c6c0d1 --- /dev/null +++ b/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml @@ -0,0 +1,9 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" + MASK_ON: True + RESNETS: + DEPTH: 101 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 diff --git a/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x_4gpu_transfiner.yaml b/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x_4gpu_transfiner.yaml new file mode 100644 index 0000000000000000000000000000000000000000..315b95933a4449f1bee6790d31f45bdd180717de --- /dev/null +++ b/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x_4gpu_transfiner.yaml @@ -0,0 +1,9 @@ +_BASE_: "../Base-RCNN-FPN-4gpu.yaml" +MODEL: + WEIGHTS: "./init_weights/model_final_a3ec72.pkl" + MASK_ON: True + RESNETS: + DEPTH: 101 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 diff --git a/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x_4gpu_transfiner_deform.yaml 
b/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x_4gpu_transfiner_deform.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0e85038edcce2cfd676e518e01274d7670cfa57a --- /dev/null +++ b/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x_4gpu_transfiner_deform.yaml @@ -0,0 +1,11 @@ +_BASE_: "../Base-RCNN-FPN-4gpu.yaml" +MODEL: + WEIGHTS: "./init_weights/model_final_a3ec72.pkl" + MASK_ON: True + RESNETS: + DEPTH: 101 + DEFORM_ON_PER_STAGE: [False, True, True, True] # on Res3,Res4,Res5 + DEFORM_MODULATED: False +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 diff --git a/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x_4gpu_transfiner_lvis.yaml b/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x_4gpu_transfiner_lvis.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d5197d67b143be9a4a1273a9a8983cd32d80e6a7 --- /dev/null +++ b/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x_4gpu_transfiner_lvis.yaml @@ -0,0 +1,12 @@ +_BASE_: "../Base-RCNN-FPN-4gpu.yaml" +MODEL: + WEIGHTS: "./init_weights/model_final_a3ec72.pkl" + MASK_ON: True + RESNETS: + DEPTH: 101 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 +DATASETS: + TEST: ("lvis_v0.5_val_cocofied",) + diff --git a/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_1x.py b/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_1x.py new file mode 100644 index 0000000000000000000000000000000000000000..5e5bf932d9e216c1a866c8ed5d1d571242c97326 --- /dev/null +++ b/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_1x.py @@ -0,0 +1,7 @@ +from ..common.train import train +from ..common.optim import SGD as optimizer +from ..common.coco_schedule import lr_multiplier_1x as lr_multiplier +from ..common.data.coco import dataloader +from ..common.models.mask_rcnn_c4 import model + +model.backbone.freeze_at = 2 diff --git a/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_1x.yaml b/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_1x.yaml 
new file mode 100644 index 0000000000000000000000000000000000000000..a9aeb4eac38026dbb867e799f9fd3a8d8eb3af80 --- /dev/null +++ b/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_1x.yaml @@ -0,0 +1,6 @@ +_BASE_: "../Base-RCNN-C4.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True + RESNETS: + DEPTH: 50 diff --git a/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x.yaml b/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..38ed867d897dfec839cbcf11a2e2dc8abb92f07c --- /dev/null +++ b/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x.yaml @@ -0,0 +1,9 @@ +_BASE_: "../Base-RCNN-C4.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True + RESNETS: + DEPTH: 50 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 diff --git a/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_1x.yaml b/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b13eefab2a049c48d94d5051c82ceb6dbde40579 --- /dev/null +++ b/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_1x.yaml @@ -0,0 +1,6 @@ +_BASE_: "../Base-RCNN-DilatedC5.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True + RESNETS: + DEPTH: 50 diff --git a/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x.yaml b/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d401016358f967f6619d88b1c9bd5673a1cdeba8 --- /dev/null +++ b/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x.yaml @@ -0,0 +1,9 @@ +_BASE_: "../Base-RCNN-DilatedC5.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True + RESNETS: + DEPTH: 50 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 diff --git a/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.py 
b/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.py new file mode 100644 index 0000000000000000000000000000000000000000..6f216a61c5181fe9aa1c5d1008b51cbc6fb86285 --- /dev/null +++ b/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.py @@ -0,0 +1,7 @@ +from ..common.optim import SGD as optimizer +from ..common.coco_schedule import lr_multiplier_1x as lr_multiplier +from ..common.data.coco import dataloader +from ..common.models.mask_rcnn_fpn import model +from ..common.train import train + +model.backbone.bottom_up.freeze_at = 2 diff --git a/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml b/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d50fb866ca7811a87b42555c7213f88e00bf6df1 --- /dev/null +++ b/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml @@ -0,0 +1,6 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True + RESNETS: + DEPTH: 50 diff --git a/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x_4gpu.yaml b/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x_4gpu.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fb896c8fa5971cea94099fbfffc9140418603af8 --- /dev/null +++ b/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x_4gpu.yaml @@ -0,0 +1,6 @@ +_BASE_: "../Base-RCNN-FPN-4gpu.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True + RESNETS: + DEPTH: 50 diff --git a/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x_4gpu_transfiner.yaml b/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x_4gpu_transfiner.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6eb97408fee1a5aec65d0985a6eac2598aa9b113 --- /dev/null +++ b/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x_4gpu_transfiner.yaml @@ -0,0 +1,6 @@ +_BASE_: "../Base-RCNN-FPN-4gpu.yaml" +MODEL: + WEIGHTS: 
"./init_weights/model_final_a54504.pkl" + MASK_ON: True + RESNETS: + DEPTH: 50 diff --git a/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x_giou.yaml b/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x_giou.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bec680ee17a474fefe527b7b79d26266e75c09f0 --- /dev/null +++ b/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x_giou.yaml @@ -0,0 +1,12 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True + RESNETS: + DEPTH: 50 + RPN: + BBOX_REG_LOSS_TYPE: "giou" + BBOX_REG_LOSS_WEIGHT: 2.0 + ROI_BOX_HEAD: + BBOX_REG_LOSS_TYPE: "giou" + BBOX_REG_LOSS_WEIGHT: 10.0 diff --git a/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml b/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..be7d06b8e0f032ee7fcaabd7c122158518489fd2 --- /dev/null +++ b/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml @@ -0,0 +1,9 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True + RESNETS: + DEPTH: 50 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 diff --git a/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x_4gpu_transfiner.yaml b/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x_4gpu_transfiner.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f962edd32eaa6e8ea38d70599e036f8a415fbe6c --- /dev/null +++ b/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x_4gpu_transfiner.yaml @@ -0,0 +1,9 @@ +_BASE_: "../Base-RCNN-FPN-4gpu.yaml" +MODEL: + WEIGHTS: "./init_weights/model_final_f10217.pkl" + MASK_ON: True + RESNETS: + DEPTH: 50 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 diff --git a/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x_4gpu_transfiner_deform.yaml 
b/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x_4gpu_transfiner_deform.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d4ef02309eade988e17afcdd2fe4c4c8f96a23e7 --- /dev/null +++ b/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x_4gpu_transfiner_deform.yaml @@ -0,0 +1,11 @@ +_BASE_: "../Base-RCNN-FPN-4gpu.yaml" +MODEL: + WEIGHTS: "./init_weights/model_final_f10217.pkl" + MASK_ON: True + RESNETS: + DEPTH: 50 + DEFORM_ON_PER_STAGE: [False, True, True, True] # on Res3,Res4,Res5 + DEFORM_MODULATED: False +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 diff --git a/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x_4gpu_transfiner_lvis.yaml b/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x_4gpu_transfiner_lvis.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3ae4ae125513269daf90df615fa5b0324c3cc61b --- /dev/null +++ b/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x_4gpu_transfiner_lvis.yaml @@ -0,0 +1,12 @@ +_BASE_: "../Base-RCNN-FPN-4gpu.yaml" +MODEL: + WEIGHTS: "./init_weights/model_final_f10217.pkl" + MASK_ON: True + RESNETS: + DEPTH: 50 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 +DATASETS: + TEST: ("lvis_v0.5_val_cocofied",) + diff --git a/configs/COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x.yaml b/configs/COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d14c63f74383bfc308750f51d51344398b02a239 --- /dev/null +++ b/configs/COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x.yaml @@ -0,0 +1,13 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + MASK_ON: True + WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl" + PIXEL_STD: [57.375, 57.120, 58.395] + RESNETS: + STRIDE_IN_1X1: False # this is a C2 model + NUM_GROUPS: 32 + WIDTH_PER_GROUP: 8 + DEPTH: 101 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 diff --git 
a/configs/COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x_transfiner.yaml b/configs/COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x_transfiner.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b6fc19397ba59079cd2add4751f96a202b8b1e37 --- /dev/null +++ b/configs/COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x_transfiner.yaml @@ -0,0 +1,13 @@ +_BASE_: "../Base-RCNN-FPN-4gpu.yaml" +MODEL: + MASK_ON: True + WEIGHTS: "./init_weights/model_final_x101.pkl" + PIXEL_STD: [57.375, 57.120, 58.395] + RESNETS: + STRIDE_IN_1X1: False # this is a C2 model + NUM_GROUPS: 32 + WIDTH_PER_GROUP: 8 + DEPTH: 101 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 diff --git a/configs/COCO-InstanceSegmentation/mask_rcnn_regnetx_4gf_dds_fpn_1x.py b/configs/COCO-InstanceSegmentation/mask_rcnn_regnetx_4gf_dds_fpn_1x.py new file mode 100644 index 0000000000000000000000000000000000000000..d7bbdd7d00505f1e51154379c99ab621cb648a6d --- /dev/null +++ b/configs/COCO-InstanceSegmentation/mask_rcnn_regnetx_4gf_dds_fpn_1x.py @@ -0,0 +1,34 @@ +from ..common.optim import SGD as optimizer +from ..common.coco_schedule import lr_multiplier_1x as lr_multiplier +from ..common.data.coco import dataloader +from ..common.models.mask_rcnn_fpn import model +from ..common.train import train + +from detectron2.config import LazyCall as L +from detectron2.modeling.backbone import RegNet +from detectron2.modeling.backbone.regnet import SimpleStem, ResBottleneckBlock + + +# Replace default ResNet with RegNetX-4GF from the DDS paper. 
Config source: +# https://github.com/facebookresearch/pycls/blob/2c152a6e5d913e898cca4f0a758f41e6b976714d/configs/dds_baselines/regnetx/RegNetX-4.0GF_dds_8gpu.yaml#L4-L9 # noqa +model.backbone.bottom_up = L(RegNet)( + stem_class=SimpleStem, + stem_width=32, + block_class=ResBottleneckBlock, + depth=23, + w_a=38.65, + w_0=96, + w_m=2.43, + group_width=40, + freeze_at=2, + norm="FrozenBN", + out_features=["s1", "s2", "s3", "s4"], +) +model.pixel_std = [57.375, 57.120, 58.395] + +optimizer.weight_decay = 5e-5 +train.init_checkpoint = ( + "https://dl.fbaipublicfiles.com/pycls/dds_baselines/160906383/RegNetX-4.0GF_dds_8gpu.pyth" +) +# RegNets benefit from enabling cudnn benchmark mode +train.cudnn_benchmark = True diff --git a/configs/COCO-InstanceSegmentation/mask_rcnn_regnety_4gf_dds_fpn_1x.py b/configs/COCO-InstanceSegmentation/mask_rcnn_regnety_4gf_dds_fpn_1x.py new file mode 100644 index 0000000000000000000000000000000000000000..72c6b7a5c8939970bd0e1e4a3c1155695943b19a --- /dev/null +++ b/configs/COCO-InstanceSegmentation/mask_rcnn_regnety_4gf_dds_fpn_1x.py @@ -0,0 +1,35 @@ +from ..common.optim import SGD as optimizer +from ..common.coco_schedule import lr_multiplier_1x as lr_multiplier +from ..common.data.coco import dataloader +from ..common.models.mask_rcnn_fpn import model +from ..common.train import train + +from detectron2.config import LazyCall as L +from detectron2.modeling.backbone import RegNet +from detectron2.modeling.backbone.regnet import SimpleStem, ResBottleneckBlock + + +# Replace default ResNet with RegNetY-4GF from the DDS paper. 
Config source: +# https://github.com/facebookresearch/pycls/blob/2c152a6e5d913e898cca4f0a758f41e6b976714d/configs/dds_baselines/regnety/RegNetY-4.0GF_dds_8gpu.yaml#L4-L10 # noqa +model.backbone.bottom_up = L(RegNet)( + stem_class=SimpleStem, + stem_width=32, + block_class=ResBottleneckBlock, + depth=22, + w_a=31.41, + w_0=96, + w_m=2.24, + group_width=64, + se_ratio=0.25, + freeze_at=2, + norm="FrozenBN", + out_features=["s1", "s2", "s3", "s4"], +) +model.pixel_std = [57.375, 57.120, 58.395] + +optimizer.weight_decay = 5e-5 +train.init_checkpoint = ( + "https://dl.fbaipublicfiles.com/pycls/dds_baselines/160906838/RegNetY-4.0GF_dds_8gpu.pyth" +) +# RegNets benefit from enabling cudnn benchmark mode +train.cudnn_benchmark = True diff --git a/configs/COCO-Keypoints/Base-Keypoint-RCNN-FPN.yaml b/configs/COCO-Keypoints/Base-Keypoint-RCNN-FPN.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4e03944a42d2e497da5ceca17c8fda797dac3f82 --- /dev/null +++ b/configs/COCO-Keypoints/Base-Keypoint-RCNN-FPN.yaml @@ -0,0 +1,15 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + KEYPOINT_ON: True + ROI_HEADS: + NUM_CLASSES: 1 + ROI_BOX_HEAD: + SMOOTH_L1_BETA: 0.5 # Keypoint AP degrades (though box AP improves) when using plain L1 loss + RPN: + # Detectron1 uses 2000 proposals per-batch, but this option is per-image in detectron2. + # 1000 proposals per-image is found to hurt box AP. + # Therefore we increase it to 1500 per-image. 
+ POST_NMS_TOPK_TRAIN: 1500 +DATASETS: + TRAIN: ("keypoints_coco_2017_train",) + TEST: ("keypoints_coco_2017_val",) diff --git a/configs/COCO-Keypoints/keypoint_rcnn_R_101_FPN_3x.yaml b/configs/COCO-Keypoints/keypoint_rcnn_R_101_FPN_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9309535c57a1aa7d23297aac80a9bd78a6c79fcc --- /dev/null +++ b/configs/COCO-Keypoints/keypoint_rcnn_R_101_FPN_3x.yaml @@ -0,0 +1,8 @@ +_BASE_: "Base-Keypoint-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" + RESNETS: + DEPTH: 101 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 diff --git a/configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_1x.py b/configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_1x.py new file mode 100644 index 0000000000000000000000000000000000000000..b74e8ac52d121cfa76ea4f5ec6562552c072ff22 --- /dev/null +++ b/configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_1x.py @@ -0,0 +1,7 @@ +from ..common.optim import SGD as optimizer +from ..common.coco_schedule import lr_multiplier_1x as lr_multiplier +from ..common.data.coco_keypoint import dataloader +from ..common.models.keypoint_rcnn_fpn import model +from ..common.train import train + +model.backbone.bottom_up.freeze_at = 2 diff --git a/configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_1x.yaml b/configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7bf85cf745b53b3e7ab28fe94b7f4f9e7fe6e335 --- /dev/null +++ b/configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_1x.yaml @@ -0,0 +1,5 @@ +_BASE_: "Base-Keypoint-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + RESNETS: + DEPTH: 50 diff --git a/configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml b/configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a07f243f650a497b9372501e3face75194cf0941 --- /dev/null +++ b/configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml 
@@ -0,0 +1,8 @@ +_BASE_: "Base-Keypoint-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + RESNETS: + DEPTH: 50 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 diff --git a/configs/COCO-Keypoints/keypoint_rcnn_X_101_32x8d_FPN_3x.yaml b/configs/COCO-Keypoints/keypoint_rcnn_X_101_32x8d_FPN_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d4bfa20a98c0a65c6bd60e93b07e8f4b7d92a867 --- /dev/null +++ b/configs/COCO-Keypoints/keypoint_rcnn_X_101_32x8d_FPN_3x.yaml @@ -0,0 +1,12 @@ +_BASE_: "Base-Keypoint-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl" + PIXEL_STD: [57.375, 57.120, 58.395] + RESNETS: + STRIDE_IN_1X1: False # this is a C2 model + NUM_GROUPS: 32 + WIDTH_PER_GROUP: 8 + DEPTH: 101 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 diff --git a/configs/COCO-PanopticSegmentation/Base-Panoptic-FPN.yaml b/configs/COCO-PanopticSegmentation/Base-Panoptic-FPN.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f00d54b760c2b9271c75643e0a1ab1ffc0d9543a --- /dev/null +++ b/configs/COCO-PanopticSegmentation/Base-Panoptic-FPN.yaml @@ -0,0 +1,11 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + META_ARCHITECTURE: "PanopticFPN" + MASK_ON: True + SEM_SEG_HEAD: + LOSS_WEIGHT: 0.5 +DATASETS: + TRAIN: ("coco_2017_train_panoptic_separated",) + TEST: ("coco_2017_val_panoptic_separated",) +DATALOADER: + FILTER_EMPTY_ANNOTATIONS: False diff --git a/configs/COCO-PanopticSegmentation/panoptic_fpn_R_101_3x.yaml b/configs/COCO-PanopticSegmentation/panoptic_fpn_R_101_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0e01f6fb31e9b00b1857b7de3b5074184d1f4a21 --- /dev/null +++ b/configs/COCO-PanopticSegmentation/panoptic_fpn_R_101_3x.yaml @@ -0,0 +1,8 @@ +_BASE_: "Base-Panoptic-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" + RESNETS: + DEPTH: 101 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 diff 
--git a/configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_1x.py b/configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_1x.py new file mode 100644 index 0000000000000000000000000000000000000000..d7378e093e12ff854d0c46f1eda9177190d31813 --- /dev/null +++ b/configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_1x.py @@ -0,0 +1,7 @@ +from ..common.optim import SGD as optimizer +from ..common.coco_schedule import lr_multiplier_1x as lr_multiplier +from ..common.data.coco_panoptic_separated import dataloader +from ..common.models.panoptic_fpn import model +from ..common.train import train + +model.backbone.bottom_up.freeze_at = 2 diff --git a/configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_1x.yaml b/configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6afa2c1cc92495309ed1553a17359fe5d7d6566e --- /dev/null +++ b/configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_1x.yaml @@ -0,0 +1,5 @@ +_BASE_: "Base-Panoptic-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + RESNETS: + DEPTH: 50 diff --git a/configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml b/configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b956b3f673e78649184fe2c50e2700b3f1f14794 --- /dev/null +++ b/configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml @@ -0,0 +1,8 @@ +_BASE_: "Base-Panoptic-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + RESNETS: + DEPTH: 50 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 diff --git a/configs/Cityscapes/mask_rcnn_R_50_FPN.yaml b/configs/Cityscapes/mask_rcnn_R_50_FPN.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1a7aaeb961581ed9492c4cfe5a69a1eb60495b3e --- /dev/null +++ b/configs/Cityscapes/mask_rcnn_R_50_FPN.yaml @@ -0,0 +1,27 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + # WEIGHTS: 
"detectron2://ImageNetPretrained/MSRA/R-50.pkl" + # For better, more stable performance initialize from COCO + WEIGHTS: "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl" + MASK_ON: True + ROI_HEADS: + NUM_CLASSES: 8 +# This is similar to the setting used in Mask R-CNN paper, Appendix A +# But there are some differences, e.g., we did not initialize the output +# layer using the corresponding classes from COCO +INPUT: + MIN_SIZE_TRAIN: (800, 832, 864, 896, 928, 960, 992, 1024) + MIN_SIZE_TRAIN_SAMPLING: "choice" + MIN_SIZE_TEST: 1024 + MAX_SIZE_TRAIN: 2048 + MAX_SIZE_TEST: 2048 +DATASETS: + TRAIN: ("cityscapes_fine_instance_seg_train",) + TEST: ("cityscapes_fine_instance_seg_val",) +SOLVER: + BASE_LR: 0.01 + STEPS: (18000,) + MAX_ITER: 24000 + IMS_PER_BATCH: 8 +TEST: + EVAL_PERIOD: 8000 diff --git a/configs/Cityscapes/mask_rcnn_R_50_FPN_4gpu.yaml b/configs/Cityscapes/mask_rcnn_R_50_FPN_4gpu.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4b4f2e6545e6920f8d3a84f1c517d79679a848c0 --- /dev/null +++ b/configs/Cityscapes/mask_rcnn_R_50_FPN_4gpu.yaml @@ -0,0 +1,27 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + # WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + # For better, more stable performance initialize from COCO + WEIGHTS: "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl" + MASK_ON: True + ROI_HEADS: + NUM_CLASSES: 8 +# This is similar to the setting used in Mask R-CNN paper, Appendix A +# But there are some differences, e.g., we did not initialize the output +# layer using the corresponding classes from COCO +INPUT: + MIN_SIZE_TRAIN: (800, 832, 864, 896, 928, 960, 992, 1024) + MIN_SIZE_TRAIN_SAMPLING: "choice" + MIN_SIZE_TEST: 1024 + MAX_SIZE_TRAIN: 2048 + MAX_SIZE_TEST: 2048 +DATASETS: + TRAIN: ("cityscapes_fine_instance_seg_train",) + TEST: ("cityscapes_fine_instance_seg_val",) +SOLVER: + BASE_LR: 0.005 + STEPS: (36000,) + MAX_ITER: 48000 
+ IMS_PER_BATCH: 4 +TEST: + EVAL_PERIOD: 48000 diff --git a/configs/Detectron1-Comparisons/README.md b/configs/Detectron1-Comparisons/README.md new file mode 100644 index 0000000000000000000000000000000000000000..924fd00af642ddf1a4ff4c4f5947f676134eb7de --- /dev/null +++ b/configs/Detectron1-Comparisons/README.md @@ -0,0 +1,84 @@ + +Detectron2 model zoo's experimental settings and a few implementation details are different from Detectron. + +The differences in implementation details are shared in +[Compatibility with Other Libraries](../../docs/notes/compatibility.md). + +The differences in model zoo's experimental settings include: +* Use scale augmentation during training. This improves AP with lower training cost. +* Use L1 loss instead of smooth L1 loss for simplicity. This sometimes improves box AP but may + affect other AP. +* Use `POOLER_SAMPLING_RATIO=0` instead of 2. This does not significantly affect AP. +* Use `ROIAlignV2`. This does not significantly affect AP. + +In this directory, we provide a few configs that __do not__ have the above changes. +They mimic Detectron's behavior as close as possible, +and provide a fair comparison of accuracy and speed against Detectron. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
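| Name | lr sched | train time (s/iter) | inference time (s/im) | train mem (GB) | box AP | mask AP | kp. AP | model id | download |
|:---|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|
| Faster R-CNN | 1x | 0.219 | 0.038 | 3.1 | 36.9 | | | 137781054 | model \| metrics |
| Keypoint R-CNN | 1x | 0.313 | 0.071 | 5.0 | 53.1 | | 64.2 | 137781195 | model \| metrics |
| Mask R-CNN | 1x | 0.273 | 0.043 | 3.4 | 37.8 | 34.9 | | 137781281 | model \| metrics |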
+ +## Comparisons: + +* Faster R-CNN: Detectron's AP is 36.7, similar to ours. +* Keypoint R-CNN: Detectron's AP is box 53.6, keypoint 64.2. Fixing a Detectron + [bug](https://github.com/facebookresearch/Detectron/issues/459) led to a drop in box AP, which can be + compensated for by some parameter tuning. +* Mask R-CNN: Detectron's AP is box 37.7, mask 33.9. We're 1 AP better in mask AP, due to a more correct implementation. + See [this article](https://ppwwyyxx.com/blog/2021/Where-are-Pixels/) for details. + +For speed comparison, see [benchmarks](https://detectron2.readthedocs.io/notes/benchmarks.html). diff --git a/configs/Detectron1-Comparisons/faster_rcnn_R_50_FPN_noaug_1x.yaml b/configs/Detectron1-Comparisons/faster_rcnn_R_50_FPN_noaug_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6ce77f137fa2c4e5254a62b58c18b8b76096f2aa --- /dev/null +++ b/configs/Detectron1-Comparisons/faster_rcnn_R_50_FPN_noaug_1x.yaml @@ -0,0 +1,17 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: False + RESNETS: + DEPTH: 50 + # Detectron1 uses smooth L1 loss with some magic beta values. + # The defaults are changed to L1 loss in Detectron2. 
+ RPN: + SMOOTH_L1_BETA: 0.1111 + ROI_BOX_HEAD: + SMOOTH_L1_BETA: 1.0 + POOLER_SAMPLING_RATIO: 2 + POOLER_TYPE: "ROIAlign" +INPUT: + # no scale augmentation + MIN_SIZE_TRAIN: (800, ) diff --git a/configs/Detectron1-Comparisons/keypoint_rcnn_R_50_FPN_1x.yaml b/configs/Detectron1-Comparisons/keypoint_rcnn_R_50_FPN_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..aacf868ba5290c752031c130a2081af48afc0808 --- /dev/null +++ b/configs/Detectron1-Comparisons/keypoint_rcnn_R_50_FPN_1x.yaml @@ -0,0 +1,27 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + KEYPOINT_ON: True + RESNETS: + DEPTH: 50 + ROI_HEADS: + NUM_CLASSES: 1 + ROI_KEYPOINT_HEAD: + POOLER_RESOLUTION: 14 + POOLER_SAMPLING_RATIO: 2 + POOLER_TYPE: "ROIAlign" + # Detectron1 uses smooth L1 loss with some magic beta values. + # The defaults are changed to L1 loss in Detectron2. + ROI_BOX_HEAD: + SMOOTH_L1_BETA: 1.0 + POOLER_SAMPLING_RATIO: 2 + POOLER_TYPE: "ROIAlign" + RPN: + SMOOTH_L1_BETA: 0.1111 + # Detectron1 uses 2000 proposals per-batch, but this option is per-image in detectron2 + # 1000 proposals per-image is found to hurt box AP. + # Therefore we increase it to 1500 per-image. + POST_NMS_TOPK_TRAIN: 1500 +DATASETS: + TRAIN: ("keypoints_coco_2017_train",) + TEST: ("keypoints_coco_2017_val",) diff --git a/configs/Detectron1-Comparisons/mask_rcnn_R_50_FPN_noaug_1x.yaml b/configs/Detectron1-Comparisons/mask_rcnn_R_50_FPN_noaug_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4ea86a8d8e2cd3e51cbc7311b0d00710c07d01f6 --- /dev/null +++ b/configs/Detectron1-Comparisons/mask_rcnn_R_50_FPN_noaug_1x.yaml @@ -0,0 +1,20 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True + RESNETS: + DEPTH: 50 + # Detectron1 uses smooth L1 loss with some magic beta values. + # The defaults are changed to L1 loss in Detectron2. 
+ RPN: + SMOOTH_L1_BETA: 0.1111 + ROI_BOX_HEAD: + SMOOTH_L1_BETA: 1.0 + POOLER_SAMPLING_RATIO: 2 + POOLER_TYPE: "ROIAlign" + ROI_MASK_HEAD: + POOLER_SAMPLING_RATIO: 2 + POOLER_TYPE: "ROIAlign" +INPUT: + # no scale augmentation + MIN_SIZE_TRAIN: (800, ) diff --git a/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml b/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f0c3a1bbc0a09e1384de522f30c443ba1e36fafa --- /dev/null +++ b/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml @@ -0,0 +1,19 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" + MASK_ON: True + RESNETS: + DEPTH: 101 + ROI_HEADS: + NUM_CLASSES: 1230 + SCORE_THRESH_TEST: 0.0001 +INPUT: + MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) +DATASETS: + TRAIN: ("lvis_v0.5_train",) + TEST: ("lvis_v0.5_val",) +TEST: + DETECTIONS_PER_IMAGE: 300 # LVIS allows up to 300 +DATALOADER: + SAMPLER_TRAIN: "RepeatFactorTrainingSampler" + REPEAT_THRESHOLD: 0.001 diff --git a/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_101_FPN_1x_finetune.yaml b/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_101_FPN_1x_finetune.yaml new file mode 100644 index 0000000000000000000000000000000000000000..de110d26e773c35504a96d75724545777d2332ee --- /dev/null +++ b/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_101_FPN_1x_finetune.yaml @@ -0,0 +1,19 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "./model_final_824ab5.pkl" + MASK_ON: True + RESNETS: + DEPTH: 101 + ROI_HEADS: + NUM_CLASSES: 1230 + SCORE_THRESH_TEST: 0.0001 +INPUT: + MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) +DATASETS: + TRAIN: ("lvis_v0.5_train",) + TEST: ("lvis_v0.5_val",) +TEST: + DETECTIONS_PER_IMAGE: 150 #300 # LVIS allows up to 300 +DATALOADER: + SAMPLER_TRAIN: "RepeatFactorTrainingSampler" + REPEAT_THRESHOLD: 0.001 diff --git 
a/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml b/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c474187bdf2db5c9662c8b7083ba481ded378fbd --- /dev/null +++ b/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml @@ -0,0 +1,19 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True + RESNETS: + DEPTH: 50 + ROI_HEADS: + NUM_CLASSES: 1230 + SCORE_THRESH_TEST: 0.0001 +INPUT: + MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) +DATASETS: + TRAIN: ("lvis_v0.5_train",) + TEST: ("lvis_v0.5_val",) +TEST: + DETECTIONS_PER_IMAGE: 150 # LVIS allows up to 300 +DATALOADER: + SAMPLER_TRAIN: "RepeatFactorTrainingSampler" + REPEAT_THRESHOLD: 0.001 diff --git a/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml b/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c8b822c6c006ba642f4caf9b55e7983f6797427a --- /dev/null +++ b/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml @@ -0,0 +1,23 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl" + PIXEL_STD: [57.375, 57.120, 58.395] + MASK_ON: True + RESNETS: + STRIDE_IN_1X1: False # this is a C2 model + NUM_GROUPS: 32 + WIDTH_PER_GROUP: 8 + DEPTH: 101 + ROI_HEADS: + NUM_CLASSES: 1230 + SCORE_THRESH_TEST: 0.0001 +INPUT: + MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) +DATASETS: + TRAIN: ("lvis_v0.5_train",) + TEST: ("lvis_v0.5_val",) +TEST: + DETECTIONS_PER_IMAGE: 300 # LVIS allows up to 300 +DATALOADER: + SAMPLER_TRAIN: "RepeatFactorTrainingSampler" + REPEAT_THRESHOLD: 0.001 diff --git a/configs/LVISv1-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml b/configs/LVISv1-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..ca4dd97144561276ecaabbb6c254e3a7737ac157 --- /dev/null +++ b/configs/LVISv1-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml @@ -0,0 +1,22 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" + MASK_ON: True + RESNETS: + DEPTH: 101 + ROI_HEADS: + NUM_CLASSES: 1203 + SCORE_THRESH_TEST: 0.0001 +INPUT: + MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) +DATASETS: + TRAIN: ("lvis_v1_train",) + TEST: ("lvis_v1_val",) +TEST: + DETECTIONS_PER_IMAGE: 300 # LVIS allows up to 300 +SOLVER: + STEPS: (120000, 160000) + MAX_ITER: 180000 # 180000 * 16 / 100000 ~ 28.8 epochs +DATALOADER: + SAMPLER_TRAIN: "RepeatFactorTrainingSampler" + REPEAT_THRESHOLD: 0.001 diff --git a/configs/LVISv1-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml b/configs/LVISv1-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f313295ee5f0d553d394ce2efe003810c79af47d --- /dev/null +++ b/configs/LVISv1-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml @@ -0,0 +1,22 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True + RESNETS: + DEPTH: 50 + ROI_HEADS: + NUM_CLASSES: 1203 + SCORE_THRESH_TEST: 0.0001 +INPUT: + MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) +DATASETS: + TRAIN: ("lvis_v1_train",) + TEST: ("lvis_v1_val",) +TEST: + DETECTIONS_PER_IMAGE: 300 # LVIS allows up to 300 +SOLVER: + STEPS: (120000, 160000) + MAX_ITER: 180000 # 180000 * 16 / 100000 ~ 28.8 epochs +DATALOADER: + SAMPLER_TRAIN: "RepeatFactorTrainingSampler" + REPEAT_THRESHOLD: 0.001 diff --git a/configs/LVISv1-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml b/configs/LVISv1-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f6528f7c31c8cfbf139c14fd0cae598592d8e898 --- /dev/null +++ 
b/configs/LVISv1-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml @@ -0,0 +1,26 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl" + PIXEL_STD: [57.375, 57.120, 58.395] + MASK_ON: True + RESNETS: + STRIDE_IN_1X1: False # this is a C2 model + NUM_GROUPS: 32 + WIDTH_PER_GROUP: 8 + DEPTH: 101 + ROI_HEADS: + NUM_CLASSES: 1203 + SCORE_THRESH_TEST: 0.0001 +INPUT: + MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) +DATASETS: + TRAIN: ("lvis_v1_train",) + TEST: ("lvis_v1_val",) +SOLVER: + STEPS: (120000, 160000) + MAX_ITER: 180000 # 180000 * 16 / 100000 ~ 28.8 epochs +TEST: + DETECTIONS_PER_IMAGE: 300 # LVIS allows up to 300 +DATALOADER: + SAMPLER_TRAIN: "RepeatFactorTrainingSampler" + REPEAT_THRESHOLD: 0.001 diff --git a/configs/Misc/cascade_mask_rcnn_R_50_FPN_1x.yaml b/configs/Misc/cascade_mask_rcnn_R_50_FPN_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..abb33b618932e94b66239945ac892f4c84a6e8f8 --- /dev/null +++ b/configs/Misc/cascade_mask_rcnn_R_50_FPN_1x.yaml @@ -0,0 +1,12 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True + RESNETS: + DEPTH: 50 + ROI_HEADS: + NAME: CascadeROIHeads + ROI_BOX_HEAD: + CLS_AGNOSTIC_BBOX_REG: True + RPN: + POST_NMS_TOPK_TRAIN: 2000 diff --git a/configs/Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml b/configs/Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e2201ad5c46ded91ccfa47b7698a521625c5e447 --- /dev/null +++ b/configs/Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml @@ -0,0 +1,15 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True + RESNETS: + DEPTH: 50 + ROI_HEADS: + NAME: CascadeROIHeads + ROI_BOX_HEAD: + CLS_AGNOSTIC_BBOX_REG: True + RPN: + POST_NMS_TOPK_TRAIN: 2000 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 diff --git 
a/configs/Misc/cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv.yaml b/configs/Misc/cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fc117f6b5e3e51558ec2f01b73c5365622e5ce25 --- /dev/null +++ b/configs/Misc/cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv.yaml @@ -0,0 +1,36 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + MASK_ON: True + WEIGHTS: "catalog://ImageNetPretrained/FAIR/X-152-32x8d-IN5k" + RESNETS: + STRIDE_IN_1X1: False # this is a C2 model + NUM_GROUPS: 32 + WIDTH_PER_GROUP: 8 + DEPTH: 152 + DEFORM_ON_PER_STAGE: [False, True, True, True] + ROI_HEADS: + NAME: "CascadeROIHeads" + ROI_BOX_HEAD: + NAME: "FastRCNNConvFCHead" + NUM_CONV: 4 + NUM_FC: 1 + NORM: "GN" + CLS_AGNOSTIC_BBOX_REG: True + ROI_MASK_HEAD: + NUM_CONV: 8 + NORM: "GN" + RPN: + POST_NMS_TOPK_TRAIN: 2000 +SOLVER: + IMS_PER_BATCH: 128 + STEPS: (35000, 45000) + MAX_ITER: 50000 + BASE_LR: 0.16 +INPUT: + MIN_SIZE_TRAIN: (640, 864) + MIN_SIZE_TRAIN_SAMPLING: "range" + MAX_SIZE_TRAIN: 1440 + CROP: + ENABLED: True +TEST: + EVAL_PERIOD: 2500 diff --git a/configs/Misc/mask_rcnn_R_50_FPN_1x_cls_agnostic.yaml b/configs/Misc/mask_rcnn_R_50_FPN_1x_cls_agnostic.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4c3b767ff473bbab7225cc8a4a92608543d78246 --- /dev/null +++ b/configs/Misc/mask_rcnn_R_50_FPN_1x_cls_agnostic.yaml @@ -0,0 +1,10 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True + RESNETS: + DEPTH: 50 + ROI_BOX_HEAD: + CLS_AGNOSTIC_BBOX_REG: True + ROI_MASK_HEAD: + CLS_AGNOSTIC_MASK: True diff --git a/configs/Misc/mask_rcnn_R_50_FPN_1x_dconv_c3-c5.yaml b/configs/Misc/mask_rcnn_R_50_FPN_1x_dconv_c3-c5.yaml new file mode 100644 index 0000000000000000000000000000000000000000..04ff988d073ef9169ee4ca2cbce0d6f030c15232 --- /dev/null +++ b/configs/Misc/mask_rcnn_R_50_FPN_1x_dconv_c3-c5.yaml @@ -0,0 +1,8 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + 
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True + RESNETS: + DEPTH: 50 + DEFORM_ON_PER_STAGE: [False, True, True, True] # on Res3,Res4,Res5 + DEFORM_MODULATED: False diff --git a/configs/Misc/mask_rcnn_R_50_FPN_3x_dconv_c3-c5.yaml b/configs/Misc/mask_rcnn_R_50_FPN_3x_dconv_c3-c5.yaml new file mode 100644 index 0000000000000000000000000000000000000000..68c0ca58d7df97ca728c339da0ca9828fe6be318 --- /dev/null +++ b/configs/Misc/mask_rcnn_R_50_FPN_3x_dconv_c3-c5.yaml @@ -0,0 +1,11 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True + RESNETS: + DEPTH: 50 + DEFORM_ON_PER_STAGE: [False, True, True, True] # on Res3,Res4,Res5 + DEFORM_MODULATED: False +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 diff --git a/configs/Misc/mask_rcnn_R_50_FPN_3x_dconv_c3-c5_4gpu.yaml b/configs/Misc/mask_rcnn_R_50_FPN_3x_dconv_c3-c5_4gpu.yaml new file mode 100644 index 0000000000000000000000000000000000000000..699bea11dfa413c0718681752963cd97ab29b52c --- /dev/null +++ b/configs/Misc/mask_rcnn_R_50_FPN_3x_dconv_c3-c5_4gpu.yaml @@ -0,0 +1,11 @@ +_BASE_: "../Base-RCNN-FPN-4gpu.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True + RESNETS: + DEPTH: 50 + DEFORM_ON_PER_STAGE: [False, True, True, True] # on Res3,Res4,Res5 + DEFORM_MODULATED: False +SOLVER: + STEPS: (420000, 500000) # (210000, 250000) + MAX_ITER: 540000 # 270000 diff --git a/configs/Misc/mask_rcnn_R_50_FPN_3x_gn.yaml b/configs/Misc/mask_rcnn_R_50_FPN_3x_gn.yaml new file mode 100644 index 0000000000000000000000000000000000000000..74d274e5a529b5a8afe186940868f9d48c6112b3 --- /dev/null +++ b/configs/Misc/mask_rcnn_R_50_FPN_3x_gn.yaml @@ -0,0 +1,21 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "catalog://ImageNetPretrained/FAIR/R-50-GN" + MASK_ON: True + RESNETS: + DEPTH: 50 + NORM: "GN" + STRIDE_IN_1X1: False + FPN: + NORM: "GN" + ROI_BOX_HEAD: + NAME: "FastRCNNConvFCHead" + NUM_CONV: 4 + 
NUM_FC: 1 + NORM: "GN" + ROI_MASK_HEAD: + NORM: "GN" +SOLVER: + # 3x schedule + STEPS: (210000, 250000) + MAX_ITER: 270000 diff --git a/configs/Misc/mask_rcnn_R_50_FPN_3x_syncbn.yaml b/configs/Misc/mask_rcnn_R_50_FPN_3x_syncbn.yaml new file mode 100644 index 0000000000000000000000000000000000000000..11ebb076ba529f26c71a0d972e96ca4c2d6a830b --- /dev/null +++ b/configs/Misc/mask_rcnn_R_50_FPN_3x_syncbn.yaml @@ -0,0 +1,24 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True + RESNETS: + DEPTH: 50 + NORM: "SyncBN" + STRIDE_IN_1X1: True + FPN: + NORM: "SyncBN" + ROI_BOX_HEAD: + NAME: "FastRCNNConvFCHead" + NUM_CONV: 4 + NUM_FC: 1 + NORM: "SyncBN" + ROI_MASK_HEAD: + NORM: "SyncBN" +SOLVER: + # 3x schedule + STEPS: (210000, 250000) + MAX_ITER: 270000 +TEST: + PRECISE_BN: + ENABLED: True diff --git a/configs/Misc/mmdet_mask_rcnn_R_50_FPN_1x.py b/configs/Misc/mmdet_mask_rcnn_R_50_FPN_1x.py new file mode 100644 index 0000000000000000000000000000000000000000..0f2464be744c083985898a25f9e71d00104f689d --- /dev/null +++ b/configs/Misc/mmdet_mask_rcnn_R_50_FPN_1x.py @@ -0,0 +1,151 @@ +# An example config to train a mmdetection model using detectron2. 
+ +from ..common.data.coco import dataloader +from ..common.coco_schedule import lr_multiplier_1x as lr_multiplier +from ..common.optim import SGD as optimizer +from ..common.train import train + +from detectron2.modeling.mmdet_wrapper import MMDetDetector +from detectron2.config import LazyCall as L + +model = L(MMDetDetector)( + detector=dict( + type="MaskRCNN", + pretrained="torchvision://resnet50", + backbone=dict( + type="ResNet", + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type="BN", requires_grad=True), + norm_eval=True, + style="pytorch", + ), + neck=dict(type="FPN", in_channels=[256, 512, 1024, 2048], out_channels=256, num_outs=5), + rpn_head=dict( + type="RPNHead", + in_channels=256, + feat_channels=256, + anchor_generator=dict( + type="AnchorGenerator", + scales=[8], + ratios=[0.5, 1.0, 2.0], + strides=[4, 8, 16, 32, 64], + ), + bbox_coder=dict( + type="DeltaXYWHBBoxCoder", + target_means=[0.0, 0.0, 0.0, 0.0], + target_stds=[1.0, 1.0, 1.0, 1.0], + ), + loss_cls=dict(type="CrossEntropyLoss", use_sigmoid=True, loss_weight=1.0), + loss_bbox=dict(type="L1Loss", loss_weight=1.0), + ), + roi_head=dict( + type="StandardRoIHead", + bbox_roi_extractor=dict( + type="SingleRoIExtractor", + roi_layer=dict(type="RoIAlign", output_size=7, sampling_ratio=0), + out_channels=256, + featmap_strides=[4, 8, 16, 32], + ), + bbox_head=dict( + type="Shared2FCBBoxHead", + in_channels=256, + fc_out_channels=1024, + roi_feat_size=7, + num_classes=80, + bbox_coder=dict( + type="DeltaXYWHBBoxCoder", + target_means=[0.0, 0.0, 0.0, 0.0], + target_stds=[0.1, 0.1, 0.2, 0.2], + ), + reg_class_agnostic=False, + loss_cls=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type="L1Loss", loss_weight=1.0), + ), + mask_roi_extractor=dict( + type="SingleRoIExtractor", + roi_layer=dict(type="RoIAlign", output_size=14, sampling_ratio=0), + out_channels=256, + featmap_strides=[4, 8, 16, 32], + ), + mask_head=dict( + 
type="FCNMaskHead", + num_convs=4, + in_channels=256, + conv_out_channels=256, + num_classes=80, + loss_mask=dict(type="CrossEntropyLoss", use_mask=True, loss_weight=1.0), + ), + ), + # model training and testing settings + train_cfg=dict( + rpn=dict( + assigner=dict( + type="MaxIoUAssigner", + pos_iou_thr=0.7, + neg_iou_thr=0.3, + min_pos_iou=0.3, + match_low_quality=True, + ignore_iof_thr=-1, + ), + sampler=dict( + type="RandomSampler", + num=256, + pos_fraction=0.5, + neg_pos_ub=-1, + add_gt_as_proposals=False, + ), + allowed_border=-1, + pos_weight=-1, + debug=False, + ), + rpn_proposal=dict( + nms_pre=2000, + max_per_img=1000, + nms=dict(type="nms", iou_threshold=0.7), + min_bbox_size=0, + ), + rcnn=dict( + assigner=dict( + type="MaxIoUAssigner", + pos_iou_thr=0.5, + neg_iou_thr=0.5, + min_pos_iou=0.5, + match_low_quality=True, + ignore_iof_thr=-1, + ), + sampler=dict( + type="RandomSampler", + num=512, + pos_fraction=0.25, + neg_pos_ub=-1, + add_gt_as_proposals=True, + ), + mask_size=28, + pos_weight=-1, + debug=False, + ), + ), + test_cfg=dict( + rpn=dict( + nms_pre=1000, + max_per_img=1000, + nms=dict(type="nms", iou_threshold=0.7), + min_bbox_size=0, + ), + rcnn=dict( + score_thr=0.05, + nms=dict(type="nms", iou_threshold=0.5), + max_per_img=100, + mask_thr_binary=0.5, + ), + ), + ), + pixel_mean=[123.675, 116.280, 103.530], + pixel_std=[58.395, 57.120, 57.375], +) + +dataloader.train.mapper.image_format = "RGB" # torchvision pretrained model +train.init_checkpoint = None # pretrained model is loaded inside backbone diff --git a/configs/Misc/panoptic_fpn_R_101_dconv_cascade_gn_3x.yaml b/configs/Misc/panoptic_fpn_R_101_dconv_cascade_gn_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..34016cea3ca9d7fb69ef4fe01d6b47ee8690a13b --- /dev/null +++ b/configs/Misc/panoptic_fpn_R_101_dconv_cascade_gn_3x.yaml @@ -0,0 +1,26 @@ +# A large PanopticFPN for demo purposes. +# Use GN on backbone to support semantic seg. 
+# Use Cascade + Deform Conv to improve localization. +_BASE_: "../COCO-PanopticSegmentation/Base-Panoptic-FPN.yaml" +MODEL: + WEIGHTS: "catalog://ImageNetPretrained/FAIR/R-101-GN" + RESNETS: + DEPTH: 101 + NORM: "GN" + DEFORM_ON_PER_STAGE: [False, True, True, True] + STRIDE_IN_1X1: False + FPN: + NORM: "GN" + ROI_HEADS: + NAME: CascadeROIHeads + ROI_BOX_HEAD: + CLS_AGNOSTIC_BBOX_REG: True + ROI_MASK_HEAD: + NORM: "GN" + RPN: + POST_NMS_TOPK_TRAIN: 2000 +SOLVER: + STEPS: (105000, 125000) + MAX_ITER: 135000 + IMS_PER_BATCH: 32 + BASE_LR: 0.04 diff --git a/configs/Misc/scratch_mask_rcnn_R_50_FPN_3x_gn.yaml b/configs/Misc/scratch_mask_rcnn_R_50_FPN_3x_gn.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f3400288cde242fcf66eef7f63b5a9165ca663c5 --- /dev/null +++ b/configs/Misc/scratch_mask_rcnn_R_50_FPN_3x_gn.yaml @@ -0,0 +1,13 @@ +_BASE_: "mask_rcnn_R_50_FPN_3x_gn.yaml" +MODEL: + # Train from random initialization. + WEIGHTS: "" + # It makes sense to divide by STD when training from scratch + # But it seems to make no difference on the results and C2's models didn't do this. + # So we keep things consistent with C2. + # PIXEL_STD: [57.375, 57.12, 58.395] + MASK_ON: True + BACKBONE: + FREEZE_AT: 0 +# NOTE: Please refer to Rethinking ImageNet Pre-training https://arxiv.org/abs/1811.08883 +# to learn what you need for training from scratch. 
diff --git a/configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_gn.yaml b/configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_gn.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d90c9ff0ef4573252ee165b4c958ec5f74178176 --- /dev/null +++ b/configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_gn.yaml @@ -0,0 +1,19 @@ +_BASE_: "mask_rcnn_R_50_FPN_3x_gn.yaml" +MODEL: + PIXEL_STD: [57.375, 57.12, 58.395] + WEIGHTS: "" + MASK_ON: True + RESNETS: + STRIDE_IN_1X1: False + BACKBONE: + FREEZE_AT: 0 +SOLVER: + # 9x schedule + IMS_PER_BATCH: 64 # 4x the standard + STEPS: (187500, 197500) # last 60/4==15k and last 20/4==5k + MAX_ITER: 202500 # 90k * 9 / 4 + BASE_LR: 0.08 +TEST: + EVAL_PERIOD: 2500 +# NOTE: Please refer to Rethinking ImageNet Pre-training https://arxiv.org/abs/1811.08883 +# to learn what you need for training from scratch. diff --git a/configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_syncbn.yaml b/configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_syncbn.yaml new file mode 100644 index 0000000000000000000000000000000000000000..60d4e42330e396a1901437df8e17b262d5ad547a --- /dev/null +++ b/configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_syncbn.yaml @@ -0,0 +1,19 @@ +_BASE_: "mask_rcnn_R_50_FPN_3x_syncbn.yaml" +MODEL: + PIXEL_STD: [57.375, 57.12, 58.395] + WEIGHTS: "" + MASK_ON: True + RESNETS: + STRIDE_IN_1X1: False + BACKBONE: + FREEZE_AT: 0 +SOLVER: + # 9x schedule + IMS_PER_BATCH: 64 # 4x the standard + STEPS: (187500, 197500) # last 60/4==15k and last 20/4==5k + MAX_ITER: 202500 # 90k * 9 / 4 + BASE_LR: 0.08 +TEST: + EVAL_PERIOD: 2500 +# NOTE: Please refer to Rethinking ImageNet Pre-training https://arxiv.org/abs/1811.08883 +# to learn what you need for training from scratch. 
diff --git a/configs/Misc/semantic_R_50_FPN_1x.yaml b/configs/Misc/semantic_R_50_FPN_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ac256e1372770ab3d9ae522c962de0fd0dbceeb5 --- /dev/null +++ b/configs/Misc/semantic_R_50_FPN_1x.yaml @@ -0,0 +1,11 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + META_ARCHITECTURE: "SemanticSegmentor" + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + RESNETS: + DEPTH: 50 +DATASETS: + TRAIN: ("coco_2017_train_panoptic_stuffonly",) + TEST: ("coco_2017_val_panoptic_stuffonly",) +INPUT: + MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) diff --git a/configs/Misc/torchvision_imagenet_R_50.py b/configs/Misc/torchvision_imagenet_R_50.py new file mode 100644 index 0000000000000000000000000000000000000000..0d75305bcf7445b98db84b3d489a1505d2fce5af --- /dev/null +++ b/configs/Misc/torchvision_imagenet_R_50.py @@ -0,0 +1,150 @@ +""" +An example config file to train an ImageNet classifier with detectron2. +Model and dataloader both come from torchvision. +This shows how to use detectron2 as a general engine for any new models and tasks. 
+ +To run, use the following command: + +python tools/lazyconfig_train_net.py --config-file configs/Misc/torchvision_imagenet_R_50.py \ + --num-gpus 8 dataloader.train.dataset.root=/path/to/imagenet/ + +""" + + +import torch +from torch import nn +from torch.nn import functional as F +from omegaconf import OmegaConf +import torchvision +from torchvision.transforms import transforms as T +from torchvision.models.resnet import ResNet, Bottleneck +from fvcore.common.param_scheduler import MultiStepParamScheduler + +from detectron2.solver import WarmupParamScheduler +from detectron2.solver.build import get_default_optimizer_params +from detectron2.config import LazyCall as L +from detectron2.model_zoo import get_config +from detectron2.data.samplers import TrainingSampler, InferenceSampler +from detectron2.evaluation import DatasetEvaluator +from detectron2.utils import comm + + +""" +Note: Here we put reusable code (models, evaluation, data) together with configs just as a +proof-of-concept, to easily demonstrate what's needed to train an ImageNet classifier in detectron2. +Writing code in configs offers extreme flexibility but is often not a good engineering practice. +In practice, you might want to put code in your project and import it instead. 
+""" + + +def build_data_loader(dataset, batch_size, num_workers, training=True): + return torch.utils.data.DataLoader( + dataset, + sampler=(TrainingSampler if training else InferenceSampler)(len(dataset)), + batch_size=batch_size, + num_workers=num_workers, + pin_memory=True, + ) + + +class ClassificationNet(nn.Module): + def __init__(self, model: nn.Module): + super().__init__() + self.model = model + + @property + def device(self): + return list(self.model.parameters())[0].device + + def forward(self, inputs): + image, label = inputs + pred = self.model(image.to(self.device)) + if self.training: + label = label.to(self.device) + return F.cross_entropy(pred, label) + else: + return pred + + +class ClassificationAcc(DatasetEvaluator): + def reset(self): + self.corr = self.total = 0 + + def process(self, inputs, outputs): + image, label = inputs + self.corr += (outputs.argmax(dim=1).cpu() == label.cpu()).sum().item() + self.total += len(label) + + def evaluate(self): + all_corr_total = comm.all_gather([self.corr, self.total]) + corr = sum(x[0] for x in all_corr_total) + total = sum(x[1] for x in all_corr_total) + return {"accuracy": corr / total} + + +# --- End of code that could be in a project and be imported + + +dataloader = OmegaConf.create() +dataloader.train = L(build_data_loader)( + dataset=L(torchvision.datasets.ImageNet)( + root="/path/to/imagenet", + split="train", + transform=L(T.Compose)( + transforms=[ + L(T.RandomResizedCrop)(size=224), + L(T.RandomHorizontalFlip)(), + T.ToTensor(), + L(T.Normalize)(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)), + ] + ), + ), + batch_size=256 // 8, + num_workers=4, + training=True, +) + +dataloader.test = L(build_data_loader)( + dataset=L(torchvision.datasets.ImageNet)( + root="${...train.dataset.root}", + split="val", + transform=L(T.Compose)( + transforms=[ + L(T.Resize)(size=256), + L(T.CenterCrop)(size=224), + T.ToTensor(), + L(T.Normalize)(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)), + ] + 
), + ), + batch_size=256 // 8, + num_workers=4, + training=False, +) + +dataloader.evaluator = L(ClassificationAcc)() + +model = L(ClassificationNet)( + model=(ResNet)(block=Bottleneck, layers=[3, 4, 6, 3], zero_init_residual=True) +) + + +optimizer = L(torch.optim.SGD)( + params=L(get_default_optimizer_params)(), + lr=0.1, + momentum=0.9, + weight_decay=1e-4, +) + +lr_multiplier = L(WarmupParamScheduler)( + scheduler=L(MultiStepParamScheduler)( + values=[1.0, 0.1, 0.01, 0.001], milestones=[30, 60, 90, 100] + ), + warmup_length=1 / 100, + warmup_factor=0.1, +) + + +train = get_config("common/train.py").train +train.init_checkpoint = None +train.max_iter = 100 * 1281167 // 256 diff --git a/configs/PascalVOC-Detection/faster_rcnn_R_50_C4.yaml b/configs/PascalVOC-Detection/faster_rcnn_R_50_C4.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ea2a6baaebd1a186db18f2904430ffb25901898e --- /dev/null +++ b/configs/PascalVOC-Detection/faster_rcnn_R_50_C4.yaml @@ -0,0 +1,18 @@ +_BASE_: "../Base-RCNN-C4.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: False + RESNETS: + DEPTH: 50 + ROI_HEADS: + NUM_CLASSES: 20 +INPUT: + MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800) + MIN_SIZE_TEST: 800 +DATASETS: + TRAIN: ('voc_2007_trainval', 'voc_2012_trainval') + TEST: ('voc_2007_test',) +SOLVER: + STEPS: (12000, 16000) + MAX_ITER: 18000 # 17.4 epochs + WARMUP_ITERS: 100 diff --git a/configs/PascalVOC-Detection/faster_rcnn_R_50_FPN.yaml b/configs/PascalVOC-Detection/faster_rcnn_R_50_FPN.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e554cab18a358a27b630c1ab0c2359666b0e1514 --- /dev/null +++ b/configs/PascalVOC-Detection/faster_rcnn_R_50_FPN.yaml @@ -0,0 +1,18 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: False + RESNETS: + DEPTH: 50 + ROI_HEADS: + NUM_CLASSES: 20 +INPUT: + MIN_SIZE_TRAIN: (480, 512, 544, 576, 
608, 640, 672, 704, 736, 768, 800) + MIN_SIZE_TEST: 800 +DATASETS: + TRAIN: ('voc_2007_trainval', 'voc_2012_trainval') + TEST: ('voc_2007_test',) +SOLVER: + STEPS: (12000, 16000) + MAX_ITER: 18000 # 17.4 epochs + WARMUP_ITERS: 100 diff --git a/configs/common/README.md b/configs/common/README.md new file mode 100644 index 0000000000000000000000000000000000000000..912cc29927542bfe4258d3208cf52d73cb0ea477 --- /dev/null +++ b/configs/common/README.md @@ -0,0 +1,6 @@ +This directory provides definitions for a few common models, dataloaders, schedulers, +and optimizers that are often used in training. +The definitions of these objects are provided in the form of lazy instantiation: +their arguments can be edited by users before constructing the objects. + +They can be imported, or loaded by the `model_zoo.get_config` API in users' own configs. diff --git a/configs/common/coco_schedule.py b/configs/common/coco_schedule.py new file mode 100644 index 0000000000000000000000000000000000000000..355e66a1d213cb599a7ffe55089d854089c8ead2 --- /dev/null +++ b/configs/common/coco_schedule.py @@ -0,0 +1,47 @@ +from fvcore.common.param_scheduler import MultiStepParamScheduler + +from detectron2.config import LazyCall as L +from detectron2.solver import WarmupParamScheduler + + +def default_X_scheduler(num_X): + """ + Returns the config for a default multi-step LR scheduler such as "1x", "3x", + commonly referred to in papers, where every 1x has the total length of 1440k + training images (~12 COCO epochs). LR is decayed twice at the end of training + following the strategy defined in "Rethinking ImageNet Pretraining", Sec 4. 
+ + Args: + num_X: a positive real number + + Returns: + DictConfig: configs that define the multiplier for LR during training + """ + # total number of iterations assuming 16 batch size, using 1440000/16=90000 + total_steps_16bs = num_X * 90000 + + if num_X <= 2: + scheduler = L(MultiStepParamScheduler)( + values=[1.0, 0.1, 0.01], + # note that scheduler is scale-invariant. This is equivalent to + # milestones=[6, 8, 9] + milestones=[60000, 80000, 90000], + ) + else: + scheduler = L(MultiStepParamScheduler)( + values=[1.0, 0.1, 0.01], + milestones=[total_steps_16bs - 60000, total_steps_16bs - 20000, total_steps_16bs], + ) + return L(WarmupParamScheduler)( + scheduler=scheduler, + warmup_length=1000 / total_steps_16bs, + warmup_method="linear", + warmup_factor=0.001, + ) + + +lr_multiplier_1x = default_X_scheduler(1) +lr_multiplier_2x = default_X_scheduler(2) +lr_multiplier_3x = default_X_scheduler(3) +lr_multiplier_6x = default_X_scheduler(6) +lr_multiplier_9x = default_X_scheduler(9) diff --git a/configs/common/data/coco.py b/configs/common/data/coco.py new file mode 100644 index 0000000000000000000000000000000000000000..703c4385c7ddc7eb0759c98d102ab2384d6a9e3e --- /dev/null +++ b/configs/common/data/coco.py @@ -0,0 +1,48 @@ +from omegaconf import OmegaConf + +import detectron2.data.transforms as T +from detectron2.config import LazyCall as L +from detectron2.data import ( + DatasetMapper, + build_detection_test_loader, + build_detection_train_loader, + get_detection_dataset_dicts, +) +from detectron2.evaluation import COCOEvaluator + +dataloader = OmegaConf.create() + +dataloader.train = L(build_detection_train_loader)( + dataset=L(get_detection_dataset_dicts)(names="coco_2017_train"), + mapper=L(DatasetMapper)( + is_train=True, + augmentations=[ + L(T.ResizeShortestEdge)( + short_edge_length=(640, 672, 704, 736, 768, 800), + sample_style="choice", + max_size=1333, + ), + L(T.RandomFlip)(horizontal=True), + ], + image_format="BGR", + use_instance_mask=True, + ), 
+ total_batch_size=16, + num_workers=4, +) + +dataloader.test = L(build_detection_test_loader)( + dataset=L(get_detection_dataset_dicts)(names="coco_2017_val", filter_empty=False), + mapper=L(DatasetMapper)( + is_train=False, + augmentations=[ + L(T.ResizeShortestEdge)(short_edge_length=800, max_size=1333), + ], + image_format="${...train.mapper.image_format}", + ), + num_workers=4, +) + +dataloader.evaluator = L(COCOEvaluator)( + dataset_name="${..test.dataset.names}", +) diff --git a/configs/common/data/coco_keypoint.py b/configs/common/data/coco_keypoint.py new file mode 100644 index 0000000000000000000000000000000000000000..b4ceb066faf696954244205dc75376b767071217 --- /dev/null +++ b/configs/common/data/coco_keypoint.py @@ -0,0 +1,13 @@ +from detectron2.data.detection_utils import create_keypoint_hflip_indices + +from .coco import dataloader + +dataloader.train.dataset.min_keypoints = 1 +dataloader.train.dataset.names = "keypoints_coco_2017_train" +dataloader.test.dataset.names = "keypoints_coco_2017_val" + +dataloader.train.mapper.update( + use_instance_mask=False, + use_keypoint=True, + keypoint_hflip_indices=create_keypoint_hflip_indices(dataloader.train.dataset.names), +) diff --git a/configs/common/data/coco_panoptic_separated.py b/configs/common/data/coco_panoptic_separated.py new file mode 100644 index 0000000000000000000000000000000000000000..5ccbc77e64d1c92c99cbd7158d047bab54cb9f3d --- /dev/null +++ b/configs/common/data/coco_panoptic_separated.py @@ -0,0 +1,26 @@ +from detectron2.config import LazyCall as L +from detectron2.evaluation import ( + COCOEvaluator, + COCOPanopticEvaluator, + DatasetEvaluators, + SemSegEvaluator, +) + +from .coco import dataloader + +dataloader.train.dataset.names = "coco_2017_train_panoptic_separated" +dataloader.train.dataset.filter_empty = False +dataloader.test.dataset.names = "coco_2017_val_panoptic_separated" + + +dataloader.evaluator = [ + L(COCOEvaluator)( + dataset_name="${...test.dataset.names}", + ), + 
L(SemSegEvaluator)( + dataset_name="${...test.dataset.names}", + ), + L(COCOPanopticEvaluator)( + dataset_name="${...test.dataset.names}", + ), +] diff --git a/configs/common/models/cascade_rcnn.py b/configs/common/models/cascade_rcnn.py new file mode 100644 index 0000000000000000000000000000000000000000..c7372a801dc00d7fec4db8cda8c2612ce281d48a --- /dev/null +++ b/configs/common/models/cascade_rcnn.py @@ -0,0 +1,36 @@ +from detectron2.config import LazyCall as L +from detectron2.layers import ShapeSpec +from detectron2.modeling.box_regression import Box2BoxTransform +from detectron2.modeling.matcher import Matcher +from detectron2.modeling.roi_heads import FastRCNNOutputLayers, FastRCNNConvFCHead, CascadeROIHeads + +from .mask_rcnn_fpn import model + +# arguments that don't exist for Cascade R-CNN +[model.roi_heads.pop(k) for k in ["box_head", "box_predictor", "proposal_matcher"]] + +model.roi_heads.update( + _target_=CascadeROIHeads, + box_heads=[ + L(FastRCNNConvFCHead)( + input_shape=ShapeSpec(channels=256, height=7, width=7), + conv_dims=[], + fc_dims=[1024, 1024], + ) + for k in range(3) + ], + box_predictors=[ + L(FastRCNNOutputLayers)( + input_shape=ShapeSpec(channels=1024), + test_score_thresh=0.05, + box2box_transform=L(Box2BoxTransform)(weights=(w1, w1, w2, w2)), + cls_agnostic_bbox_reg=True, + num_classes="${...num_classes}", + ) + for (w1, w2) in [(10, 5), (20, 10), (30, 15)] + ], + proposal_matchers=[ + L(Matcher)(thresholds=[th], labels=[0, 1], allow_low_quality_matches=False) + for th in [0.5, 0.6, 0.7] + ], +) diff --git a/configs/common/models/keypoint_rcnn_fpn.py b/configs/common/models/keypoint_rcnn_fpn.py new file mode 100644 index 0000000000000000000000000000000000000000..56b3994df249884d4816fc9a5c7f553a9ab6f400 --- /dev/null +++ b/configs/common/models/keypoint_rcnn_fpn.py @@ -0,0 +1,33 @@ +from detectron2.config import LazyCall as L +from detectron2.layers import ShapeSpec +from detectron2.modeling.poolers import ROIPooler +from 
detectron2.modeling.roi_heads import KRCNNConvDeconvUpsampleHead + +from .mask_rcnn_fpn import model + +[model.roi_heads.pop(x) for x in ["mask_in_features", "mask_pooler", "mask_head"]] + +model.roi_heads.update( + num_classes=1, + keypoint_in_features=["p2", "p3", "p4", "p5"], + keypoint_pooler=L(ROIPooler)( + output_size=14, + scales=(1.0 / 4, 1.0 / 8, 1.0 / 16, 1.0 / 32), + sampling_ratio=0, + pooler_type="ROIAlignV2", + ), + keypoint_head=L(KRCNNConvDeconvUpsampleHead)( + input_shape=ShapeSpec(channels=256, width=14, height=14), + num_keypoints=17, + conv_dims=[512] * 8, + loss_normalizer="visible", + ), +) + +# Detectron1 uses 2000 proposals per-batch, but this option is per-image in detectron2. +# 1000 proposals per-image is found to hurt box AP. +# Therefore we increase it to 1500 per-image. +model.proposal_generator.post_nms_topk = (1500, 1000) + +# Keypoint AP degrades (though box AP improves) when using plain L1 loss +model.roi_heads.box_predictor.smooth_l1_beta = 0.5 diff --git a/configs/common/models/mask_rcnn_c4.py b/configs/common/models/mask_rcnn_c4.py new file mode 100644 index 0000000000000000000000000000000000000000..a3dcf8be42a39c6e5f6e76e3ab23adeccb33085d --- /dev/null +++ b/configs/common/models/mask_rcnn_c4.py @@ -0,0 +1,88 @@ +from detectron2.config import LazyCall as L +from detectron2.layers import ShapeSpec +from detectron2.modeling.meta_arch import GeneralizedRCNN +from detectron2.modeling.anchor_generator import DefaultAnchorGenerator +from detectron2.modeling.backbone import BasicStem, BottleneckBlock, ResNet +from detectron2.modeling.box_regression import Box2BoxTransform +from detectron2.modeling.matcher import Matcher +from detectron2.modeling.poolers import ROIPooler +from detectron2.modeling.proposal_generator import RPN, StandardRPNHead +from detectron2.modeling.roi_heads import ( + FastRCNNOutputLayers, + MaskRCNNConvUpsampleHead, + Res5ROIHeads, +) + +model = L(GeneralizedRCNN)( + backbone=L(ResNet)( + 
stem=L(BasicStem)(in_channels=3, out_channels=64, norm="FrozenBN"), + stages=L(ResNet.make_default_stages)( + depth=50, + stride_in_1x1=True, + norm="FrozenBN", + ), + out_features=["res4"], + ), + proposal_generator=L(RPN)( + in_features=["res4"], + head=L(StandardRPNHead)(in_channels=1024, num_anchors=15), + anchor_generator=L(DefaultAnchorGenerator)( + sizes=[[32, 64, 128, 256, 512]], + aspect_ratios=[0.5, 1.0, 2.0], + strides=[16], + offset=0.0, + ), + anchor_matcher=L(Matcher)( + thresholds=[0.3, 0.7], labels=[0, -1, 1], allow_low_quality_matches=True + ), + box2box_transform=L(Box2BoxTransform)(weights=[1.0, 1.0, 1.0, 1.0]), + batch_size_per_image=256, + positive_fraction=0.5, + pre_nms_topk=(12000, 6000), + post_nms_topk=(2000, 1000), + nms_thresh=0.7, + ), + roi_heads=L(Res5ROIHeads)( + num_classes=80, + batch_size_per_image=512, + positive_fraction=0.25, + proposal_matcher=L(Matcher)( + thresholds=[0.5], labels=[0, 1], allow_low_quality_matches=False + ), + in_features=["res4"], + pooler=L(ROIPooler)( + output_size=14, + scales=(1.0 / 16,), + sampling_ratio=0, + pooler_type="ROIAlignV2", + ), + res5=L(ResNet.make_stage)( + block_class=BottleneckBlock, + num_blocks=3, + stride_per_block=[2, 1, 1], + in_channels=1024, + bottleneck_channels=512, + out_channels=2048, + norm="FrozenBN", + stride_in_1x1=True, + ), + box_predictor=L(FastRCNNOutputLayers)( + input_shape=L(ShapeSpec)(channels="${...res5.out_channels}", height=1, width=1), + test_score_thresh=0.05, + box2box_transform=L(Box2BoxTransform)(weights=(10, 10, 5, 5)), + num_classes="${..num_classes}", + ), + mask_head=L(MaskRCNNConvUpsampleHead)( + input_shape=L(ShapeSpec)( + channels="${...res5.out_channels}", + width="${...pooler.output_size}", + height="${...pooler.output_size}", + ), + num_classes="${..num_classes}", + conv_dims=[256], + ), + ), + pixel_mean=[103.530, 116.280, 123.675], + pixel_std=[1.0, 1.0, 1.0], + input_format="BGR", +) diff --git a/configs/common/models/mask_rcnn_fpn.py 
b/configs/common/models/mask_rcnn_fpn.py new file mode 100644 index 0000000000000000000000000000000000000000..3f87d8da83d93932ddd5e9dc5b38d42786c0cbb4 --- /dev/null +++ b/configs/common/models/mask_rcnn_fpn.py @@ -0,0 +1,93 @@ +from detectron2.config import LazyCall as L +from detectron2.layers import ShapeSpec +from detectron2.modeling.meta_arch import GeneralizedRCNN +from detectron2.modeling.anchor_generator import DefaultAnchorGenerator +from detectron2.modeling.backbone.fpn import LastLevelMaxPool +from detectron2.modeling.backbone import BasicStem, FPN, ResNet +from detectron2.modeling.box_regression import Box2BoxTransform +from detectron2.modeling.matcher import Matcher +from detectron2.modeling.poolers import ROIPooler +from detectron2.modeling.proposal_generator import RPN, StandardRPNHead +from detectron2.modeling.roi_heads import ( + StandardROIHeads, + FastRCNNOutputLayers, + MaskRCNNConvUpsampleHead, + FastRCNNConvFCHead, +) + +model = L(GeneralizedRCNN)( + backbone=L(FPN)( + bottom_up=L(ResNet)( + stem=L(BasicStem)(in_channels=3, out_channels=64, norm="FrozenBN"), + stages=L(ResNet.make_default_stages)( + depth=50, + stride_in_1x1=True, + norm="FrozenBN", + ), + out_features=["res2", "res3", "res4", "res5"], + ), + in_features="${.bottom_up.out_features}", + out_channels=256, + top_block=L(LastLevelMaxPool)(), + ), + proposal_generator=L(RPN)( + in_features=["p2", "p3", "p4", "p5", "p6"], + head=L(StandardRPNHead)(in_channels=256, num_anchors=3), + anchor_generator=L(DefaultAnchorGenerator)( + sizes=[[32], [64], [128], [256], [512]], + aspect_ratios=[0.5, 1.0, 2.0], + strides=[4, 8, 16, 32, 64], + offset=0.0, + ), + anchor_matcher=L(Matcher)( + thresholds=[0.3, 0.7], labels=[0, -1, 1], allow_low_quality_matches=True + ), + box2box_transform=L(Box2BoxTransform)(weights=[1.0, 1.0, 1.0, 1.0]), + batch_size_per_image=256, + positive_fraction=0.5, + pre_nms_topk=(2000, 1000), + post_nms_topk=(1000, 1000), + nms_thresh=0.7, + ), + 
roi_heads=L(StandardROIHeads)( + num_classes=80, + batch_size_per_image=512, + positive_fraction=0.25, + proposal_matcher=L(Matcher)( + thresholds=[0.5], labels=[0, 1], allow_low_quality_matches=False + ), + box_in_features=["p2", "p3", "p4", "p5"], + box_pooler=L(ROIPooler)( + output_size=7, + scales=(1.0 / 4, 1.0 / 8, 1.0 / 16, 1.0 / 32), + sampling_ratio=0, + pooler_type="ROIAlignV2", + ), + box_head=L(FastRCNNConvFCHead)( + input_shape=ShapeSpec(channels=256, height=7, width=7), + conv_dims=[], + fc_dims=[1024, 1024], + ), + box_predictor=L(FastRCNNOutputLayers)( + input_shape=ShapeSpec(channels=1024), + test_score_thresh=0.05, + box2box_transform=L(Box2BoxTransform)(weights=(10, 10, 5, 5)), + num_classes="${..num_classes}", + ), + mask_in_features=["p2", "p3", "p4", "p5"], + mask_pooler=L(ROIPooler)( + output_size=14, # ori is 14 + scales=(1.0 / 4, 1.0 / 8, 1.0 / 16, 1.0 / 32), + sampling_ratio=0, + pooler_type="ROIAlignV2", + ), + mask_head=L(MaskRCNNConvUpsampleHead)( + input_shape=ShapeSpec(channels=256, width=14, height=14), + num_classes="${..num_classes}", + conv_dims=[256, 256, 256, 256, 256], + ), + ), + pixel_mean=[103.530, 116.280, 123.675], + pixel_std=[1.0, 1.0, 1.0], + input_format="BGR", +) diff --git a/configs/common/models/panoptic_fpn.py b/configs/common/models/panoptic_fpn.py new file mode 100644 index 0000000000000000000000000000000000000000..88f55d2ce9db62e61445d6a3700067d9d864ecae --- /dev/null +++ b/configs/common/models/panoptic_fpn.py @@ -0,0 +1,20 @@ +from detectron2.config import LazyCall as L +from detectron2.layers import ShapeSpec +from detectron2.modeling import PanopticFPN +from detectron2.modeling.meta_arch.semantic_seg import SemSegFPNHead + +from .mask_rcnn_fpn import model + +model._target_ = PanopticFPN +model.sem_seg_head = L(SemSegFPNHead)( + input_shape={ + f: L(ShapeSpec)(stride=s, channels="${....backbone.out_channels}") + for f, s in zip(["p2", "p3", "p4", "p5"], [4, 8, 16, 32]) + }, + ignore_value=255, + 
num_classes=54, # COCO stuff + 1 + conv_dims=128, + common_stride=4, + loss_weight=0.5, + norm="GN", +) diff --git a/configs/common/models/retinanet.py b/configs/common/models/retinanet.py new file mode 100644 index 0000000000000000000000000000000000000000..01d168fe6f054b88933488bdc65516424ce917cd --- /dev/null +++ b/configs/common/models/retinanet.py @@ -0,0 +1,52 @@ +# -*- coding: utf-8 -*- + +from detectron2.config import LazyCall as L +from detectron2.layers import ShapeSpec +from detectron2.modeling.meta_arch import RetinaNet +from detectron2.modeling.anchor_generator import DefaultAnchorGenerator +from detectron2.modeling.backbone.fpn import LastLevelP6P7 +from detectron2.modeling.backbone import BasicStem, FPN, ResNet +from detectron2.modeling.box_regression import Box2BoxTransform +from detectron2.modeling.matcher import Matcher +from detectron2.modeling.meta_arch.retinanet import RetinaNetHead + +model = L(RetinaNet)( + backbone=L(FPN)( + bottom_up=L(ResNet)( + stem=L(BasicStem)(in_channels=3, out_channels=64, norm="FrozenBN"), + stages=L(ResNet.make_default_stages)( + depth=50, + stride_in_1x1=True, + norm="FrozenBN", + ), + out_features=["res3", "res4", "res5"], + ), + in_features=["res3", "res4", "res5"], + out_channels=256, + top_block=L(LastLevelP6P7)(in_channels=2048, out_channels="${..out_channels}"), + ), + head=L(RetinaNetHead)( + input_shape=[ShapeSpec(channels=256)], + num_classes="${..num_classes}", + conv_dims=[256, 256, 256, 256], + prior_prob=0.01, + num_anchors=9, + ), + anchor_generator=L(DefaultAnchorGenerator)( + sizes=[[x, x * 2 ** (1.0 / 3), x * 2 ** (2.0 / 3)] for x in [32, 64, 128, 256, 512]], + aspect_ratios=[0.5, 1.0, 2.0], + strides=[8, 16, 32, 64, 128], + offset=0.0, + ), + box2box_transform=L(Box2BoxTransform)(weights=[1.0, 1.0, 1.0, 1.0]), + anchor_matcher=L(Matcher)( + thresholds=[0.4, 0.5], labels=[0, -1, 1], allow_low_quality_matches=True + ), + num_classes=80, + head_in_features=["p3", "p4", "p5", "p6", "p7"], + 
focal_loss_alpha=0.25, + focal_loss_gamma=2.0, + pixel_mean=[103.530, 116.280, 123.675], + pixel_std=[1.0, 1.0, 1.0], + input_format="BGR", +) diff --git a/configs/common/optim.py b/configs/common/optim.py new file mode 100644 index 0000000000000000000000000000000000000000..d39d3aaa546c17e831d21d1758b69e8c1609415e --- /dev/null +++ b/configs/common/optim.py @@ -0,0 +1,15 @@ +import torch + +from detectron2.config import LazyCall as L +from detectron2.solver.build import get_default_optimizer_params + +SGD = L(torch.optim.SGD)( + params=L(get_default_optimizer_params)( + # params.model is meant to be set to the model object, before instantiating + # the optimizer. + weight_decay_norm=0.0 + ), + lr=0.02, + momentum=0.9, + weight_decay=1e-4, +) diff --git a/configs/common/train.py b/configs/common/train.py new file mode 100644 index 0000000000000000000000000000000000000000..7c63bdb073797e48e0b3640e668ecc1d5c137d59 --- /dev/null +++ b/configs/common/train.py @@ -0,0 +1,18 @@ +# Common training-related configs that are designed for "tools/lazyconfig_train_net.py" +# You can use your own instead, together with your own train_net.py +train = dict( + output_dir="./output", + init_checkpoint="detectron2://ImageNetPretrained/MSRA/R-50.pkl", + max_iter=90000, + amp=dict(enabled=False), # options for Automatic Mixed Precision + ddp=dict( # options for DistributedDataParallel + broadcast_buffers=False, + find_unused_parameters=False, + fp16_compression=False, + ), + checkpointer=dict(period=5000, max_to_keep=100), # options for PeriodicCheckpointer + eval_period=5000, + log_period=20, + device="cuda" + # ... 
+) diff --git a/configs/new_baselines/mask_rcnn_R_101_FPN_100ep_LSJ.py b/configs/new_baselines/mask_rcnn_R_101_FPN_100ep_LSJ.py new file mode 100644 index 0000000000000000000000000000000000000000..3740e9bb08c5f168a9ab3a6d94561678bad1775c --- /dev/null +++ b/configs/new_baselines/mask_rcnn_R_101_FPN_100ep_LSJ.py @@ -0,0 +1,9 @@ +from .mask_rcnn_R_50_FPN_100ep_LSJ import ( + dataloader, + lr_multiplier, + model, + optimizer, + train, +) + +model.backbone.bottom_up.stages.depth = 101 diff --git a/configs/new_baselines/mask_rcnn_R_101_FPN_200ep_LSJ.py b/configs/new_baselines/mask_rcnn_R_101_FPN_200ep_LSJ.py new file mode 100644 index 0000000000000000000000000000000000000000..18e5f0720c568db4ef0c97b59688b5e7866df606 --- /dev/null +++ b/configs/new_baselines/mask_rcnn_R_101_FPN_200ep_LSJ.py @@ -0,0 +1,14 @@ +from .mask_rcnn_R_101_FPN_100ep_LSJ import ( + dataloader, + lr_multiplier, + model, + optimizer, + train, +) + +train.max_iter *= 2 # 100ep -> 200ep + +lr_multiplier.scheduler.milestones = [ + milestone * 2 for milestone in lr_multiplier.scheduler.milestones +] +lr_multiplier.scheduler.num_updates = train.max_iter diff --git a/configs/new_baselines/mask_rcnn_R_101_FPN_400ep_LSJ.py b/configs/new_baselines/mask_rcnn_R_101_FPN_400ep_LSJ.py new file mode 100644 index 0000000000000000000000000000000000000000..63c54ee9a5ce2368494b775cc90fada1439feaa5 --- /dev/null +++ b/configs/new_baselines/mask_rcnn_R_101_FPN_400ep_LSJ.py @@ -0,0 +1,14 @@ +from .mask_rcnn_R_101_FPN_100ep_LSJ import ( + dataloader, + lr_multiplier, + model, + optimizer, + train, +) + +train.max_iter *= 4 # 100ep -> 400ep + +lr_multiplier.scheduler.milestones = [ + milestone * 4 for milestone in lr_multiplier.scheduler.milestones +] +lr_multiplier.scheduler.num_updates = train.max_iter diff --git a/configs/new_baselines/mask_rcnn_R_50_FPN_100ep_LSJ.py b/configs/new_baselines/mask_rcnn_R_50_FPN_100ep_LSJ.py new file mode 100644 index 
0000000000000000000000000000000000000000..df7a2aedf480ed8dc4aa3645e37420e9b893fae4 --- /dev/null +++ b/configs/new_baselines/mask_rcnn_R_50_FPN_100ep_LSJ.py @@ -0,0 +1,72 @@ +import detectron2.data.transforms as T +from detectron2.config.lazy import LazyCall as L +from detectron2.layers.batch_norm import NaiveSyncBatchNorm +from detectron2.solver import WarmupParamScheduler +from fvcore.common.param_scheduler import MultiStepParamScheduler + +from ..common.data.coco import dataloader +from ..common.models.mask_rcnn_fpn import model +from ..common.optim import SGD as optimizer +from ..common.train import train + +# train from scratch +train.init_checkpoint = "" +train.amp.enabled = True +train.ddp.fp16_compression = True +model.backbone.bottom_up.freeze_at = 0 + +# SyncBN +# fmt: off +model.backbone.bottom_up.stem.norm = \ + model.backbone.bottom_up.stages.norm = \ + model.backbone.norm = "SyncBN" + +# Using NaiveSyncBatchNorm because heads may have empty input. That is not supported by +# torch.nn.SyncBatchNorm. We can remove this after +# https://github.com/pytorch/pytorch/issues/36530 is fixed. 
+model.roi_heads.box_head.conv_norm = \ + model.roi_heads.mask_head.conv_norm = lambda c: NaiveSyncBatchNorm(c, + stats_mode="N") +# fmt: on + +# 2conv in RPN: +# https://github.com/tensorflow/tpu/blob/b24729de804fdb751b06467d3dce0637fa652060/models/official/detection/modeling/architecture/heads.py#L95-L97 # noqa: E501, B950 +model.proposal_generator.head.conv_dims = [-1, -1] + +# 4conv1fc box head +model.roi_heads.box_head.conv_dims = [256, 256, 256, 256] +model.roi_heads.box_head.fc_dims = [1024] + +# resize_and_crop_image in: +# https://github.com/tensorflow/tpu/blob/b24729de804fdb751b06467d3dce0637fa652060/models/official/detection/utils/input_utils.py#L127 # noqa: E501, B950 +image_size = 1024 +dataloader.train.mapper.augmentations = [ + L(T.ResizeScale)( + min_scale=0.1, max_scale=2.0, target_height=image_size, target_width=image_size + ), + L(T.FixedSizeCrop)(crop_size=(image_size, image_size)), + L(T.RandomFlip)(horizontal=True), +] + +# recompute boxes due to cropping +dataloader.train.mapper.recompute_boxes = True + +# larger batch-size. +dataloader.train.total_batch_size = 64 + +# Equivalent to 100 epochs. 
+# 100 ep = 184375 iters * 64 images/iter / 118000 images/ep +train.max_iter = 184375 + +lr_multiplier = L(WarmupParamScheduler)( + scheduler=L(MultiStepParamScheduler)( + values=[1.0, 0.1, 0.01], + milestones=[163889, 177546], + num_updates=train.max_iter, + ), + warmup_length=500 / train.max_iter, + warmup_factor=0.067, +) + +optimizer.lr = 0.1 +optimizer.weight_decay = 4e-5 diff --git a/configs/new_baselines/mask_rcnn_R_50_FPN_200ep_LSJ.py b/configs/new_baselines/mask_rcnn_R_50_FPN_200ep_LSJ.py new file mode 100644 index 0000000000000000000000000000000000000000..2a7c376da5f9269197c44079f3e0f3b09cdc63fa --- /dev/null +++ b/configs/new_baselines/mask_rcnn_R_50_FPN_200ep_LSJ.py @@ -0,0 +1,14 @@ +from .mask_rcnn_R_50_FPN_100ep_LSJ import ( + dataloader, + lr_multiplier, + model, + optimizer, + train, +) + +train.max_iter *= 2 # 100ep -> 200ep + +lr_multiplier.scheduler.milestones = [ + milestone * 2 for milestone in lr_multiplier.scheduler.milestones +] +lr_multiplier.scheduler.num_updates = train.max_iter diff --git a/configs/new_baselines/mask_rcnn_R_50_FPN_400ep_LSJ.py b/configs/new_baselines/mask_rcnn_R_50_FPN_400ep_LSJ.py new file mode 100644 index 0000000000000000000000000000000000000000..97586b8f5330a9d995a0bffd1f5e7bd5b5656462 --- /dev/null +++ b/configs/new_baselines/mask_rcnn_R_50_FPN_400ep_LSJ.py @@ -0,0 +1,14 @@ +from .mask_rcnn_R_50_FPN_100ep_LSJ import ( + dataloader, + lr_multiplier, + model, + optimizer, + train, +) + +train.max_iter *= 4 # 100ep -> 400ep + +lr_multiplier.scheduler.milestones = [ + milestone * 4 for milestone in lr_multiplier.scheduler.milestones +] +lr_multiplier.scheduler.num_updates = train.max_iter diff --git a/configs/new_baselines/mask_rcnn_R_50_FPN_50ep_LSJ.py b/configs/new_baselines/mask_rcnn_R_50_FPN_50ep_LSJ.py new file mode 100644 index 0000000000000000000000000000000000000000..2ca1ede262cf5c37a3a54778458c74aff1479411 --- /dev/null +++ b/configs/new_baselines/mask_rcnn_R_50_FPN_50ep_LSJ.py @@ -0,0 +1,14 @@ +from 
.mask_rcnn_R_50_FPN_100ep_LSJ import ( + dataloader, + lr_multiplier, + model, + optimizer, + train, +) + +train.max_iter //= 2 # 100ep -> 50ep + +lr_multiplier.scheduler.milestones = [ + milestone // 2 for milestone in lr_multiplier.scheduler.milestones +] +lr_multiplier.scheduler.num_updates = train.max_iter diff --git a/configs/new_baselines/mask_rcnn_regnetx_4gf_dds_FPN_100ep_LSJ.py b/configs/new_baselines/mask_rcnn_regnetx_4gf_dds_FPN_100ep_LSJ.py new file mode 100644 index 0000000000000000000000000000000000000000..249387fffeed7c02f592ecc84ee5a295533b1ed7 --- /dev/null +++ b/configs/new_baselines/mask_rcnn_regnetx_4gf_dds_FPN_100ep_LSJ.py @@ -0,0 +1,29 @@ +from .mask_rcnn_R_50_FPN_100ep_LSJ import ( + dataloader, + lr_multiplier, + model, + optimizer, + train, +) +from detectron2.config import LazyCall as L +from detectron2.modeling.backbone import RegNet +from detectron2.modeling.backbone.regnet import SimpleStem, ResBottleneckBlock + +# Config source: +# https://github.com/facebookresearch/detectron2/blob/master/configs/COCO-InstanceSegmentation/mask_rcnn_regnetx_4gf_dds_fpn_1x.py # noqa +model.backbone.bottom_up = L(RegNet)( + stem_class=SimpleStem, + stem_width=32, + block_class=ResBottleneckBlock, + depth=23, + w_a=38.65, + w_0=96, + w_m=2.43, + group_width=40, + norm="SyncBN", + out_features=["s1", "s2", "s3", "s4"], +) +model.pixel_std = [57.375, 57.120, 58.395] + +# RegNets benefit from enabling cudnn benchmark mode +train.cudnn_benchmark = True diff --git a/configs/new_baselines/mask_rcnn_regnetx_4gf_dds_FPN_200ep_LSJ.py b/configs/new_baselines/mask_rcnn_regnetx_4gf_dds_FPN_200ep_LSJ.py new file mode 100644 index 0000000000000000000000000000000000000000..731320e74ebed4d8ceec58c07cb906542b8b021b --- /dev/null +++ b/configs/new_baselines/mask_rcnn_regnetx_4gf_dds_FPN_200ep_LSJ.py @@ -0,0 +1,14 @@ +from .mask_rcnn_regnetx_4gf_dds_FPN_100ep_LSJ import ( + dataloader, + lr_multiplier, + model, + optimizer, + train, +) + +train.max_iter *= 2 # 100ep -> 
200ep + +lr_multiplier.scheduler.milestones = [ + milestone * 2 for milestone in lr_multiplier.scheduler.milestones +] +lr_multiplier.scheduler.num_updates = train.max_iter diff --git a/configs/new_baselines/mask_rcnn_regnetx_4gf_dds_FPN_400ep_LSJ.py b/configs/new_baselines/mask_rcnn_regnetx_4gf_dds_FPN_400ep_LSJ.py new file mode 100644 index 0000000000000000000000000000000000000000..8f369a2afedb6c6e69fd52ff9a9a6b1cdf965937 --- /dev/null +++ b/configs/new_baselines/mask_rcnn_regnetx_4gf_dds_FPN_400ep_LSJ.py @@ -0,0 +1,14 @@ +from .mask_rcnn_regnetx_4gf_dds_FPN_100ep_LSJ import ( + dataloader, + lr_multiplier, + model, + optimizer, + train, +) + +train.max_iter *= 4 # 100ep -> 400ep + +lr_multiplier.scheduler.milestones = [ + milestone * 4 for milestone in lr_multiplier.scheduler.milestones +] +lr_multiplier.scheduler.num_updates = train.max_iter diff --git a/configs/new_baselines/mask_rcnn_regnety_4gf_dds_FPN_100ep_LSJ.py b/configs/new_baselines/mask_rcnn_regnety_4gf_dds_FPN_100ep_LSJ.py new file mode 100644 index 0000000000000000000000000000000000000000..da94e6f90d823f110e4a2373d7fd16b3d1ab5ac3 --- /dev/null +++ b/configs/new_baselines/mask_rcnn_regnety_4gf_dds_FPN_100ep_LSJ.py @@ -0,0 +1,30 @@ +from .mask_rcnn_R_50_FPN_100ep_LSJ import ( + dataloader, + lr_multiplier, + model, + optimizer, + train, +) +from detectron2.config import LazyCall as L +from detectron2.modeling.backbone import RegNet +from detectron2.modeling.backbone.regnet import SimpleStem, ResBottleneckBlock + +# Config source: +# https://github.com/facebookresearch/detectron2/blob/master/configs/COCO-InstanceSegmentation/mask_rcnn_regnety_4gf_dds_fpn_1x.py # noqa +model.backbone.bottom_up = L(RegNet)( + stem_class=SimpleStem, + stem_width=32, + block_class=ResBottleneckBlock, + depth=22, + w_a=31.41, + w_0=96, + w_m=2.24, + group_width=64, + se_ratio=0.25, + norm="SyncBN", + out_features=["s1", "s2", "s3", "s4"], +) +model.pixel_std = [57.375, 57.120, 58.395] + +# RegNets benefit from enabling 
cudnn benchmark mode +train.cudnn_benchmark = True diff --git a/configs/new_baselines/mask_rcnn_regnety_4gf_dds_FPN_200ep_LSJ.py b/configs/new_baselines/mask_rcnn_regnety_4gf_dds_FPN_200ep_LSJ.py new file mode 100644 index 0000000000000000000000000000000000000000..b867cc865e5ac4d7b70221da141894efd7cbd75c --- /dev/null +++ b/configs/new_baselines/mask_rcnn_regnety_4gf_dds_FPN_200ep_LSJ.py @@ -0,0 +1,14 @@ +from .mask_rcnn_regnety_4gf_dds_FPN_100ep_LSJ import ( + dataloader, + lr_multiplier, + model, + optimizer, + train, +) + +train.max_iter *= 2 # 100ep -> 200ep + +lr_multiplier.scheduler.milestones = [ + milestone * 2 for milestone in lr_multiplier.scheduler.milestones +] +lr_multiplier.scheduler.num_updates = train.max_iter diff --git a/configs/new_baselines/mask_rcnn_regnety_4gf_dds_FPN_400ep_LSJ.py b/configs/new_baselines/mask_rcnn_regnety_4gf_dds_FPN_400ep_LSJ.py new file mode 100644 index 0000000000000000000000000000000000000000..7b86ea8c6c5c48f5d26c9e0df7cf96e745b17b34 --- /dev/null +++ b/configs/new_baselines/mask_rcnn_regnety_4gf_dds_FPN_400ep_LSJ.py @@ -0,0 +1,14 @@ +from .mask_rcnn_regnety_4gf_dds_FPN_100ep_LSJ import ( + dataloader, + lr_multiplier, + model, + optimizer, + train, +) + +train.max_iter *= 4 # 100ep -> 400ep + +lr_multiplier.scheduler.milestones = [ + milestone * 4 for milestone in lr_multiplier.scheduler.milestones +] +lr_multiplier.scheduler.num_updates = train.max_iter diff --git a/configs/quick_schedules/README.md b/configs/quick_schedules/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4e6c82ef3f75a73c7006f33d7c850a0d4781a58f --- /dev/null +++ b/configs/quick_schedules/README.md @@ -0,0 +1,8 @@ +These are quick configs for performance or accuracy regression tracking purposes. + +* `*instant_test.yaml`: can train on 2 GPUs. They are used to test whether the training can + successfully finish. They are not expected to produce reasonable training results.
+* `*inference_acc_test.yaml`: They should be run using `--eval-only`. They run inference using pre-trained models and verify + the results are as expected. +* `*training_acc_test.yaml`: They should be trained on 8 GPUs. They finish in about an hour and verify the training accuracy + is within the normal range. diff --git a/configs/quick_schedules/cascade_mask_rcnn_R_50_FPN_inference_acc_test.yaml b/configs/quick_schedules/cascade_mask_rcnn_R_50_FPN_inference_acc_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fc5a4116cb096278823049c1f823e99f8e16e97e --- /dev/null +++ b/configs/quick_schedules/cascade_mask_rcnn_R_50_FPN_inference_acc_test.yaml @@ -0,0 +1,7 @@ +_BASE_: "../Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml" +MODEL: + WEIGHTS: "detectron2://Misc/cascade_mask_rcnn_R_50_FPN_3x/144998488/model_final_480dd8.pkl" +DATASETS: + TEST: ("coco_2017_val_100",) +TEST: + EXPECTED_RESULTS: [["bbox", "AP", 50.18, 0.02], ["segm", "AP", 43.87, 0.02]] diff --git a/configs/quick_schedules/cascade_mask_rcnn_R_50_FPN_instant_test.yaml b/configs/quick_schedules/cascade_mask_rcnn_R_50_FPN_instant_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e41a0fe7ffe9c3531741df49e546aa45cfe4fdee --- /dev/null +++ b/configs/quick_schedules/cascade_mask_rcnn_R_50_FPN_instant_test.yaml @@ -0,0 +1,11 @@ +_BASE_: "../Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml" +DATASETS: + TRAIN: ("coco_2017_val_100",) + TEST: ("coco_2017_val_100",) +SOLVER: + BASE_LR: 0.005 + STEPS: (30,) + MAX_ITER: 40 + IMS_PER_BATCH: 4 +DATALOADER: + NUM_WORKERS: 2 diff --git a/configs/quick_schedules/fast_rcnn_R_50_FPN_inference_acc_test.yaml b/configs/quick_schedules/fast_rcnn_R_50_FPN_inference_acc_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a2f37e5e2cc2a9e195e13703e9930e67e0f9a896 --- /dev/null +++ b/configs/quick_schedules/fast_rcnn_R_50_FPN_inference_acc_test.yaml @@ -0,0 +1,7 @@ +_BASE_: 
"../COCO-Detection/fast_rcnn_R_50_FPN_1x.yaml" +MODEL: + WEIGHTS: "detectron2://COCO-Detection/fast_rcnn_R_50_FPN_1x/137635226/model_final_e5f7ce.pkl" +DATASETS: + TEST: ("coco_2017_val_100",) +TEST: + EXPECTED_RESULTS: [["bbox", "AP", 45.70, 0.02]] diff --git a/configs/quick_schedules/fast_rcnn_R_50_FPN_instant_test.yaml b/configs/quick_schedules/fast_rcnn_R_50_FPN_instant_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..52fc0ec03c8b87ab2be1dda97bec1e8c93e6bb5c --- /dev/null +++ b/configs/quick_schedules/fast_rcnn_R_50_FPN_instant_test.yaml @@ -0,0 +1,15 @@ +_BASE_: "../COCO-Detection/fast_rcnn_R_50_FPN_1x.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" +DATASETS: + TRAIN: ("coco_2017_val_100",) + PROPOSAL_FILES_TRAIN: ("detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/coco_2017_val_box_proposals_ee0dad.pkl", ) + TEST: ("coco_2017_val_100",) + PROPOSAL_FILES_TEST: ("detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/coco_2017_val_box_proposals_ee0dad.pkl", ) +SOLVER: + BASE_LR: 0.005 + STEPS: (30,) + MAX_ITER: 40 + IMS_PER_BATCH: 4 +DATALOADER: + NUM_WORKERS: 2 diff --git a/configs/quick_schedules/keypoint_rcnn_R_50_FPN_inference_acc_test.yaml b/configs/quick_schedules/keypoint_rcnn_R_50_FPN_inference_acc_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..14cf2aa82aec52ad44e28ead0665dad811d55457 --- /dev/null +++ b/configs/quick_schedules/keypoint_rcnn_R_50_FPN_inference_acc_test.yaml @@ -0,0 +1,7 @@ +_BASE_: "../COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml" +MODEL: + WEIGHTS: "detectron2://COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x/137849621/model_final_a6e10b.pkl" +DATASETS: + TEST: ("keypoints_coco_2017_val_100",) +TEST: + EXPECTED_RESULTS: [["bbox", "AP", 52.47, 0.02], ["keypoints", "AP", 67.36, 0.02]] diff --git a/configs/quick_schedules/keypoint_rcnn_R_50_FPN_instant_test.yaml b/configs/quick_schedules/keypoint_rcnn_R_50_FPN_instant_test.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..3dd209f693bd0bfdd46a2c9e7e750dede3abc141 --- /dev/null +++ b/configs/quick_schedules/keypoint_rcnn_R_50_FPN_instant_test.yaml @@ -0,0 +1,16 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + KEYPOINT_ON: True + ROI_HEADS: + NUM_CLASSES: 1 +DATASETS: + TRAIN: ("keypoints_coco_2017_val_100",) + TEST: ("keypoints_coco_2017_val_100",) +SOLVER: + BASE_LR: 0.005 + STEPS: (30,) + MAX_ITER: 40 + IMS_PER_BATCH: 4 +DATALOADER: + NUM_WORKERS: 2 diff --git a/configs/quick_schedules/keypoint_rcnn_R_50_FPN_normalized_training_acc_test.yaml b/configs/quick_schedules/keypoint_rcnn_R_50_FPN_normalized_training_acc_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4b92392f1c4457033ae4c87a521e339fe9e184ce --- /dev/null +++ b/configs/quick_schedules/keypoint_rcnn_R_50_FPN_normalized_training_acc_test.yaml @@ -0,0 +1,30 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + KEYPOINT_ON: True + RESNETS: + DEPTH: 50 + ROI_HEADS: + BATCH_SIZE_PER_IMAGE: 256 + NUM_CLASSES: 1 + ROI_KEYPOINT_HEAD: + POOLER_RESOLUTION: 14 + POOLER_SAMPLING_RATIO: 2 + NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS: False + LOSS_WEIGHT: 4.0 + ROI_BOX_HEAD: + SMOOTH_L1_BETA: 1.0 # Keypoint AP degrades when using plain L1 loss + RPN: + SMOOTH_L1_BETA: 0.2 # Keypoint AP degrades when using plain L1 loss +DATASETS: + TRAIN: ("keypoints_coco_2017_val",) + TEST: ("keypoints_coco_2017_val",) +INPUT: + MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) +SOLVER: + WARMUP_FACTOR: 0.33333333 + WARMUP_ITERS: 100 + STEPS: (5500, 5800) + MAX_ITER: 6000 +TEST: + EXPECTED_RESULTS: [["bbox", "AP", 55.35, 1.0], ["keypoints", "AP", 76.91, 1.0]] diff --git a/configs/quick_schedules/keypoint_rcnn_R_50_FPN_training_acc_test.yaml b/configs/quick_schedules/keypoint_rcnn_R_50_FPN_training_acc_test.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..9bd962878fea64035887c48981beeb8d41bfdbd0 --- /dev/null +++ b/configs/quick_schedules/keypoint_rcnn_R_50_FPN_training_acc_test.yaml @@ -0,0 +1,28 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + KEYPOINT_ON: True + RESNETS: + DEPTH: 50 + ROI_HEADS: + BATCH_SIZE_PER_IMAGE: 256 + NUM_CLASSES: 1 + ROI_KEYPOINT_HEAD: + POOLER_RESOLUTION: 14 + POOLER_SAMPLING_RATIO: 2 + ROI_BOX_HEAD: + SMOOTH_L1_BETA: 1.0 # Keypoint AP degrades when using plain L1 loss + RPN: + SMOOTH_L1_BETA: 0.2 # Keypoint AP degrades when using plain L1 loss +DATASETS: + TRAIN: ("keypoints_coco_2017_val",) + TEST: ("keypoints_coco_2017_val",) +INPUT: + MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) +SOLVER: + WARMUP_FACTOR: 0.33333333 + WARMUP_ITERS: 100 + STEPS: (5500, 5800) + MAX_ITER: 6000 +TEST: + EXPECTED_RESULTS: [["bbox", "AP", 53.5, 1.0], ["keypoints", "AP", 72.4, 1.0]] diff --git a/configs/quick_schedules/mask_rcnn_R_50_C4_GCV_instant_test.yaml b/configs/quick_schedules/mask_rcnn_R_50_C4_GCV_instant_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ab6e69812b94ea7e071f29d9a6937d5c70805b5b --- /dev/null +++ b/configs/quick_schedules/mask_rcnn_R_50_C4_GCV_instant_test.yaml @@ -0,0 +1,18 @@ +_BASE_: "../Base-RCNN-C4.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True +DATASETS: + TRAIN: ("coco_2017_val_100",) + TEST: ("coco_2017_val_100",) +SOLVER: + BASE_LR: 0.001 + STEPS: (30,) + MAX_ITER: 40 + IMS_PER_BATCH: 4 + CLIP_GRADIENTS: + ENABLED: True + CLIP_TYPE: "value" + CLIP_VALUE: 1.0 +DATALOADER: + NUM_WORKERS: 2 diff --git a/configs/quick_schedules/mask_rcnn_R_50_C4_inference_acc_test.yaml b/configs/quick_schedules/mask_rcnn_R_50_C4_inference_acc_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b2d5b7ff87e069f8c774a230bdfd47b8c12d18a3 --- /dev/null +++ 
b/configs/quick_schedules/mask_rcnn_R_50_C4_inference_acc_test.yaml @@ -0,0 +1,7 @@ +_BASE_: "../COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x.yaml" +MODEL: + WEIGHTS: "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x/137849525/model_final_4ce675.pkl" +DATASETS: + TEST: ("coco_2017_val_100",) +TEST: + EXPECTED_RESULTS: [["bbox", "AP", 47.37, 0.02], ["segm", "AP", 40.99, 0.02]] diff --git a/configs/quick_schedules/mask_rcnn_R_50_C4_instant_test.yaml b/configs/quick_schedules/mask_rcnn_R_50_C4_instant_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6c4f1214efa520944fd941daec082ad45c164a23 --- /dev/null +++ b/configs/quick_schedules/mask_rcnn_R_50_C4_instant_test.yaml @@ -0,0 +1,14 @@ +_BASE_: "../Base-RCNN-C4.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True +DATASETS: + TRAIN: ("coco_2017_val_100",) + TEST: ("coco_2017_val_100",) +SOLVER: + BASE_LR: 0.001 + STEPS: (30,) + MAX_ITER: 40 + IMS_PER_BATCH: 4 +DATALOADER: + NUM_WORKERS: 2 diff --git a/configs/quick_schedules/mask_rcnn_R_50_C4_training_acc_test.yaml b/configs/quick_schedules/mask_rcnn_R_50_C4_training_acc_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f68dd8f96c7896b5fc95d694a399f2ce417c1deb --- /dev/null +++ b/configs/quick_schedules/mask_rcnn_R_50_C4_training_acc_test.yaml @@ -0,0 +1,22 @@ +_BASE_: "../Base-RCNN-C4.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + ROI_HEADS: + BATCH_SIZE_PER_IMAGE: 256 + MASK_ON: True +DATASETS: + TRAIN: ("coco_2017_val",) + TEST: ("coco_2017_val",) +INPUT: + MIN_SIZE_TRAIN: (600,) + MAX_SIZE_TRAIN: 1000 + MIN_SIZE_TEST: 800 + MAX_SIZE_TEST: 1000 +SOLVER: + IMS_PER_BATCH: 8 # base uses 16 + WARMUP_FACTOR: 0.33333 + WARMUP_ITERS: 100 + STEPS: (11000, 11600) + MAX_ITER: 12000 +TEST: + EXPECTED_RESULTS: [["bbox", "AP", 41.88, 0.7], ["segm", "AP", 33.79, 0.5]] diff --git a/configs/quick_schedules/mask_rcnn_R_50_DC5_inference_acc_test.yaml 
b/configs/quick_schedules/mask_rcnn_R_50_DC5_inference_acc_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e3ce6cf922ae07fba5b5e01edbac19bf58a8e9dd --- /dev/null +++ b/configs/quick_schedules/mask_rcnn_R_50_DC5_inference_acc_test.yaml @@ -0,0 +1,7 @@ +_BASE_: "../COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x.yaml" +MODEL: + WEIGHTS: "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x/137849551/model_final_84107b.pkl" +DATASETS: + TEST: ("coco_2017_val_100",) +TEST: + EXPECTED_RESULTS: [["bbox", "AP", 47.44, 0.02], ["segm", "AP", 42.94, 0.02]] diff --git a/configs/quick_schedules/mask_rcnn_R_50_FPN_inference_acc_test.yaml b/configs/quick_schedules/mask_rcnn_R_50_FPN_inference_acc_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e5454bfd95cc37749c50aec7866f32d9a80ca2b7 --- /dev/null +++ b/configs/quick_schedules/mask_rcnn_R_50_FPN_inference_acc_test.yaml @@ -0,0 +1,10 @@ +_BASE_: "../COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml" +MODEL: + WEIGHTS: "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl" +DATASETS: + TEST: ("coco_2017_val_100",) +TEST: + EXPECTED_RESULTS: [["bbox", "AP", 47.34, 0.02], ["segm", "AP", 42.67, 0.02], ["bbox_TTA", "AP", 49.11, 0.02], ["segm_TTA", "AP", 45.04, 0.02]] + AUG: + ENABLED: True + MIN_SIZES: (700, 800) # to save some time diff --git a/configs/quick_schedules/mask_rcnn_R_50_FPN_instant_test.yaml b/configs/quick_schedules/mask_rcnn_R_50_FPN_instant_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6dbfcde0bf837990634d419a6dda1e2909c3cd7f --- /dev/null +++ b/configs/quick_schedules/mask_rcnn_R_50_FPN_instant_test.yaml @@ -0,0 +1,14 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True +DATASETS: + TRAIN: ("coco_2017_val_100",) + TEST: ("coco_2017_val_100",) +SOLVER: + BASE_LR: 0.005 + STEPS: (30,) + MAX_ITER: 40 + 
IMS_PER_BATCH: 4 +DATALOADER: + NUM_WORKERS: 2 diff --git a/configs/quick_schedules/mask_rcnn_R_50_FPN_pred_boxes_training_acc_test.yaml b/configs/quick_schedules/mask_rcnn_R_50_FPN_pred_boxes_training_acc_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..52f78762bda23331c97afd523cf98a5c118b113e --- /dev/null +++ b/configs/quick_schedules/mask_rcnn_R_50_FPN_pred_boxes_training_acc_test.yaml @@ -0,0 +1,6 @@ +_BASE_: "./mask_rcnn_R_50_FPN_training_acc_test.yaml" +MODEL: + ROI_BOX_HEAD: + TRAIN_ON_PRED_BOXES: True +TEST: + EXPECTED_RESULTS: [["bbox", "AP", 42.6, 1.0], ["segm", "AP", 35.8, 0.8]] diff --git a/configs/quick_schedules/mask_rcnn_R_50_FPN_training_acc_test.yaml b/configs/quick_schedules/mask_rcnn_R_50_FPN_training_acc_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..aadae4ce898761e1e40e5af65a9e5ea01053b936 --- /dev/null +++ b/configs/quick_schedules/mask_rcnn_R_50_FPN_training_acc_test.yaml @@ -0,0 +1,21 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + ROI_HEADS: + BATCH_SIZE_PER_IMAGE: 256 + MASK_ON: True +DATASETS: + TRAIN: ("coco_2017_val",) + TEST: ("coco_2017_val",) +INPUT: + MIN_SIZE_TRAIN: (600,) + MAX_SIZE_TRAIN: 1000 + MIN_SIZE_TEST: 800 + MAX_SIZE_TEST: 1000 +SOLVER: + WARMUP_FACTOR: 0.3333333 + WARMUP_ITERS: 100 + STEPS: (5500, 5800) + MAX_ITER: 6000 +TEST: + EXPECTED_RESULTS: [["bbox", "AP", 42.5, 1.0], ["segm", "AP", 35.8, 0.8]] diff --git a/configs/quick_schedules/panoptic_fpn_R_50_inference_acc_test.yaml b/configs/quick_schedules/panoptic_fpn_R_50_inference_acc_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..70874e3a92c9034d75cbbebb145b61084ba15e42 --- /dev/null +++ b/configs/quick_schedules/panoptic_fpn_R_50_inference_acc_test.yaml @@ -0,0 +1,7 @@ +_BASE_: "../COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml" +MODEL: + WEIGHTS: 
"detectron2://COCO-PanopticSegmentation/panoptic_fpn_R_50_3x/139514569/model_final_c10459.pkl" +DATASETS: + TEST: ("coco_2017_val_100_panoptic_separated",) +TEST: + EXPECTED_RESULTS: [["bbox", "AP", 46.47, 0.02], ["segm", "AP", 43.39, 0.02], ["sem_seg", "mIoU", 42.55, 0.02], ["panoptic_seg", "PQ", 38.99, 0.02]] diff --git a/configs/quick_schedules/panoptic_fpn_R_50_instant_test.yaml b/configs/quick_schedules/panoptic_fpn_R_50_instant_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7cdee7bfcf6dc75dda52602a0d9177ad0a9cc6ed --- /dev/null +++ b/configs/quick_schedules/panoptic_fpn_R_50_instant_test.yaml @@ -0,0 +1,19 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + META_ARCHITECTURE: "PanopticFPN" + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True + RESNETS: + DEPTH: 50 + SEM_SEG_HEAD: + LOSS_WEIGHT: 0.5 +DATASETS: + TRAIN: ("coco_2017_val_100_panoptic_separated",) + TEST: ("coco_2017_val_100_panoptic_separated",) +SOLVER: + BASE_LR: 0.005 + STEPS: (30,) + MAX_ITER: 40 + IMS_PER_BATCH: 4 +DATALOADER: + NUM_WORKERS: 1 diff --git a/configs/quick_schedules/panoptic_fpn_R_50_training_acc_test.yaml b/configs/quick_schedules/panoptic_fpn_R_50_training_acc_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f3bbf30196cb35434340d4c343cab0c96283cd4f --- /dev/null +++ b/configs/quick_schedules/panoptic_fpn_R_50_training_acc_test.yaml @@ -0,0 +1,20 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + META_ARCHITECTURE: "PanopticFPN" + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True + RESNETS: + DEPTH: 50 + SEM_SEG_HEAD: + LOSS_WEIGHT: 0.5 +DATASETS: + TRAIN: ("coco_2017_val_panoptic_separated",) + TEST: ("coco_2017_val_panoptic_separated",) +SOLVER: + BASE_LR: 0.01 + WARMUP_FACTOR: 0.001 + WARMUP_ITERS: 500 + STEPS: (5500,) + MAX_ITER: 7000 +TEST: + EXPECTED_RESULTS: [["bbox", "AP", 46.70, 1.1], ["segm", "AP", 39.0, 0.7], ["sem_seg", "mIoU", 64.73, 1.3], ["panoptic_seg", "PQ", 48.13, 
0.8]] diff --git a/configs/quick_schedules/retinanet_R_50_FPN_inference_acc_test.yaml b/configs/quick_schedules/retinanet_R_50_FPN_inference_acc_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cb666c1a6b3e351227046bc9c2af8799408858e8 --- /dev/null +++ b/configs/quick_schedules/retinanet_R_50_FPN_inference_acc_test.yaml @@ -0,0 +1,7 @@ +_BASE_: "../COCO-Detection/retinanet_R_50_FPN_3x.yaml" +MODEL: + WEIGHTS: "detectron2://COCO-Detection/retinanet_R_50_FPN_3x/190397829/model_final_5bd44e.pkl" +DATASETS: + TEST: ("coco_2017_val_100",) +TEST: + EXPECTED_RESULTS: [["bbox", "AP", 44.45, 0.02]] diff --git a/configs/quick_schedules/retinanet_R_50_FPN_instant_test.yaml b/configs/quick_schedules/retinanet_R_50_FPN_instant_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8d95c1f614296716374686b22055a587ccd052b9 --- /dev/null +++ b/configs/quick_schedules/retinanet_R_50_FPN_instant_test.yaml @@ -0,0 +1,13 @@ +_BASE_: "../COCO-Detection/retinanet_R_50_FPN_1x.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" +DATASETS: + TRAIN: ("coco_2017_val_100",) + TEST: ("coco_2017_val_100",) +SOLVER: + BASE_LR: 0.005 + STEPS: (30,) + MAX_ITER: 40 + IMS_PER_BATCH: 4 +DATALOADER: + NUM_WORKERS: 2 diff --git a/configs/quick_schedules/rpn_R_50_FPN_inference_acc_test.yaml b/configs/quick_schedules/rpn_R_50_FPN_inference_acc_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c7c3f908a9e80e98b2d25b6d384a60acaba9d4f8 --- /dev/null +++ b/configs/quick_schedules/rpn_R_50_FPN_inference_acc_test.yaml @@ -0,0 +1,7 @@ +_BASE_: "../COCO-Detection/rpn_R_50_FPN_1x.yaml" +MODEL: + WEIGHTS: "detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/model_final_02ce48.pkl" +DATASETS: + TEST: ("coco_2017_val_100",) +TEST: + EXPECTED_RESULTS: [["box_proposals", "AR@1000", 58.16, 0.02]] diff --git a/configs/quick_schedules/rpn_R_50_FPN_instant_test.yaml 
b/configs/quick_schedules/rpn_R_50_FPN_instant_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..402d432477507dc36f04c4a9777cb80fe06b2809 --- /dev/null +++ b/configs/quick_schedules/rpn_R_50_FPN_instant_test.yaml @@ -0,0 +1,13 @@ +_BASE_: "../COCO-Detection/rpn_R_50_FPN_1x.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" +DATASETS: + TRAIN: ("coco_2017_val_100",) + TEST: ("coco_2017_val_100",) +SOLVER: + STEPS: (30,) + MAX_ITER: 40 + BASE_LR: 0.005 + IMS_PER_BATCH: 4 +DATALOADER: + NUM_WORKERS: 2 diff --git a/configs/quick_schedules/semantic_R_50_FPN_inference_acc_test.yaml b/configs/quick_schedules/semantic_R_50_FPN_inference_acc_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bca74987d5218736983617883e0fe37f79d219b7 --- /dev/null +++ b/configs/quick_schedules/semantic_R_50_FPN_inference_acc_test.yaml @@ -0,0 +1,10 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + META_ARCHITECTURE: "SemanticSegmentor" + WEIGHTS: "detectron2://semantic_R_50_FPN_1x/111802073/model_final_c18079783c55a94968edc28b7101c5f0.pkl" + RESNETS: + DEPTH: 50 +DATASETS: + TEST: ("coco_2017_val_100_panoptic_stuffonly",) +TEST: + EXPECTED_RESULTS: [["sem_seg", "mIoU", 39.53, 0.02], ["sem_seg", "mACC", 51.50, 0.02]] diff --git a/configs/quick_schedules/semantic_R_50_FPN_instant_test.yaml b/configs/quick_schedules/semantic_R_50_FPN_instant_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..14ab606f219b462fe37fcc7d5fbdbe65cb5c2642 --- /dev/null +++ b/configs/quick_schedules/semantic_R_50_FPN_instant_test.yaml @@ -0,0 +1,18 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + META_ARCHITECTURE: "SemanticSegmentor" + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + RESNETS: + DEPTH: 50 +DATASETS: + TRAIN: ("coco_2017_val_100_panoptic_stuffonly",) + TEST: ("coco_2017_val_100_panoptic_stuffonly",) +INPUT: + MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) +SOLVER: + BASE_LR: 0.005 + STEPS: (30,) 
+ MAX_ITER: 40 + IMS_PER_BATCH: 4 +DATALOADER: + NUM_WORKERS: 2 diff --git a/configs/quick_schedules/semantic_R_50_FPN_training_acc_test.yaml b/configs/quick_schedules/semantic_R_50_FPN_training_acc_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1f78d775889b11e9e76743de5ddb8139198edf61 --- /dev/null +++ b/configs/quick_schedules/semantic_R_50_FPN_training_acc_test.yaml @@ -0,0 +1,20 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + META_ARCHITECTURE: "SemanticSegmentor" + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + RESNETS: + DEPTH: 50 +DATASETS: + TRAIN: ("coco_2017_val_panoptic_stuffonly",) + TEST: ("coco_2017_val_panoptic_stuffonly",) +SOLVER: + BASE_LR: 0.01 + WARMUP_FACTOR: 0.001 + WARMUP_ITERS: 300 + STEPS: (5500,) + MAX_ITER: 7000 +TEST: + EXPECTED_RESULTS: [["sem_seg", "mIoU", 76.51, 1.0], ["sem_seg", "mACC", 83.25, 1.0]] +INPUT: + # no scale augmentation + MIN_SIZE_TRAIN: (800, ) diff --git a/demo/README.md b/demo/README.md new file mode 100644 index 0000000000000000000000000000000000000000..f11ad3eb72953a7bc05d5e333fca4a62ab633b9c --- /dev/null +++ b/demo/README.md @@ -0,0 +1,5 @@ + +## Mask Transfiner Demo + +For visualization demo, please refer to our [visualization script](https://github.com/SysCV/transfiner#visualization). + diff --git a/demo/__pycache__/predictor.cpython-38.pyc b/demo/__pycache__/predictor.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5fed9aa41681040202f3708d1122e5240091b8a9 Binary files /dev/null and b/demo/__pycache__/predictor.cpython-38.pyc differ diff --git a/demo/demo.py b/demo/demo.py new file mode 100755 index 0000000000000000000000000000000000000000..a14dfb94c998bd3bfb650004a6fe1a23bf17eda3 --- /dev/null +++ b/demo/demo.py @@ -0,0 +1,190 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
+import argparse +import glob +import multiprocessing as mp +import numpy as np +import os +import tempfile +import time +import warnings +import cv2 +import tqdm + +from detectron2.config import get_cfg +from detectron2.data.detection_utils import read_image +from detectron2.utils.logger import setup_logger + +from predictor import VisualizationDemo + +# constants +WINDOW_NAME = "COCO detections" + + +def setup_cfg(args): + # load config from file and command-line arguments + cfg = get_cfg() + # To use demo for Panoptic-DeepLab, please uncomment the following two lines. + # from detectron2.projects.panoptic_deeplab import add_panoptic_deeplab_config # noqa + # add_panoptic_deeplab_config(cfg) + cfg.merge_from_file(args.config_file) + cfg.merge_from_list(args.opts) + # Set score_threshold for builtin models + cfg.MODEL.RETINANET.SCORE_THRESH_TEST = args.confidence_threshold + cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = args.confidence_threshold + cfg.MODEL.PANOPTIC_FPN.COMBINE.INSTANCES_CONFIDENCE_THRESH = args.confidence_threshold + cfg.freeze() + return cfg + + +def get_parser(): + parser = argparse.ArgumentParser(description="Detectron2 demo for builtin configs") + parser.add_argument( + "--config-file", + default="configs/quick_schedules/mask_rcnn_R_50_FPN_inference_acc_test.yaml", + metavar="FILE", + help="path to config file", + ) + parser.add_argument("--webcam", action="store_true", help="Take inputs from webcam.") + parser.add_argument("--video-input", help="Path to video file.") + parser.add_argument( + "--input", + nargs="+", + help="A list of space separated input images; " + "or a single glob pattern such as 'directory/*.jpg'", + ) + parser.add_argument( + "--output", + help="A file or directory to save output visualizations. 
" + "If not given, will show output in an OpenCV window.", + ) + + parser.add_argument( + "--confidence-threshold", + type=float, + default=0.5, + help="Minimum score for instance predictions to be shown", + ) + parser.add_argument( + "--opts", + help="Modify config options using the command-line 'KEY VALUE' pairs", + default=[], + nargs=argparse.REMAINDER, + ) + return parser + + +def test_opencv_video_format(codec, file_ext): + with tempfile.TemporaryDirectory(prefix="video_format_test") as dir: + filename = os.path.join(dir, "test_file" + file_ext) + writer = cv2.VideoWriter( + filename=filename, + fourcc=cv2.VideoWriter_fourcc(*codec), + fps=float(30), + frameSize=(10, 10), + isColor=True, + ) + [writer.write(np.zeros((10, 10, 3), np.uint8)) for _ in range(30)] + writer.release() + if os.path.isfile(filename): + return True + return False + + +if __name__ == "__main__": + mp.set_start_method("spawn", force=True) + args = get_parser().parse_args() + setup_logger(name="fvcore") + logger = setup_logger() + logger.info("Arguments: " + str(args)) + + cfg = setup_cfg(args) + + demo = VisualizationDemo(cfg) + + if args.input: + if len(args.input) == 1: + args.input = glob.glob(os.path.expanduser(args.input[0])) + assert args.input, "The input path(s) was not found" + for path in tqdm.tqdm(args.input, disable=not args.output): + # use PIL, to be consistent with evaluation + img = read_image(path, format="BGR") + start_time = time.time() + predictions, visualized_output = demo.run_on_image(img) + logger.info( + "{}: {} in {:.2f}s".format( + path, + "detected {} instances".format(len(predictions["instances"])) + if "instances" in predictions + else "finished", + time.time() - start_time, + ) + ) + + if args.output: + if os.path.isdir(args.output): + assert os.path.isdir(args.output), args.output + out_filename = os.path.join(args.output, os.path.basename(path)) + else: + #assert len(args.input) == 1, "Please specify a directory with args.output" + 
os.makedirs(args.output) + out_filename = os.path.join(args.output, os.path.basename(path)) + #out_filename = args.output + visualized_output.save(out_filename) + else: + cv2.namedWindow(WINDOW_NAME, cv2.WINDOW_NORMAL) + cv2.imshow(WINDOW_NAME, visualized_output.get_image()[:, :, ::-1]) + if cv2.waitKey(0) == 27: + break # esc to quit + elif args.webcam: + assert args.input is None, "Cannot have both --input and --webcam!" + assert args.output is None, "output not yet supported with --webcam!" + cam = cv2.VideoCapture(0) + for vis in tqdm.tqdm(demo.run_on_video(cam)): + cv2.namedWindow(WINDOW_NAME, cv2.WINDOW_NORMAL) + cv2.imshow(WINDOW_NAME, vis) + if cv2.waitKey(1) == 27: + break # esc to quit + cam.release() + cv2.destroyAllWindows() + elif args.video_input: + video = cv2.VideoCapture(args.video_input) + width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH)) + height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT)) + frames_per_second = video.get(cv2.CAP_PROP_FPS) + num_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT)) + basename = os.path.basename(args.video_input) + codec, file_ext = ( + ("x264", ".mkv") if test_opencv_video_format("x264", ".mkv") else ("mp4v", ".mp4") + ) + if codec == ".mp4v": + warnings.warn("x264 codec not available, switching to mp4v") + if args.output: + if os.path.isdir(args.output): + output_fname = os.path.join(args.output, basename) + output_fname = os.path.splitext(output_fname)[0] + file_ext + else: + output_fname = args.output + assert not os.path.isfile(output_fname), output_fname + output_file = cv2.VideoWriter( + filename=output_fname, + # some installation of opencv may not support x264 (due to its license), + # you can try other format (e.g. 
class VisualizationDemo(object):
    """Run a predictor on images or video and draw its predictions."""

    def __init__(self, cfg, instance_mode=ColorMode.IMAGE, parallel=False):
        """
        Args:
            cfg (CfgNode):
            instance_mode (ColorMode):
            parallel (bool): whether to run the model in different processes from visualization.
                Useful since the visualization logic can be slow.
        """
        test_sets = cfg.DATASETS.TEST
        # Fall back to a placeholder metadata entry when no test set is configured.
        dataset_name = test_sets[0] if len(test_sets) else "__unused"
        self.metadata = MetadataCatalog.get(dataset_name)
        self.cpu_device = torch.device("cpu")
        self.instance_mode = instance_mode
        self.parallel = parallel

        if not parallel:
            self.predictor = DefaultPredictor(cfg)
        else:
            # One asynchronous worker per visible CUDA device.
            self.predictor = AsyncPredictor(cfg, num_gpus=torch.cuda.device_count())

    def run_on_image(self, image):
        """
        Args:
            image (np.ndarray): an image of shape (H, W, C) (in BGR order).
                This is the format used by OpenCV.

        Returns:
            predictions (dict): the output of the model.
            vis_output (VisImage): the visualized image output, or None when
                the predictions hold none of "panoptic_seg", "sem_seg" or
                "instances".
        """
        predictions = self.predictor(image)
        # Reverse the channel axis: OpenCV BGR -> RGB for the visualizer.
        rgb_image = image[:, :, ::-1]
        visualizer = Visualizer(rgb_image, self.metadata, instance_mode=self.instance_mode)

        # Panoptic output takes precedence over the other two kinds.
        if "panoptic_seg" in predictions:
            panoptic_seg, segments_info = predictions["panoptic_seg"]
            drawn = visualizer.draw_panoptic_seg_predictions(
                panoptic_seg.to(self.cpu_device), segments_info
            )
            return predictions, drawn

        vis_output = None
        if "sem_seg" in predictions:
            class_map = predictions["sem_seg"].argmax(dim=0).to(self.cpu_device)
            vis_output = visualizer.draw_sem_seg(class_map)
        if "instances" in predictions:
            # When both keys are present, the instance drawing is what is returned.
            instances = predictions["instances"].to(self.cpu_device)
            vis_output = visualizer.draw_instance_predictions(predictions=instances)
        return predictions, vis_output

    def _frame_from_video(self, video):
        """Yield frames from ``video`` until it is closed or a read fails."""
        while video.isOpened():
            success, frame = video.read()
            if not success:
                break
            yield frame

    def run_on_video(self, video):
        """
        Visualizes predictions on frames of the input video.

        Args:
            video (cv2.VideoCapture): a :class:`VideoCapture` object, whose source can be
                either a webcam or a video file.

        Yields:
            ndarray: BGR visualizations of each video frame.
        """
        video_visualizer = VideoVisualizer(self.metadata, self.instance_mode)

        def process_predictions(frame, predictions):
            # The visualizer works in RGB; frames arrive as BGR.
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            # NOTE(review): if none of the three keys is present, vis_frame is
            # never bound and the conversion below raises UnboundLocalError.
            if "panoptic_seg" in predictions:
                panoptic_seg, segments_info = predictions["panoptic_seg"]
                vis_frame = video_visualizer.draw_panoptic_seg_predictions(
                    rgb_frame, panoptic_seg.to(self.cpu_device), segments_info
                )
            elif "instances" in predictions:
                instances = predictions["instances"].to(self.cpu_device)
                vis_frame = video_visualizer.draw_instance_predictions(rgb_frame, instances)
            elif "sem_seg" in predictions:
                class_map = predictions["sem_seg"].argmax(dim=0).to(self.cpu_device)
                vis_frame = video_visualizer.draw_sem_seg(rgb_frame, class_map)

            # Back to BGR for OpenCV consumers.
            return cv2.cvtColor(vis_frame.get_image(), cv2.COLOR_RGB2BGR)

        frame_gen = self._frame_from_video(video)

        if not self.parallel:
            for frame in frame_gen:
                yield process_predictions(frame, self.predictor(frame))
            return

        buffer_size = self.predictor.default_buffer_size
        # Frames already submitted to the predictor, oldest first.
        pending = deque()

        for cnt, frame in enumerate(frame_gen):
            pending.append(frame)
            self.predictor.put(frame)
            # Start collecting results only once the buffer has filled up.
            if cnt >= buffer_size:
                yield process_predictions(pending.popleft(), self.predictor.get())

        # Input exhausted: drain the frames still in flight.
        while pending:
            yield process_predictions(pending.popleft(), self.predictor.get())
+ """ + + class _StopToken: + pass + + class _PredictWorker(mp.Process): + def __init__(self, cfg, task_queue, result_queue): + self.cfg = cfg + self.task_queue = task_queue + self.result_queue = result_queue + super().__init__() + + def run(self): + predictor = DefaultPredictor(self.cfg) + + while True: + task = self.task_queue.get() + if isinstance(task, AsyncPredictor._StopToken): + break + idx, data = task + result = predictor(data) + self.result_queue.put((idx, result)) + + def __init__(self, cfg, num_gpus: int = 1): + """ + Args: + cfg (CfgNode): + num_gpus (int): if 0, will run on CPU + """ + num_workers = max(num_gpus, 1) + self.task_queue = mp.Queue(maxsize=num_workers * 3) + self.result_queue = mp.Queue(maxsize=num_workers * 3) + self.procs = [] + for gpuid in range(max(num_gpus, 1)): + cfg = cfg.clone() + cfg.defrost() + cfg.MODEL.DEVICE = "cuda:{}".format(gpuid) if num_gpus > 0 else "cpu" + self.procs.append( + AsyncPredictor._PredictWorker(cfg, self.task_queue, self.result_queue) + ) + + self.put_idx = 0 + self.get_idx = 0 + self.result_rank = [] + self.result_data = [] + + for p in self.procs: + p.start() + atexit.register(self.shutdown) + + def put(self, image): + self.put_idx += 1 + self.task_queue.put((self.put_idx, image)) + + def get(self): + self.get_idx += 1 # the index needed for this request + if len(self.result_rank) and self.result_rank[0] == self.get_idx: + res = self.result_data[0] + del self.result_data[0], self.result_rank[0] + return res + + while True: + # make sure the results are returned in the correct order + idx, res = self.result_queue.get() + if idx == self.get_idx: + return res + insert = bisect.bisect(self.result_rank, idx) + self.result_rank.insert(insert, idx) + self.result_data.insert(insert, res) + + def __len__(self): + return self.put_idx - self.get_idx + + def __call__(self, image): + self.put(image) + return self.get() + + def shutdown(self): + for _ in self.procs: + self.task_queue.put(AsyncPredictor._StopToken()) + 
+ @property + def default_buffer_size(self): + return len(self.procs) * 5 diff --git a/demo/sample_imgs/000000008844.jpg b/demo/sample_imgs/000000008844.jpg new file mode 100644 index 0000000000000000000000000000000000000000..d117937ec29e62d694bd6d2dc70eb41d9a92326c Binary files /dev/null and b/demo/sample_imgs/000000008844.jpg differ diff --git a/demo/sample_imgs/000000018737.jpg b/demo/sample_imgs/000000018737.jpg new file mode 100644 index 0000000000000000000000000000000000000000..340c394ff1398a1496c81855ff1128bbf8071842 Binary files /dev/null and b/demo/sample_imgs/000000018737.jpg differ diff --git a/demo/sample_imgs/000000126137.jpg b/demo/sample_imgs/000000126137.jpg new file mode 100644 index 0000000000000000000000000000000000000000..83c736e918992a085819d8a70103159b80c90998 Binary files /dev/null and b/demo/sample_imgs/000000126137.jpg differ diff --git a/demo/sample_imgs/000000131444.jpg b/demo/sample_imgs/000000131444.jpg new file mode 100644 index 0000000000000000000000000000000000000000..d4f63ec0b4ce746eb0eba168eceacb0032d1aac3 Binary files /dev/null and b/demo/sample_imgs/000000131444.jpg differ diff --git a/demo/sample_imgs/000000132408.jpg b/demo/sample_imgs/000000132408.jpg new file mode 100644 index 0000000000000000000000000000000000000000..dac4b04c9fefe52341456fe400d56a3d6ccb367b Binary files /dev/null and b/demo/sample_imgs/000000132408.jpg differ diff --git a/demo/sample_imgs/000000157365.jpg b/demo/sample_imgs/000000157365.jpg new file mode 100644 index 0000000000000000000000000000000000000000..10e719bef58161855d280d7a0034491d12a382f0 Binary files /dev/null and b/demo/sample_imgs/000000157365.jpg differ diff --git a/demo/sample_imgs/000000176037.jpg b/demo/sample_imgs/000000176037.jpg new file mode 100644 index 0000000000000000000000000000000000000000..0abf887fb73e89869f761c8046c9227bc5bb298a Binary files /dev/null and b/demo/sample_imgs/000000176037.jpg differ diff --git a/demo/sample_imgs/000000224200.jpg b/demo/sample_imgs/000000224200.jpg 
new file mode 100644 index 0000000000000000000000000000000000000000..fc5b3de83c8b3861ec92ecef6263249ad7b11473 Binary files /dev/null and b/demo/sample_imgs/000000224200.jpg differ diff --git a/demo/sample_imgs/000000244019.jpg b/demo/sample_imgs/000000244019.jpg new file mode 100644 index 0000000000000000000000000000000000000000..54927eb0e93cf3b5ce55a33aa64f5dd36ebd1008 Binary files /dev/null and b/demo/sample_imgs/000000244019.jpg differ diff --git a/demo/sample_imgs/000000252776.jpg b/demo/sample_imgs/000000252776.jpg new file mode 100644 index 0000000000000000000000000000000000000000..8b9dd0ef433ad232164a0c92c3414f49dffc6fec Binary files /dev/null and b/demo/sample_imgs/000000252776.jpg differ diff --git a/demo/sample_imgs/000000286849.jpg b/demo/sample_imgs/000000286849.jpg new file mode 100644 index 0000000000000000000000000000000000000000..12d9e147d759e2aeeb4e3903bc129157f71ac642 Binary files /dev/null and b/demo/sample_imgs/000000286849.jpg differ diff --git a/demo/sample_imgs/000000292997.jpg b/demo/sample_imgs/000000292997.jpg new file mode 100644 index 0000000000000000000000000000000000000000..4d56af9492d02539b68805cb80c075d6efad63e3 Binary files /dev/null and b/demo/sample_imgs/000000292997.jpg differ diff --git a/demo/sample_imgs/000000321214.jpg b/demo/sample_imgs/000000321214.jpg new file mode 100644 index 0000000000000000000000000000000000000000..427cdf048ac5bb950bdf808e791e6a52477169b4 Binary files /dev/null and b/demo/sample_imgs/000000321214.jpg differ diff --git a/demo/sample_imgs/000000344909.jpg b/demo/sample_imgs/000000344909.jpg new file mode 100644 index 0000000000000000000000000000000000000000..6323a743693f7d87c620888e5587edbf545f0f76 Binary files /dev/null and b/demo/sample_imgs/000000344909.jpg differ diff --git a/demo/sample_imgs/000000360661.jpg b/demo/sample_imgs/000000360661.jpg new file mode 100644 index 0000000000000000000000000000000000000000..c90c058740466131082aed6fee6964cda04a4711 Binary files /dev/null and 
b/demo/sample_imgs/000000360661.jpg differ diff --git a/demo/sample_imgs/000000396903.jpg b/demo/sample_imgs/000000396903.jpg new file mode 100644 index 0000000000000000000000000000000000000000..f10456ff9d60df5821d6427e672f9ffe51480d9b Binary files /dev/null and b/demo/sample_imgs/000000396903.jpg differ diff --git a/demo/sample_imgs/000000404922.jpg b/demo/sample_imgs/000000404922.jpg new file mode 100644 index 0000000000000000000000000000000000000000..6595f7b259bbfeb5de8d8aa172254db8a0e56645 Binary files /dev/null and b/demo/sample_imgs/000000404922.jpg differ diff --git a/demo/sample_imgs/000000442836.jpg b/demo/sample_imgs/000000442836.jpg new file mode 100644 index 0000000000000000000000000000000000000000..3e24da5924c518e34bc7c56dd7dc1404d58463b3 Binary files /dev/null and b/demo/sample_imgs/000000442836.jpg differ diff --git a/demo/sample_imgs/000000464144.jpg b/demo/sample_imgs/000000464144.jpg new file mode 100644 index 0000000000000000000000000000000000000000..b59f7e4b7fd684e7d2b47b3ac9036fb592a5457d Binary files /dev/null and b/demo/sample_imgs/000000464144.jpg differ diff --git a/demo/sample_imgs/000000482477.jpg b/demo/sample_imgs/000000482477.jpg new file mode 100644 index 0000000000000000000000000000000000000000..98c5277b190faa54f12e85df99768bef255abfff Binary files /dev/null and b/demo/sample_imgs/000000482477.jpg differ diff --git a/demo/sample_imgs/000000495054.jpg b/demo/sample_imgs/000000495054.jpg new file mode 100644 index 0000000000000000000000000000000000000000..53ae52e0be22fbb426eeec63d14e85c5b2b9fab2 Binary files /dev/null and b/demo/sample_imgs/000000495054.jpg differ diff --git a/demo/sample_imgs/000000558073.jpg b/demo/sample_imgs/000000558073.jpg new file mode 100644 index 0000000000000000000000000000000000000000..6d0a14a1b5b8765c0df91fde476f6a1d488a6b05 Binary files /dev/null and b/demo/sample_imgs/000000558073.jpg differ diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 
0000000000000000000000000000000000000000..f033f4c1ff0e2e7d2ae6a19a0bcb8de6cd658a03 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,8 @@ +pyyaml==5.1 +torch +torchvision +opencv-python==4.4.0.40 +scikit-image +kornia==0.5.11 + +