Vishakaraj committed on
Commit
c709b60
•
1 Parent(s): 812fc55

Upload folder using huggingface_hub

This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50)
  1. .gitattributes +11 -0
  2. .gitignore +62 -0
  3. .gitmodules +3 -0
  4. LICENSE +21 -0
  5. README.md +124 -8
  6. app.py +109 -0
  7. configs/Base.yaml +77 -0
  8. configs/GRiT_B_DenseCap.yaml +20 -0
  9. configs/GRiT_B_DenseCap_ObjectDet.yaml +23 -0
  10. configs/GRiT_B_ObjectDet.yaml +20 -0
  11. configs/GRiT_H_ObjectDet.yaml +21 -0
  12. configs/GRiT_L_ObjectDet.yaml +20 -0
  13. datasets/DATASETS.md +62 -0
  14. demo.py +125 -0
  15. demo_images/000000353174.jpg +0 -0
  16. demo_images/000000438652.jpg +0 -0
  17. demo_images/000000453583.jpg +0 -0
  18. demo_images/000000466185.jpg +0 -0
  19. demo_images/000000497110.jpg +0 -0
  20. demo_images/000000497861.jpg +0 -0
  21. demo_images/000000546072.jpg +0 -0
  22. detectron2/.circleci/config.yml +270 -0
  23. detectron2/.circleci/import-tests.sh +16 -0
  24. detectron2/.clang-format +85 -0
  25. detectron2/.flake8 +15 -0
  26. detectron2/.gitignore +53 -0
  27. detectron2/GETTING_STARTED.md +79 -0
  28. detectron2/INSTALL.md +262 -0
  29. detectron2/LICENSE +202 -0
  30. detectron2/MODEL_ZOO.md +1052 -0
  31. detectron2/README.md +68 -0
  32. detectron2/build/lib.linux-x86_64-3.10/detectron2/_C.cpython-310-x86_64-linux-gnu.so +3 -0
  33. detectron2/build/lib.linux-x86_64-3.10/detectron2/__init__.py +10 -0
  34. detectron2/build/lib.linux-x86_64-3.10/detectron2/checkpoint/__init__.py +10 -0
  35. detectron2/build/lib.linux-x86_64-3.10/detectron2/checkpoint/c2_model_loading.py +407 -0
  36. detectron2/build/lib.linux-x86_64-3.10/detectron2/checkpoint/catalog.py +115 -0
  37. detectron2/build/lib.linux-x86_64-3.10/detectron2/checkpoint/detection_checkpoint.py +121 -0
  38. detectron2/build/lib.linux-x86_64-3.10/detectron2/config/__init__.py +24 -0
  39. detectron2/build/lib.linux-x86_64-3.10/detectron2/config/compat.py +229 -0
  40. detectron2/build/lib.linux-x86_64-3.10/detectron2/config/config.py +265 -0
  41. detectron2/build/lib.linux-x86_64-3.10/detectron2/config/defaults.py +646 -0
  42. detectron2/build/lib.linux-x86_64-3.10/detectron2/config/instantiate.py +83 -0
  43. detectron2/build/lib.linux-x86_64-3.10/detectron2/config/lazy.py +400 -0
  44. detectron2/build/lib.linux-x86_64-3.10/detectron2/data/__init__.py +19 -0
  45. detectron2/build/lib.linux-x86_64-3.10/detectron2/data/benchmark.py +225 -0
  46. detectron2/build/lib.linux-x86_64-3.10/detectron2/data/build.py +556 -0
  47. detectron2/build/lib.linux-x86_64-3.10/detectron2/data/catalog.py +236 -0
  48. detectron2/build/lib.linux-x86_64-3.10/detectron2/data/common.py +244 -0
  49. detectron2/build/lib.linux-x86_64-3.10/detectron2/data/dataset_mapper.py +191 -0
  50. detectron2/build/lib.linux-x86_64-3.10/detectron2/data/datasets/__init__.py +9 -0
.gitattributes CHANGED
@@ -33,3 +33,14 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ detectron2/build/lib.linux-x86_64-3.10/detectron2/_C.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
37
+ detectron2/build/temp.linux-x86_64-3.10/home/vishak66/Bitbucket/Vishakaraj[[:space:]]Shanmugavel/grit-hugging-face/detectron2/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated_cpu.o filter=lfs diff=lfs merge=lfs -text
38
+ detectron2/build/temp.linux-x86_64-3.10/home/vishak66/Bitbucket/Vishakaraj[[:space:]]Shanmugavel/grit-hugging-face/detectron2/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_cpu.o filter=lfs diff=lfs merge=lfs -text
39
+ detectron2/build/temp.linux-x86_64-3.10/home/vishak66/Bitbucket/Vishakaraj[[:space:]]Shanmugavel/grit-hugging-face/detectron2/detectron2/layers/csrc/cocoeval/cocoeval.o filter=lfs diff=lfs merge=lfs -text
40
+ detectron2/build/temp.linux-x86_64-3.10/home/vishak66/Bitbucket/Vishakaraj[[:space:]]Shanmugavel/grit-hugging-face/detectron2/detectron2/layers/csrc/nms_rotated/nms_rotated_cpu.o filter=lfs diff=lfs merge=lfs -text
41
+ detectron2/build/temp.linux-x86_64-3.10/home/vishak66/Bitbucket/Vishakaraj[[:space:]]Shanmugavel/grit-hugging-face/detectron2/detectron2/layers/csrc/vision.o filter=lfs diff=lfs merge=lfs -text
42
+ detectron2/detectron2/_C.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
43
+ detectron2/detectron2-0.6-cp310-cp310-linux_x86_64.whl filter=lfs diff=lfs merge=lfs -text
44
+ docs/chatgpt.png filter=lfs diff=lfs merge=lfs -text
45
+ docs/demo.png filter=lfs diff=lfs merge=lfs -text
46
+ docs/grit.png filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,62 @@
1
+ third_party/detectron2
2
+ ./models
3
+ configs-experimental
4
+ experiments
5
+ # output dir
6
+ index.html
7
+ data/*
8
+ slurm/
9
+ slurm
10
+ slurm-output
11
+ slurm-output/
12
+ output
13
+ instant_test_output
14
+ inference_test_output
15
+ ./aux_data
16
+ train.sh
17
+ install.sh
18
+
19
+
20
+ *.diff
21
+
22
+ # compilation and distribution
23
+ __pycache__
24
+ _ext
25
+ *.pyc
26
+ *.pyd
27
+ *.so
28
+ *.dll
29
+ *.egg-info/
30
+ build/
31
+ dist/
32
+ wheels/
33
+
34
+ # pytorch/python/numpy formats
35
+ *.pth
36
+ *.pkl
37
+ *.ts
38
+ model_ts*.txt
39
+
40
+ # ipython/jupyter notebooks
41
+ *.ipynb
42
+ **/.ipynb_checkpoints/
43
+
44
+ # Editor temporaries
45
+ *.swn
46
+ *.swo
47
+ *.swp
48
+ *~
49
+
50
+ # editor settings
51
+ .idea
52
+ .vscode
53
+ _darcs
54
+
55
+ # project dirs
56
+ /detectron2/model_zoo/configs
57
+ /datasets/*
58
+ !/datasets/*.*
59
+ !/datasets/metadata
60
+ /projects/*/datasets
61
+ /models
62
+ /snippet
.gitmodules ADDED
@@ -0,0 +1,3 @@
1
+ [submodule "third_party/CenterNet2"]
2
+ path = third_party/CenterNet2
3
+ url = https://github.com/xingyizhou/CenterNet2.git
LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2022 Microsoft Corporation
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
README.md CHANGED
@@ -1,12 +1,128 @@
1
  ---
2
- title: Dense Captioning - GRiT
3
- emoji: πŸŒ–
4
- colorFrom: purple
5
- colorTo: green
6
- sdk: gradio
7
- sdk_version: 3.44.0
8
  app_file: app.py
9
- pinned: false
 
10
  ---
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
1
  ---
2
+ title: Dense_Captioning_-_GRiT
3
  app_file: app.py
4
+ sdk: gradio
5
+ sdk_version: 3.42.0
6
  ---
7
+ # GRiT: A Generative Region-to-text Transformer for Object Understanding
8
+ GRiT is a general and open-set object understanding framework that localizes objects and
9
+ describes them with any style of free-form text it was trained with, e.g., class names or descriptive sentences
10
+ (including object attributes, actions, counts, and more).
11
+
12
+ > [**GRiT: A Generative Region-to-text Transformer for Object Understanding**](https://arxiv.org/abs/2212.00280) \
13
+ > Jialian Wu, Jianfeng Wang, Zhengyuan Yang, Zhe Gan, Zicheng Liu, Junsong Yuan, Lijuan Wang \
14
+ > <sup>1</sup>State University of New York at Buffalo, <sup>2</sup>Microsoft \
15
+ > *arXiv technical report* ([PDF](https://arxiv.org/pdf/2212.00280.pdf))
16
+
17
+ <p align="center"> <img src='docs/grit.png' align="center" height="400px"> </p>
18
+
19
+ ## Installation
20
+
21
+ Please follow [Installation instructions](docs/INSTALL.md).
22
+
23
+ ## ChatGPT with GRiT
24
+ We give ChatGPT GRiT's dense captioning outputs (object location and description) to have it
25
+ describe the scene and even write poetry. ChatGPT can generate amazing scene descriptions given our dense
26
+ captioning outputs. An example is shown below: :star_struck::star_struck::star_struck:
27
+
28
+ <p align="center"> <img src='docs/chatgpt.png' align="center"> </p>
29
+
30
+
31
+ ## Object Understanding Demo - One Model Two tasks
32
+
33
+ [Download the GRiT model](https://datarelease.blob.core.windows.net/grit/models/grit_b_densecap_objectdet.pth) or use the following command to download:
34
+ ~~~
35
+ mkdir models && cd models
36
+ wget https://datarelease.blob.core.windows.net/grit/models/grit_b_densecap_objectdet.pth && cd ..
37
+ ~~~
38
+ The downloaded GRiT model was jointly trained on the dense captioning
39
+ and object detection tasks. With the same trained model, it can
40
+ output both rich descriptive sentences and short class names by varying
41
+ the flag `--test-task`. Try it as follows! :star_struck:
42
+
43
+ ### *Output for Dense Captioning (rich descriptive sentences)*
44
+
45
+ ~~~
46
+ python demo.py --test-task DenseCap --config-file configs/GRiT_B_DenseCap_ObjectDet.yaml --input demo_images --output visualization --opts MODEL.WEIGHTS models/grit_b_densecap_objectdet.pth
47
+ ~~~
48
+
49
+ ### *Output for Object Detection (short class names)*
50
+
51
+ ~~~
52
+ python demo.py --test-task ObjectDet --config-file configs/GRiT_B_DenseCap_ObjectDet.yaml --input demo_images --output visualization --opts MODEL.WEIGHTS models/grit_b_densecap_objectdet.pth
53
+ ~~~
54
+ Output images will be saved under the `visualization` folder, which looks like:
55
+ <p align="center"> <img src='docs/demo.png' align="center"> </p>
56
+
57
+ You can also try the Colab demo provided by the [TWC team](https://github.com/taskswithcode): [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/taskswithcode/GriT/blob/master/TWCGRiT.ipynb)
58
+
59
+
60
+ ## Benchmark Inference and Evaluation
61
+ Please follow [dataset preparation instructions](datasets/DATASETS.md) to download datasets.
62
+
63
+ Download our trained models and put them in `models/` for evaluation.
64
+ ### *Object Detection on COCO 2017 Dataset*
65
+
66
+ | Model | val AP | test-dev AP | Download |
67
+ |-----------------------|-----------------|----------|----------|
68
+ |[GRiT (ViT-B)](configs/GRiT_B_ObjectDet.yaml)|53.7|53.8| [model](https://datarelease.blob.core.windows.net/grit/models/grit_b_objectdet.pth) |
69
+ |[GRiT (ViT-L)](configs/GRiT_L_ObjectDet.yaml)|56.4|56.6| [model](https://datarelease.blob.core.windows.net/grit/models/grit_l_objectdet.pth) |
70
+ |[GRiT (ViT-H)](configs/GRiT_H_ObjectDet.yaml)|60.4|60.4| [model](https://datarelease.blob.core.windows.net/grit/models/grit_h_objectdet.pth) |
71
+
72
+ To evaluate the trained GRiT on COCO 2017 val, run:
73
+ ~~~
74
+ # GRiT (ViT-B)
75
+ python train_net.py --num-gpus-per-machine 8 --config-file configs/GRiT_B_ObjectDet.yaml --output-dir-name ./output/grit_b_objectdet --eval-only MODEL.WEIGHTS models/grit_b_objectdet.pth
76
+ # GRiT (ViT-L)
77
+ python train_net.py --num-gpus-per-machine 8 --config-file configs/GRiT_L_ObjectDet.yaml --output-dir-name ./output/grit_l_objectdet --eval-only MODEL.WEIGHTS models/grit_l_objectdet.pth
78
+ # GRiT (ViT-H)
79
+ python train_net.py --num-gpus-per-machine 8 --config-file configs/GRiT_H_ObjectDet.yaml --output-dir-name ./output/grit_h_objectdet --eval-only MODEL.WEIGHTS models/grit_h_objectdet.pth
80
+ ~~~
81
+
82
+ ### *Dense Captioning on VG Dataset*
83
+ | Model | mAP | Download |
84
+ |-----------------------|-----------------|----------|
85
+ |[GRiT (ViT-B)](configs/GRiT_B_DenseCap.yaml)|15.5| [model](https://datarelease.blob.core.windows.net/grit/models/grit_b_densecap.pth) |
86
+
87
+ To test on the VG test set, run:
88
+ ~~~
89
+ python train_net.py --num-gpus-per-machine 8 --config-file configs/GRiT_B_DenseCap.yaml --output-dir-name ./output/grit_b_densecap --eval-only MODEL.WEIGHTS models/grit_b_densecap.pth
90
+ ~~~
91
+ It will save the inference results to `output/grit_b_densecap/vg_instances_results.json`.
92
+ We use the VG dense captioning [official evaluation codebase](https://github.com/jcjohnson/densecap)
93
+ to report the results. We didn't integrate the evaluation code into our project as it was written in Lua.
94
+ To evaluate on VG, please follow the original codebase's instructions and run the evaluation with it. We're happy to discuss
95
+ any issues you may encounter with their code in our issues section.
96
+
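As a quick sanity check before running the Lua evaluation, you can inspect the saved predictions directly. The sketch below is only illustrative: the exact field names inside `vg_instances_results.json` (e.g. `bbox`, `score`, `caption`) depend on the GRiT/detectron2 version you run, so check your file and adjust them.

~~~
# Hedged sketch: field names ("bbox", "score", "caption") are assumptions; verify against your JSON.
import json

with open("output/grit_b_densecap/vg_instances_results.json") as f:
    results = json.load(f)

print(len(results), "predicted regions")
for r in sorted(results, key=lambda x: x.get("score", 0), reverse=True)[:5]:
    print(r.get("score"), r.get("bbox"), r.get("caption"))
~~~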
97
+ ## Training
98
+ To save training memory, we use [DeepSpeed](https://github.com/microsoft/DeepSpeed) for training, which works well with
99
+ [activation checkpointing](https://pytorch.org/docs/stable/checkpoint.html) in distributed training.
100
+
101
+ To train on a single machine node, run:
102
+ ~~~
103
+ python train_deepspeed.py --num-gpus-per-machine 8 --config-file configs/GRiT_B_ObjectDet.yaml --output-dir-name ./output/grit_b_objectdet
104
+ ~~~
105
+
106
+ To train on multiple machine nodes, run:
107
+ ~~~
108
+ python train_deepspeed.py --num-machines 4 --num-gpus-per-machine 8 --config-file configs/GRiT_B_ObjectDet.yaml --output-dir-name ./output/grit_b_objectdet
109
+ ~~~
110
+
111
+ ## Acknowledgement
112
+ Our code is in part based on [Detic](https://github.com/facebookresearch/Detic),
113
+ [CenterNet2](https://github.com/xingyizhou/CenterNet2),
114
+ [detectron2](https://github.com/facebookresearch/detectron2),
115
+ [GIT](https://github.com/microsoft/GenerativeImage2Text), and
116
+ [transformers](https://github.com/huggingface/transformers).
117
+ We thank the authors and appreciate their great work!
118
+
119
+ ## Citation
120
+
121
+ If you find our work interesting and would like to cite it, please use the following BibTeX entry.
122
 
123
+ @article{wu2022grit,
124
+ title={GRiT: A Generative Region-to-text Transformer for Object Understanding},
125
+ author={Wu, Jialian and Wang, Jianfeng and Yang, Zhengyuan and Gan, Zhe and Liu, Zicheng and Yuan, Junsong and Wang, Lijuan},
126
+ journal={arXiv preprint arXiv:2212.00280},
127
+ year={2022}
128
+ }
app.py ADDED
@@ -0,0 +1,109 @@
1
+ import os
2
+ # os.system("sudo apt-get update && sudo apt-get install -y git")
3
+ # os.system("sudo apt-get -y install pybind11-dev")
4
+ # os.system("git clone https://github.com/facebookresearch/detectron2.git")
5
+ # os.system("pip install torch==1.9.0+cu111 torchvision==0.10.0+cu111 torchaudio==0.9.0 -f https://download.pytorch.org/whl/torch_stable.html")
6
+ os.system("cd detectron2 && pip install detectron2-0.6-cp310-cp310-linux_x86_64.whl")
7
+ # os.system("pip3 install torch torchvision torchaudio")
8
+ os.system("pip install deepspeed==0.7.0")
9
+
10
+ import site
11
+ from importlib import reload
12
+ reload(site)
13
+
14
+ from PIL import Image
15
+ import argparse
16
+ import sys
17
+ import numpy as np
18
+ import cv2
19
+ import gradio as gr
20
+
21
+ from detectron2.config import get_cfg
22
+ from detectron2.data.detection_utils import read_image
23
+ from detectron2.utils.logger import setup_logger
24
+
25
+ sys.path.insert(0, "third_party/CenterNet2/projects/CenterNet2/")
26
+ from centernet.config import add_centernet_config
27
+ from grit.config import add_grit_config
28
+
29
+ from grit.predictor import VisualizationDemo
30
+
31
+ def get_parser():
32
+ parser = argparse.ArgumentParser(description="Detectron2 demo for builtin configs")
33
+ parser.add_argument(
34
+ "--config-file",
35
+ default="configs/GRiT_B_DenseCap_ObjectDet.yaml",
36
+ metavar="FILE",
37
+ help="path to config file",
38
+ )
39
+ parser.add_argument("--cpu", action="store_true", help="Use CPU only.")
40
+ parser.add_argument(
41
+ "--confidence-threshold",
42
+ type=float,
43
+ default=0.5,
44
+ help="Minimum score for instance predictions to be shown",
45
+ )
46
+ parser.add_argument(
47
+ "--test-task",
48
+ type=str,
49
+ default="",
50
+ help="Choose a task to have GRiT perform",
51
+ )
52
+ parser.add_argument(
53
+ "--opts",
54
+ help="Modify config options using the command-line 'KEY VALUE' pairs",
55
+ default=["MODEL.WEIGHTS", "./models/grit_b_densecap_objectdet.pth"],
56
+ nargs=argparse.REMAINDER,
57
+ )
58
+ return parser
59
+
60
+ def setup_cfg(args):
61
+ cfg = get_cfg()
62
+ if args.cpu:
63
+ cfg.MODEL.DEVICE = "cpu"
64
+ add_centernet_config(cfg)
65
+ add_grit_config(cfg)
66
+ cfg.merge_from_file(args.config_file)
67
+ cfg.merge_from_list(args.opts)
68
+ # Set score_threshold for builtin models
69
+ cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = args.confidence_threshold
70
+ cfg.MODEL.PANOPTIC_FPN.COMBINE.INSTANCES_CONFIDENCE_THRESH = (
71
+ args.confidence_threshold
72
+ )
73
+ if args.test_task:
74
+ cfg.MODEL.TEST_TASK = args.test_task
75
+ cfg.MODEL.BEAM_SIZE = 1
76
+ cfg.MODEL.ROI_HEADS.SOFT_NMS_ENABLED = False
77
+ cfg.USE_ACT_CHECKPOINT = False
78
+ cfg.freeze()
79
+ return cfg
80
+
81
+ def predict(image_file):
82
+ image_array = np.array(image_file)[:, :, ::-1]  # PIL gives RGB; reverse channels to BGR for the model
83
+ _, visualized_output = dense_captioning_demo.run_on_image(image_array)
84
+ visualized_output.save(os.path.join(os.getcwd(), "output.jpg"))
85
+ output_image = cv2.imread(os.path.join(os.getcwd(), "output.jpg"))
86
+ output_image = cv2.cvtColor(output_image, cv2.COLOR_BGR2RGB)
87
+ return Image.fromarray(output_image)
88
+
89
+
90
+
91
+ args = get_parser().parse_args()
92
+ args.test_task = "DenseCap"
93
+ setup_logger(name="fvcore")
94
+ logger = setup_logger()
95
+ logger.info("Arguments: " + str(args))
96
+
97
+ cfg = setup_cfg(args)
98
+
99
+ dense_captioning_demo = VisualizationDemo(cfg)
100
+
101
+ demo = gr.Interface(
102
+ title="Dense Captioning - GRiT",
103
+ fn=predict,
104
+ inputs=gr.Image(type='pil', label="Original Image"),
105
+ outputs=gr.Image(type="pil", label="Output Image"),
106
+ examples=["example_1.jpg", "example_2.jpg"],
107
+ )
108
+
109
+ demo.launch()
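To debug the Space locally without launching the Gradio UI, a minimal sketch like the one below can exercise `predict` directly (for example in an interactive session after the definitions above have run, or pasted just before `demo.launch()`). It assumes the detectron2 wheel and the model weights referenced above are installed, and it reuses one of the bundled demo images; the output filename is arbitrary.

~~~
# Hedged local test of the Gradio handler above (not part of app.py itself).
from PIL import Image

input_image = Image.open("demo_images/000000353174.jpg").convert("RGB")
result = predict(input_image)          # returns a PIL.Image with dense captions drawn
result.save("local_test_output.jpg")   # inspect the saved visualization
~~~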
configs/Base.yaml ADDED
@@ -0,0 +1,77 @@
1
+ MODEL:
2
+ META_ARCHITECTURE: "GRiT"
3
+ MASK_ON: True
4
+ PROPOSAL_GENERATOR:
5
+ NAME: "CenterNet"
6
+ FPN:
7
+ IN_FEATURES: ["layer3", "layer4", "layer5"]
8
+ PIXEL_MEAN: [123.675, 116.280, 103.530]
9
+ PIXEL_STD: [58.395, 57.12, 57.375]
10
+ ROI_HEADS:
11
+ NAME: GRiTROIHeadsAndTextDecoder
12
+ IN_FEATURES: ["p3", "p4", "p5"]
13
+ IOU_THRESHOLDS: [0.6]
14
+ NUM_CLASSES: 1
15
+ SCORE_THRESH_TEST: 0.02
16
+ NMS_THRESH_TEST: 0.5
17
+ OBJECT_FEAT_POOLER_RES: 14
18
+ ROI_BOX_CASCADE_HEAD:
19
+ IOUS: [0.6, 0.7, 0.8]
20
+ ROI_BOX_HEAD:
21
+ NAME: "FastRCNNConvFCHead"
22
+ NUM_FC: 2
23
+ POOLER_RESOLUTION: 7
24
+ CLS_AGNOSTIC_BBOX_REG: True
25
+ MULT_PROPOSAL_SCORE: True
26
+ ROI_MASK_HEAD:
27
+ NAME: "MaskRCNNConvUpsampleHead"
28
+ NUM_CONV: 4
29
+ POOLER_RESOLUTION: 14
30
+ CLS_AGNOSTIC_MASK: True
31
+ CENTERNET:
32
+ NUM_CLASSES: 1
33
+ REG_WEIGHT: 1.
34
+ NOT_NORM_REG: True
35
+ ONLY_PROPOSAL: True
36
+ WITH_AGN_HM: True
37
+ INFERENCE_TH: 0.0001
38
+ PRE_NMS_TOPK_TRAIN: 4000
39
+ POST_NMS_TOPK_TRAIN: 2000
40
+ PRE_NMS_TOPK_TEST: 1000
41
+ POST_NMS_TOPK_TEST: 256
42
+ NMS_TH_TRAIN: 0.9
43
+ NMS_TH_TEST: 0.9
44
+ POS_WEIGHT: 0.5
45
+ NEG_WEIGHT: 0.5
46
+ IGNORE_HIGH_FP: 0.85
47
+ DATASETS:
48
+ TRAIN: ("coco_2017_train",)
49
+ TEST: ("coco_2017_val",)
50
+ DATALOADER:
51
+ SAMPLER_TRAIN: "MultiDatasetSampler"
52
+ DATASET_RATIO: [1]
53
+ DATASET_INPUT_SIZE: [1024]
54
+ DATASET_INPUT_SCALE: [[0.1, 2.0]]
55
+ FILTER_EMPTY_ANNOTATIONS: False
56
+ NUM_WORKERS: 8
57
+ TEST:
58
+ DETECTIONS_PER_IMAGE: 256
59
+ SOLVER:
60
+ LR_SCHEDULER_NAME: "WarmupCosineLR"
61
+ CHECKPOINT_PERIOD: 10000
62
+ WARMUP_ITERS: 1000
63
+ WARMUP_FACTOR: 0.001
64
+ USE_CUSTOM_SOLVER: True
65
+ OPTIMIZER: "ADAMW"
66
+ MAX_ITER: 180000
67
+ IMS_PER_BATCH: 64
68
+ BASE_LR: 0.00008
69
+ VIT_LAYER_DECAY: True
70
+ CLIP_GRADIENTS:
71
+ ENABLED: True
72
+ INPUT:
73
+ FORMAT: RGB
74
+ CUSTOM_AUG: EfficientDetResizeCrop
75
+ TRAIN_SIZE: 640
76
+ USE_ACT_CHECKPOINT: True
77
+ VERSION: 2
configs/GRiT_B_DenseCap.yaml ADDED
@@ -0,0 +1,20 @@
1
+ _BASE_: "Base.yaml"
2
+ MODEL:
3
+ TRAIN_TASK: ["DenseCap"]
4
+ TEST_TASK: "DenseCap"
5
+ MASK_ON: False
6
+ ROI_HEADS:
7
+ SOFT_NMS_ENABLED: False
8
+ BEAM_SIZE: 1
9
+ WEIGHTS: "detectron2://ImageNetPretrained/MAE/mae_pretrain_vit_base.pth"
10
+ BACKBONE:
11
+ NAME: build_vit_fpn_backbone
12
+ VIT_LAYERS: 12
13
+ SOLVER:
14
+ VIT_LAYER_DECAY_RATE: 0.7
15
+ DATASETS:
16
+ TRAIN: ("vg_train",)
17
+ TEST: ("vg_test",)
18
+ DATALOADER:
19
+ DATASET_BS: 2
20
+ OUTPUT_DIR: "./output/GRiT_B_DenseCap"
configs/GRiT_B_DenseCap_ObjectDet.yaml ADDED
@@ -0,0 +1,23 @@
1
+ _BASE_: "Base.yaml"
2
+ MODEL:
3
+ TRAIN_TASK: ["ObjectDet", "DenseCap"]
4
+ TEST_TASK: "DenseCap" # DenseCap or ObjectDet: Choose one for testing
5
+ MASK_ON: True
6
+ ROI_HEADS:
7
+ SOFT_NMS_ENABLED: False
8
+ BEAM_SIZE: 1
9
+ WEIGHTS: "detectron2://ImageNetPretrained/MAE/mae_pretrain_vit_base.pth"
10
+ BACKBONE:
11
+ NAME: build_vit_fpn_backbone
12
+ VIT_LAYERS: 12
13
+ SOLVER:
14
+ VIT_LAYER_DECAY_RATE: 0.7
15
+ DATASETS:
16
+ TRAIN: ("GRiT_coco2017_train", "vg_train")
17
+ TEST: ("coco_2017_test-dev",)
18
+ DATALOADER:
19
+ DATASET_RATIO: [1, 1]
20
+ DATASET_BS: 2
21
+ DATASET_INPUT_SIZE: [1024, 1024]
22
+ DATASET_INPUT_SCALE: [[0.1, 2.0], [0.1, 2.0]]
23
+ OUTPUT_DIR: "./output/GRiT_B_DenseCap_ObjectDet"
configs/GRiT_B_ObjectDet.yaml ADDED
@@ -0,0 +1,20 @@
1
+ _BASE_: "Base.yaml"
2
+ MODEL:
3
+ TRAIN_TASK: ["ObjectDet"]
4
+ TEST_TASK: "ObjectDet"
5
+ MASK_ON: True
6
+ ROI_HEADS:
7
+ SOFT_NMS_ENABLED: True
8
+ BEAM_SIZE: 3
9
+ WEIGHTS: "detectron2://ImageNetPretrained/MAE/mae_pretrain_vit_base.pth"
10
+ BACKBONE:
11
+ NAME: build_vit_fpn_backbone
12
+ VIT_LAYERS: 12
13
+ SOLVER:
14
+ VIT_LAYER_DECAY_RATE: 0.7
15
+ DATASETS:
16
+ TRAIN: ("GRiT_coco2017_train",)
17
+ TEST: ("coco_2017_val",)
18
+ DATALOADER:
19
+ DATASET_BS: 2
20
+ OUTPUT_DIR: "./output/GRiT_B_ObjectDet"
configs/GRiT_H_ObjectDet.yaml ADDED
@@ -0,0 +1,21 @@
1
+ _BASE_: "Base.yaml"
2
+ MODEL:
3
+ TRAIN_TASK: ["ObjectDet"]
4
+ TEST_TASK: "ObjectDet"
5
+ MASK_ON: True
6
+ ROI_HEADS:
7
+ SOFT_NMS_ENABLED: True
8
+ BEAM_SIZE: 3
9
+ WEIGHTS: "detectron2://ImageNetPretrained/MAE/mae_pretrain_vit_huge_p14to16.pth"
10
+ BACKBONE:
11
+ NAME: build_vit_fpn_backbone_huge
12
+ VIT_LAYERS: 32
13
+ SOLVER:
14
+ MAX_ITER: 135000
15
+ VIT_LAYER_DECAY_RATE: 0.9
16
+ DATASETS:
17
+ TRAIN: ("GRiT_coco2017_train",)
18
+ TEST: ("coco_2017_val",)
19
+ DATALOADER:
20
+ DATASET_BS: 1
21
+ OUTPUT_DIR: "./output/GRiT_H_ObjectDet"
configs/GRiT_L_ObjectDet.yaml ADDED
@@ -0,0 +1,20 @@
1
+ _BASE_: "Base.yaml"
2
+ MODEL:
3
+ TRAIN_TASK: ["ObjectDet"]
4
+ TEST_TASK: "ObjectDet"
5
+ MASK_ON: True
6
+ ROI_HEADS:
7
+ SOFT_NMS_ENABLED: True
8
+ BEAM_SIZE: 3
9
+ WEIGHTS: "detectron2://ImageNetPretrained/MAE/mae_pretrain_vit_large.pth"
10
+ BACKBONE:
11
+ NAME: build_vit_fpn_backbone_large
12
+ VIT_LAYERS: 24
13
+ SOLVER:
14
+ VIT_LAYER_DECAY_RATE: 0.8
15
+ DATASETS:
16
+ TRAIN: ("GRiT_coco2017_train",)
17
+ TEST: ("coco_2017_val",)
18
+ DATALOADER:
19
+ DATASET_BS: 1
20
+ OUTPUT_DIR: "./output/GRiT_L_ObjectDet"
datasets/DATASETS.md ADDED
@@ -0,0 +1,62 @@
1
+ # Dataset preparation
2
+
3
+
4
+ ## COCO Dataset
5
+
6
+ - Download the COCO 2017 dataset from the [official website](https://cocodataset.org/#download).
7
+
8
+ Dataset structure should look like:
9
+ ~~~
10
+ ${GRiT_ROOT}
11
+ |-- datasets
12
+ `-- |-- coco
13
+ |-- |-- train2017/
14
+ |-- |-- val2017/
15
+ |-- |-- test2017/
16
+ |-- |-- annotations/
17
+ |-- |-- |-- instances_train2017.json
18
+ |-- |-- |-- instances_val2017.json
19
+ |-- |-- |-- image_info_test-dev2017.json
20
+ ~~~
21
+
22
+ ## VG Dataset
23
+ - Download images from the [official website](https://visualgenome.org/api/v0/api_home.html)
24
+ - Download our pre-processed annotations:
25
+ [train.json](https://datarelease.blob.core.windows.net/grit/VG_preprocessed_annotations/train.json) and
26
+ [test.json](https://datarelease.blob.core.windows.net/grit/VG_preprocessed_annotations/test.json) (a download sketch is shown after the layout below)
27
+
28
+ Dataset structure should look like:
29
+ ~~~
30
+ ${GRiT_ROOT}
31
+ |-- datasets
32
+ `-- |-- vg
33
+ |-- |-- images/
34
+ |-- |-- annotations/
35
+ |-- |-- |-- train.json
36
+ |-- |-- |-- test.json
37
+ ~~~
38
+
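For convenience, the sketch below shows one way to fetch the pre-processed annotations into the layout above (the URLs are the ones listed in this section; the images themselves still need to be downloaded manually from the VG website).

~~~
# Sketch: download the pre-processed VG annotations into datasets/vg/annotations/.
import os
import urllib.request

anno_dir = "datasets/vg/annotations"
os.makedirs(anno_dir, exist_ok=True)
for name in ("train.json", "test.json"):
    url = f"https://datarelease.blob.core.windows.net/grit/VG_preprocessed_annotations/{name}"
    urllib.request.urlretrieve(url, os.path.join(anno_dir, name))
~~~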
39
+ ## References
40
+ Please cite the corresponding references if you use the datasets.
41
+
42
+ ~~~
43
+ @inproceedings{lin2014microsoft,
44
+ title={Microsoft coco: Common objects in context},
45
+ author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
46
+ booktitle={European conference on computer vision},
47
+ pages={740--755},
48
+ year={2014},
49
+ organization={Springer}
50
+ }
51
+
52
+ @article{krishna2017visual,
53
+ title={Visual genome: Connecting language and vision using crowdsourced dense image annotations},
54
+ author={Krishna, Ranjay and Zhu, Yuke and Groth, Oliver and Johnson, Justin and Hata, Kenji and Kravitz, Joshua and Chen, Stephanie and Kalantidis, Yannis and Li, Li-Jia and Shamma, David A and others},
55
+ journal={International journal of computer vision},
56
+ volume={123},
57
+ number={1},
58
+ pages={32--73},
59
+ year={2017},
60
+ publisher={Springer}
61
+ }
62
+ ~~~
demo.py ADDED
@@ -0,0 +1,125 @@
1
+ import argparse
2
+ import multiprocessing as mp
3
+ import os
4
+ import time
5
+ import cv2
6
+ import tqdm
7
+ import sys
8
+
9
+ from detectron2.config import get_cfg
10
+ from detectron2.data.detection_utils import read_image
11
+ from detectron2.utils.logger import setup_logger
12
+
13
+ sys.path.insert(0, 'third_party/CenterNet2/projects/CenterNet2/')
14
+ from centernet.config import add_centernet_config
15
+ from grit.config import add_grit_config
16
+
17
+ from grit.predictor import VisualizationDemo
18
+
19
+
20
+ # constants
21
+ WINDOW_NAME = "GRiT"
22
+
23
+
24
+ def setup_cfg(args):
25
+ cfg = get_cfg()
26
+ if args.cpu:
27
+ cfg.MODEL.DEVICE="cpu"
28
+ add_centernet_config(cfg)
29
+ add_grit_config(cfg)
30
+ cfg.merge_from_file(args.config_file)
31
+ cfg.merge_from_list(args.opts)
32
+ # Set score_threshold for builtin models
33
+ cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = args.confidence_threshold
34
+ cfg.MODEL.PANOPTIC_FPN.COMBINE.INSTANCES_CONFIDENCE_THRESH = args.confidence_threshold
35
+ if args.test_task:
36
+ cfg.MODEL.TEST_TASK = args.test_task
37
+ cfg.MODEL.BEAM_SIZE = 1
38
+ cfg.MODEL.ROI_HEADS.SOFT_NMS_ENABLED = False
39
+ cfg.USE_ACT_CHECKPOINT = False
40
+ cfg.freeze()
41
+ return cfg
42
+
43
+
44
+ def get_parser():
45
+ parser = argparse.ArgumentParser(description="Detectron2 demo for builtin configs")
46
+ parser.add_argument(
47
+ "--config-file",
48
+ default="",
49
+ metavar="FILE",
50
+ help="path to config file",
51
+ )
52
+ parser.add_argument("--cpu", action='store_true', help="Use CPU only.")
53
+ parser.add_argument(
54
+ "--input",
55
+ nargs="+",
56
+ help="A list of space separated input images; "
57
+ "or a single glob pattern such as 'directory/*.jpg'",
58
+ )
59
+ parser.add_argument(
60
+ "--output",
61
+ help="A file or directory to save output visualizations. "
62
+ "If not given, will show output in an OpenCV window.",
63
+ )
64
+ parser.add_argument(
65
+ "--confidence-threshold",
66
+ type=float,
67
+ default=0.5,
68
+ help="Minimum score for instance predictions to be shown",
69
+ )
70
+ parser.add_argument(
71
+ "--test-task",
72
+ type=str,
73
+ default='',
74
+ help="Choose a task to have GRiT perform",
75
+ )
76
+ parser.add_argument(
77
+ "--opts",
78
+ help="Modify config options using the command-line 'KEY VALUE' pairs",
79
+ default=[],
80
+ nargs=argparse.REMAINDER,
81
+ )
82
+ return parser
83
+
84
+
85
+ if __name__ == "__main__":
86
+ mp.set_start_method("spawn", force=True)
87
+ args = get_parser().parse_args()
88
+ setup_logger(name="fvcore")
89
+ logger = setup_logger()
90
+ logger.info("Arguments: " + str(args))
91
+
92
+ cfg = setup_cfg(args)
93
+
94
+ demo = VisualizationDemo(cfg)
95
+
96
+ if args.input:
97
+ for path in tqdm.tqdm(os.listdir(args.input[0]), disable=not args.output):
98
+ img = read_image(os.path.join(args.input[0], path), format="BGR")
99
+ start_time = time.time()
100
+ predictions, visualized_output = demo.run_on_image(img)
101
+ logger.info(
102
+ "{}: {} in {:.2f}s".format(
103
+ path,
104
+ "detected {} instances".format(len(predictions["instances"]))
105
+ if "instances" in predictions
106
+ else "finished",
107
+ time.time() - start_time,
108
+ )
109
+ )
110
+
111
+ if args.output:
112
+ if not os.path.exists(args.output):
113
+ os.mkdir(args.output)
114
+ if os.path.isdir(args.output):
115
+ assert os.path.isdir(args.output), args.output
116
+ out_filename = os.path.join(args.output, os.path.basename(path))
117
+ else:
118
+ assert len(args.input) == 1, "Please specify a directory with args.output"
119
+ out_filename = args.output
120
+ visualized_output.save(out_filename)
121
+ else:
122
+ cv2.namedWindow(WINDOW_NAME, cv2.WINDOW_NORMAL)
123
+ cv2.imshow(WINDOW_NAME, visualized_output.get_image()[:, :, ::-1])
124
+ if cv2.waitKey(0) == 27:
125
+ break # esc to quit
demo_images/000000353174.jpg ADDED
demo_images/000000438652.jpg ADDED
demo_images/000000453583.jpg ADDED
demo_images/000000466185.jpg ADDED
demo_images/000000497110.jpg ADDED
demo_images/000000497861.jpg ADDED
demo_images/000000546072.jpg ADDED
detectron2/.circleci/config.yml ADDED
@@ -0,0 +1,270 @@
1
+ version: 2.1
2
+
3
+ # -------------------------------------------------------------------------------------
4
+ # Environments to run the jobs in
5
+ # -------------------------------------------------------------------------------------
6
+ cpu: &cpu
7
+ machine:
8
+ image: ubuntu-2004:202107-02
9
+ resource_class: medium
10
+
11
+ gpu: &gpu
12
+ machine:
13
+ # NOTE: use a cuda version that's supported by all our pytorch versions
14
+ image: ubuntu-1604-cuda-11.1:202012-01
15
+ resource_class: gpu.nvidia.small
16
+
17
+ windows-cpu: &windows_cpu
18
+ machine:
19
+ resource_class: windows.medium
20
+ image: windows-server-2019-vs2019:stable
21
+ shell: powershell.exe
22
+
23
+ # windows-gpu: &windows_gpu
24
+ # machine:
25
+ # resource_class: windows.gpu.nvidia.medium
26
+ # image: windows-server-2019-nvidia:stable
27
+
28
+ version_parameters: &version_parameters
29
+ parameters:
30
+ pytorch_version:
31
+ type: string
32
+ torchvision_version:
33
+ type: string
34
+ pytorch_index:
35
+ type: string
36
+ # use test wheels index to have access to RC wheels
37
+ # https://download.pytorch.org/whl/test/torch_test.html
38
+ default: "https://download.pytorch.org/whl/torch_stable.html"
39
+ python_version: # NOTE: only affect linux
40
+ type: string
41
+ default: '3.6.8'
42
+
43
+ environment:
44
+ PYTORCH_VERSION: << parameters.pytorch_version >>
45
+ TORCHVISION_VERSION: << parameters.torchvision_version >>
46
+ PYTORCH_INDEX: << parameters.pytorch_index >>
47
+ PYTHON_VERSION: << parameters.python_version>>
48
+ # point datasets to ~/.torch so it's cached in CI
49
+ DETECTRON2_DATASETS: ~/.torch/datasets
50
+
51
+ # -------------------------------------------------------------------------------------
52
+ # Re-usable commands
53
+ # -------------------------------------------------------------------------------------
54
+ # install_nvidia_driver: &install_nvidia_driver
55
+ # - run:
56
+ # name: Install nvidia driver
57
+ # working_directory: ~/
58
+ # command: |
59
+ # wget -q 'https://s3.amazonaws.com/ossci-linux/nvidia_driver/NVIDIA-Linux-x86_64-430.40.run'
60
+ # sudo /bin/bash ./NVIDIA-Linux-x86_64-430.40.run -s --no-drm
61
+ # nvidia-smi
62
+
63
+ add_ssh_keys: &add_ssh_keys
64
+ # https://circleci.com/docs/2.0/add-ssh-key/
65
+ - add_ssh_keys:
66
+ fingerprints:
67
+ - "e4:13:f2:22:d4:49:e8:e4:57:5a:ac:20:2f:3f:1f:ca"
68
+
69
+ install_python: &install_python
70
+ - run:
71
+ name: Install Python
72
+ working_directory: ~/
73
+ command: |
74
+ # upgrade pyenv
75
+ cd /opt/circleci/.pyenv/plugins/python-build/../.. && git pull && cd -
76
+ pyenv install -s $PYTHON_VERSION
77
+ pyenv global $PYTHON_VERSION
78
+ python --version
79
+ which python
80
+ pip install --upgrade pip
81
+
82
+ setup_venv: &setup_venv
83
+ - run:
84
+ name: Setup Virtual Env
85
+ working_directory: ~/
86
+ command: |
87
+ python -m venv ~/venv
88
+ echo ". ~/venv/bin/activate" >> $BASH_ENV
89
+ . ~/venv/bin/activate
90
+ python --version
91
+ which python
92
+ which pip
93
+ pip install --upgrade pip
94
+
95
+ setup_venv_win: &setup_venv_win
96
+ - run:
97
+ name: Setup Virtual Env for Windows
98
+ command: |
99
+ pip install virtualenv
100
+ python -m virtualenv env
101
+ .\env\Scripts\activate
102
+ python --version
103
+ which python
104
+ which pip
105
+
106
+ install_linux_dep: &install_linux_dep
107
+ - run:
108
+ name: Install Dependencies
109
+ command: |
110
+ # disable crash coredump, so unittests fail fast
111
+ sudo systemctl stop apport.service
112
+ # install from github to get latest; install iopath first since fvcore depends on it
113
+ pip install --progress-bar off -U 'git+https://github.com/facebookresearch/iopath'
114
+ pip install --progress-bar off -U 'git+https://github.com/facebookresearch/fvcore'
115
+ # Don't use pytest-xdist: cuda tests are unstable under multi-process workers.
116
+ pip install --progress-bar off ninja opencv-python-headless pytest tensorboard pycocotools
117
+ pip install --progress-bar off torch==$PYTORCH_VERSION -f $PYTORCH_INDEX
118
+ if [[ "$TORCHVISION_VERSION" == "master" ]]; then
119
+ pip install git+https://github.com/pytorch/vision.git
120
+ else
121
+ pip install --progress-bar off torchvision==$TORCHVISION_VERSION -f $PYTORCH_INDEX
122
+ fi
123
+
124
+ python -c 'import torch; print("CUDA:", torch.cuda.is_available())'
125
+ gcc --version
126
+
127
+ install_detectron2: &install_detectron2
128
+ - run:
129
+ name: Install Detectron2
130
+ command: |
131
+ # Remove first, in case it's in the CI cache
132
+ pip uninstall -y detectron2
133
+
134
+ pip install --progress-bar off -e .[all]
135
+ python -m detectron2.utils.collect_env
136
+ ./datasets/prepare_for_tests.sh
137
+
138
+ run_unittests: &run_unittests
139
+ - run:
140
+ name: Run Unit Tests
141
+ command: |
142
+ pytest -v --durations=15 tests # parallel causes some random failures
143
+
144
+ uninstall_tests: &uninstall_tests
145
+ - run:
146
+ name: Run Tests After Uninstalling
147
+ command: |
148
+ pip uninstall -y detectron2
149
+ # Remove built binaries
150
+ rm -rf build/ detectron2/*.so
151
+ # Tests that code is importable without installation
152
+ PYTHONPATH=. ./.circleci/import-tests.sh
153
+
154
+
155
+ # -------------------------------------------------------------------------------------
156
+ # Jobs to run
157
+ # -------------------------------------------------------------------------------------
158
+ jobs:
159
+ linux_cpu_tests:
160
+ <<: *cpu
161
+ <<: *version_parameters
162
+
163
+ working_directory: ~/detectron2
164
+
165
+ steps:
166
+ - checkout
167
+
168
+ # Cache the venv directory that contains python, dependencies, and checkpoints
169
+ # Refresh the key when dependencies should be updated (e.g. when pytorch releases)
170
+ - restore_cache:
171
+ keys:
172
+ - cache-{{ arch }}-<< parameters.pytorch_version >>-{{ .Branch }}-20210827
173
+
174
+ - <<: *install_python
175
+ - <<: *install_linux_dep
176
+ - <<: *install_detectron2
177
+ - <<: *run_unittests
178
+ - <<: *uninstall_tests
179
+
180
+ - save_cache:
181
+ paths:
182
+ - /opt/circleci/.pyenv
183
+ - ~/.torch
184
+ key: cache-{{ arch }}-<< parameters.pytorch_version >>-{{ .Branch }}-20210827
185
+
186
+
187
+ linux_gpu_tests:
188
+ <<: *gpu
189
+ <<: *version_parameters
190
+
191
+ working_directory: ~/detectron2
192
+
193
+ steps:
194
+ - checkout
195
+
196
+ - restore_cache:
197
+ keys:
198
+ - cache-{{ arch }}-<< parameters.pytorch_version >>-{{ .Branch }}-20210827
199
+
200
+ - <<: *install_python
201
+ - <<: *install_linux_dep
202
+ - <<: *install_detectron2
203
+ - <<: *run_unittests
204
+ - <<: *uninstall_tests
205
+
206
+ - save_cache:
207
+ paths:
208
+ - /opt/circleci/.pyenv
209
+ - ~/.torch
210
+ key: cache-{{ arch }}-<< parameters.pytorch_version >>-{{ .Branch }}-20210827
211
+
212
+ windows_cpu_build:
213
+ <<: *windows_cpu
214
+ <<: *version_parameters
215
+ steps:
216
+ - <<: *add_ssh_keys
217
+ - checkout
218
+ - <<: *setup_venv_win
219
+
220
+ # Cache the env directory that contains dependencies
221
+ - restore_cache:
222
+ keys:
223
+ - cache-{{ arch }}-<< parameters.pytorch_version >>-{{ .Branch }}-20210404
224
+
225
+ - run:
226
+ name: Install Dependencies
227
+ command: |
228
+ pip install certifi --ignore-installed # required on windows to workaround some cert issue
229
+ pip install numpy cython # required on windows before pycocotools
230
+ pip install opencv-python-headless pytest-xdist pycocotools tensorboard
231
+ pip install -U git+https://github.com/facebookresearch/iopath
232
+ pip install -U git+https://github.com/facebookresearch/fvcore
233
+ pip install torch==$env:PYTORCH_VERSION torchvision==$env:TORCHVISION_VERSION -f $env:PYTORCH_INDEX
234
+
235
+ - save_cache:
236
+ paths:
237
+ - env
238
+ key: cache-{{ arch }}-<< parameters.pytorch_version >>-{{ .Branch }}-20210404
239
+
240
+ - <<: *install_detectron2
241
+ # TODO: unittest fails for now
242
+
243
+ workflows:
244
+ version: 2
245
+ regular_test:
246
+ jobs:
247
+ - linux_cpu_tests:
248
+ name: linux_cpu_tests_pytorch1.10
249
+ pytorch_version: '1.10.0+cpu'
250
+ torchvision_version: '0.11.1+cpu'
251
+ - linux_gpu_tests:
252
+ name: linux_gpu_tests_pytorch1.8
253
+ pytorch_version: '1.8.1+cu111'
254
+ torchvision_version: '0.9.1+cu111'
255
+ - linux_gpu_tests:
256
+ name: linux_gpu_tests_pytorch1.9
257
+ pytorch_version: '1.9+cu111'
258
+ torchvision_version: '0.10+cu111'
259
+ - linux_gpu_tests:
260
+ name: linux_gpu_tests_pytorch1.10
261
+ pytorch_version: '1.10+cu111'
262
+ torchvision_version: '0.11.1+cu111'
263
+ - linux_gpu_tests:
264
+ name: linux_gpu_tests_pytorch1.10_python39
265
+ pytorch_version: '1.10+cu111'
266
+ torchvision_version: '0.11.1+cu111'
267
+ python_version: '3.9.6'
268
+ - windows_cpu_build:
269
+ pytorch_version: '1.10+cpu'
270
+ torchvision_version: '0.11.1+cpu'
detectron2/.circleci/import-tests.sh ADDED
@@ -0,0 +1,16 @@
1
+ #!/bin/bash -e
2
+ # Copyright (c) Facebook, Inc. and its affiliates.
3
+
4
+ # Test that import works without building detectron2.
5
+
6
+ # Check that _C is not importable
7
+ python -c "from detectron2 import _C" > /dev/null 2>&1 && {
8
+ echo "This test should be run without building detectron2."
9
+ exit 1
10
+ }
11
+
12
+ # Check that other modules are still importable, even when _C is not importable
13
+ python -c "from detectron2 import modeling"
14
+ python -c "from detectron2 import modeling, data"
15
+ python -c "from detectron2 import evaluation, export, checkpoint"
16
+ python -c "from detectron2 import utils, engine"
detectron2/.clang-format ADDED
@@ -0,0 +1,85 @@
1
+ AccessModifierOffset: -1
2
+ AlignAfterOpenBracket: AlwaysBreak
3
+ AlignConsecutiveAssignments: false
4
+ AlignConsecutiveDeclarations: false
5
+ AlignEscapedNewlinesLeft: true
6
+ AlignOperands: false
7
+ AlignTrailingComments: false
8
+ AllowAllParametersOfDeclarationOnNextLine: false
9
+ AllowShortBlocksOnASingleLine: false
10
+ AllowShortCaseLabelsOnASingleLine: false
11
+ AllowShortFunctionsOnASingleLine: Empty
12
+ AllowShortIfStatementsOnASingleLine: false
13
+ AllowShortLoopsOnASingleLine: false
14
+ AlwaysBreakAfterReturnType: None
15
+ AlwaysBreakBeforeMultilineStrings: true
16
+ AlwaysBreakTemplateDeclarations: true
17
+ BinPackArguments: false
18
+ BinPackParameters: false
19
+ BraceWrapping:
20
+ AfterClass: false
21
+ AfterControlStatement: false
22
+ AfterEnum: false
23
+ AfterFunction: false
24
+ AfterNamespace: false
25
+ AfterObjCDeclaration: false
26
+ AfterStruct: false
27
+ AfterUnion: false
28
+ BeforeCatch: false
29
+ BeforeElse: false
30
+ IndentBraces: false
31
+ BreakBeforeBinaryOperators: None
32
+ BreakBeforeBraces: Attach
33
+ BreakBeforeTernaryOperators: true
34
+ BreakConstructorInitializersBeforeComma: false
35
+ BreakAfterJavaFieldAnnotations: false
36
+ BreakStringLiterals: false
37
+ ColumnLimit: 80
38
+ CommentPragmas: '^ IWYU pragma:'
39
+ ConstructorInitializerAllOnOneLineOrOnePerLine: true
40
+ ConstructorInitializerIndentWidth: 4
41
+ ContinuationIndentWidth: 4
42
+ Cpp11BracedListStyle: true
43
+ DerivePointerAlignment: false
44
+ DisableFormat: false
45
+ ForEachMacros: [ FOR_EACH, FOR_EACH_R, FOR_EACH_RANGE, ]
46
+ IncludeCategories:
47
+ - Regex: '^<.*\.h(pp)?>'
48
+ Priority: 1
49
+ - Regex: '^<.*'
50
+ Priority: 2
51
+ - Regex: '.*'
52
+ Priority: 3
53
+ IndentCaseLabels: true
54
+ IndentWidth: 2
55
+ IndentWrappedFunctionNames: false
56
+ KeepEmptyLinesAtTheStartOfBlocks: false
57
+ MacroBlockBegin: ''
58
+ MacroBlockEnd: ''
59
+ MaxEmptyLinesToKeep: 1
60
+ NamespaceIndentation: None
61
+ ObjCBlockIndentWidth: 2
62
+ ObjCSpaceAfterProperty: false
63
+ ObjCSpaceBeforeProtocolList: false
64
+ PenaltyBreakBeforeFirstCallParameter: 1
65
+ PenaltyBreakComment: 300
66
+ PenaltyBreakFirstLessLess: 120
67
+ PenaltyBreakString: 1000
68
+ PenaltyExcessCharacter: 1000000
69
+ PenaltyReturnTypeOnItsOwnLine: 200
70
+ PointerAlignment: Left
71
+ ReflowComments: true
72
+ SortIncludes: true
73
+ SpaceAfterCStyleCast: false
74
+ SpaceBeforeAssignmentOperators: true
75
+ SpaceBeforeParens: ControlStatements
76
+ SpaceInEmptyParentheses: false
77
+ SpacesBeforeTrailingComments: 1
78
+ SpacesInAngles: false
79
+ SpacesInContainerLiterals: true
80
+ SpacesInCStyleCastParentheses: false
81
+ SpacesInParentheses: false
82
+ SpacesInSquareBrackets: false
83
+ Standard: Cpp11
84
+ TabWidth: 8
85
+ UseTab: Never
detectron2/.flake8 ADDED
@@ -0,0 +1,15 @@
1
+ # This is an example .flake8 config, used when developing *Black* itself.
2
+ # Keep in sync with setup.cfg which is used for source packages.
3
+
4
+ [flake8]
5
+ ignore = W503, E203, E221, C901, C408, E741, C407, B017, F811
6
+ max-line-length = 100
7
+ max-complexity = 18
8
+ select = B,C,E,F,W,T4,B9
9
+ exclude = build
10
+ per-file-ignores =
11
+ **/__init__.py:F401,F403,E402
12
+ **/configs/**.py:F401,E402
13
+ configs/**.py:F401,E402
14
+ **/tests/config/**.py:F401,E402
15
+ tests/config/**.py:F401,E402
detectron2/.gitignore ADDED
@@ -0,0 +1,53 @@
1
+ # output dir
2
+ output
3
+ instant_test_output
4
+ inference_test_output
5
+
6
+
7
+ *.png
8
+ *.json
9
+ *.diff
10
+ *.jpg
11
+ !/projects/DensePose/doc/images/*.jpg
12
+
13
+ # compilation and distribution
14
+ __pycache__
15
+ _ext
16
+ *.pyc
17
+ *.pyd
18
+ *.so
19
+ *.dll
20
+ *.egg-info/
21
+ build/
22
+ dist/
23
+ wheels/
24
+
25
+ # pytorch/python/numpy formats
26
+ *.pth
27
+ *.pkl
28
+ *.npy
29
+ *.ts
30
+ model_ts*.txt
31
+
32
+ # ipython/jupyter notebooks
33
+ *.ipynb
34
+ **/.ipynb_checkpoints/
35
+
36
+ # Editor temporaries
37
+ *.swn
38
+ *.swo
39
+ *.swp
40
+ *~
41
+
42
+ # editor settings
43
+ .idea
44
+ .vscode
45
+ _darcs
46
+
47
+ # project dirs
48
+ /detectron2/model_zoo/configs
49
+ /datasets/*
50
+ !/datasets/*.*
51
+ /projects/*/datasets
52
+ /models
53
+ /snippet
detectron2/GETTING_STARTED.md ADDED
@@ -0,0 +1,79 @@
1
+ ## Getting Started with Detectron2
2
+
3
+ This document provides a brief intro of the usage of builtin command-line tools in detectron2.
4
+
5
+ For a tutorial that involves actual coding with the API,
6
+ see our [Colab Notebook](https://colab.research.google.com/drive/16jcaJoc6bCFAQ96jDe2HwtXj7BMD_-m5)
7
+ which covers how to run inference with an
8
+ existing model, and how to train a builtin model on a custom dataset.
9
+
10
+
11
+ ### Inference Demo with Pre-trained Models
12
+
13
+ 1. Pick a model and its config file from
14
+ [model zoo](MODEL_ZOO.md),
15
+ for example, `mask_rcnn_R_50_FPN_3x.yaml`.
16
+ 2. We provide `demo.py` that is able to demo builtin configs. Run it with:
17
+ ```
18
+ cd demo/
19
+ python demo.py --config-file ../configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml \
20
+ --input input1.jpg input2.jpg \
21
+ [--other-options]
22
+ --opts MODEL.WEIGHTS detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl
23
+ ```
24
+ The configs are made for training, therefore we need to specify `MODEL.WEIGHTS` to a model from model zoo for evaluation.
25
+ This command will run the inference and show visualizations in an OpenCV window.
26
+
27
+ For details of the command line arguments, see `demo.py -h` or look at its source code
28
+ to understand its behavior. Some common arguments are:
29
+ * To run __on your webcam__, replace `--input files` with `--webcam`.
30
+ * To run __on a video__, replace `--input files` with `--video-input video.mp4`.
31
+ * To run __on cpu__, add `MODEL.DEVICE cpu` after `--opts`.
32
+ * To save outputs to a directory (for images) or a file (for webcam or video), use `--output`.
33
+
34
+
35
+ ### Training & Evaluation in Command Line
36
+
37
+ We provide two scripts in "tools/plain_train_net.py" and "tools/train_net.py",
38
+ that are made to train all the configs provided in detectron2. You may want to
39
+ use it as a reference to write your own training script.
40
+
41
+ Compared to "train_net.py", "plain_train_net.py" supports fewer default
42
+ features. It also includes fewer abstraction, therefore is easier to add custom
43
+ logic.
44
+
45
+ To train a model with "train_net.py", first
46
+ setup the corresponding datasets following
47
+ [datasets/README.md](./datasets/README.md),
48
+ then run:
49
+ ```
50
+ cd tools/
51
+ ./train_net.py --num-gpus 8 \
52
+ --config-file ../configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml
53
+ ```
54
+
55
+ The configs are made for 8-GPU training.
56
+ To train on 1 GPU, you may need to [change some parameters](https://arxiv.org/abs/1706.02677), e.g.:
57
+ ```
58
+ ./train_net.py \
59
+ --config-file ../configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml \
60
+ --num-gpus 1 SOLVER.IMS_PER_BATCH 2 SOLVER.BASE_LR 0.0025
61
+ ```
62
+
63
+ To evaluate a model's performance, use
64
+ ```
65
+ ./train_net.py \
66
+ --config-file ../configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml \
67
+ --eval-only MODEL.WEIGHTS /path/to/checkpoint_file
68
+ ```
69
+ For more options, see `./train_net.py -h`.
70
+
71
+ ### Use Detectron2 APIs in Your Code
72
+
73
+ See our [Colab Notebook](https://colab.research.google.com/drive/16jcaJoc6bCFAQ96jDe2HwtXj7BMD_-m5)
74
+ to learn how to use detectron2 APIs to:
75
+ 1. run inference with an existing model
76
+ 2. train a builtin model on a custom dataset
77
+
78
+ See [detectron2/projects](https://github.com/facebookresearch/detectron2/tree/main/projects)
79
+ for more ways to build your project on detectron2.
detectron2/INSTALL.md ADDED
@@ -0,0 +1,262 @@
1
+ ## Installation
2
+
3
+ ### Requirements
4
+ - Linux or macOS with Python β‰₯ 3.6
5
+ - PyTorch β‰₯ 1.8 and [torchvision](https://github.com/pytorch/vision/) that matches the PyTorch installation.
6
+ Install them together at [pytorch.org](https://pytorch.org) to make sure of this
7
+ - OpenCV is optional but needed by demo and visualization
8
+
9
+
10
+ ### Build Detectron2 from Source
11
+
12
+ gcc & g++ β‰₯ 5.4 are required. [ninja](https://ninja-build.org/) is optional but recommended for faster build.
13
+ After having them, run:
14
+ ```
15
+ python -m pip install 'git+https://github.com/facebookresearch/detectron2.git'
16
+ # (add --user if you don't have permission)
17
+
18
+ # Or, to install it from a local clone:
19
+ git clone https://github.com/facebookresearch/detectron2.git
20
+ python -m pip install -e detectron2
21
+
22
+ # On macOS, you may need to prepend the above commands with a few environment variables:
23
+ CC=clang CXX=clang++ ARCHFLAGS="-arch x86_64" python -m pip install ...
24
+ ```
25
+
26
+ To __rebuild__ detectron2 that's built from a local clone, use `rm -rf build/ **/*.so` to clean the
27
+ old build first. You often need to rebuild detectron2 after reinstalling PyTorch.
28
+
29
+ ### Install Pre-Built Detectron2 (Linux only)
30
+
31
+ Choose from this table to install [v0.6 (Oct 2021)](https://github.com/facebookresearch/detectron2/releases):
32
+
33
+ <table class="docutils"><tbody><th width="80"> CUDA </th><th valign="bottom" align="left" width="100">torch 1.10</th><th valign="bottom" align="left" width="100">torch 1.9</th><th valign="bottom" align="left" width="100">torch 1.8</th> <tr><td align="left">11.3</td><td align="left"><details><summary> install </summary><pre><code>python -m pip install detectron2 -f \
34
+ https://dl.fbaipublicfiles.com/detectron2/wheels/cu113/torch1.10/index.html
35
+ </code></pre> </details> </td> <td align="left"> </td> <td align="left"> </td> </tr> <tr><td align="left">11.1</td><td align="left"><details><summary> install </summary><pre><code>python -m pip install detectron2 -f \
36
+ https://dl.fbaipublicfiles.com/detectron2/wheels/cu111/torch1.10/index.html
37
+ </code></pre> </details> </td> <td align="left"><details><summary> install </summary><pre><code>python -m pip install detectron2 -f \
38
+ https://dl.fbaipublicfiles.com/detectron2/wheels/cu111/torch1.9/index.html
39
+ </code></pre> </details> </td> <td align="left"><details><summary> install </summary><pre><code>python -m pip install detectron2 -f \
40
+ https://dl.fbaipublicfiles.com/detectron2/wheels/cu111/torch1.8/index.html
41
+ </code></pre> </details> </td> </tr> <tr><td align="left">10.2</td><td align="left"><details><summary> install </summary><pre><code>python -m pip install detectron2 -f \
42
+ https://dl.fbaipublicfiles.com/detectron2/wheels/cu102/torch1.10/index.html
43
+ </code></pre> </details> </td> <td align="left"><details><summary> install </summary><pre><code>python -m pip install detectron2 -f \
44
+ https://dl.fbaipublicfiles.com/detectron2/wheels/cu102/torch1.9/index.html
45
+ </code></pre> </details> </td> <td align="left"><details><summary> install </summary><pre><code>python -m pip install detectron2 -f \
46
+ https://dl.fbaipublicfiles.com/detectron2/wheels/cu102/torch1.8/index.html
47
+ </code></pre> </details> </td> </tr> <tr><td align="left">10.1</td><td align="left"> </td> <td align="left"> </td> <td align="left"><details><summary> install </summary><pre><code>python -m pip install detectron2 -f \
48
+ https://dl.fbaipublicfiles.com/detectron2/wheels/cu101/torch1.8/index.html
49
+ </code></pre> </details> </td> </tr> <tr><td align="left">cpu</td><td align="left"><details><summary> install </summary><pre><code>python -m pip install detectron2 -f \
50
+ https://dl.fbaipublicfiles.com/detectron2/wheels/cpu/torch1.10/index.html
51
+ </code></pre> </details> </td> <td align="left"><details><summary> install </summary><pre><code>python -m pip install detectron2 -f \
52
+ https://dl.fbaipublicfiles.com/detectron2/wheels/cpu/torch1.9/index.html
53
+ </code></pre> </details> </td> <td align="left"><details><summary> install </summary><pre><code>python -m pip install detectron2 -f \
54
+ https://dl.fbaipublicfiles.com/detectron2/wheels/cpu/torch1.8/index.html
55
+ </code></pre> </details> </td> </tr></tbody></table>
56
+
57
+ Note that:
58
+ 1. The pre-built packages have to be used with corresponding version of CUDA and the official package of PyTorch.
59
+ Otherwise, please build detectron2 from source.
60
+ 2. New packages are released every few months. Therefore, packages may not contain latest features in the main
61
+ branch and may not be compatible with the main branch of a research project that uses detectron2
62
+ (e.g. those in [projects](projects)).
63
+
64
+ ### Common Installation Issues
65
+
66
+ Click each issue for its solutions:
67
+
68
+ <details>
69
+ <summary>
70
+ Undefined symbols that look like "TH..","at::Tensor...","torch..."
71
+ </summary>
72
+ <br/>
73
+
74
+ This usually happens when detectron2 or torchvision is not
75
+ compiled with the version of PyTorch you're running.
76
+
77
+ If the error comes from a pre-built torchvision, uninstall torchvision and pytorch and reinstall them
78
+ following [pytorch.org](http://pytorch.org) so that the versions match.
79
+
80
+ If the error comes from a pre-built detectron2, check [release notes](https://github.com/facebookresearch/detectron2/releases),
81
+ uninstall and reinstall the correct pre-built detectron2 that matches your pytorch version.
82
+
83
+ If the error comes from detectron2 or torchvision that you built manually from source,
84
+ remove files you built (`build/`, `**/*.so`) and rebuild it so it can pick up the version of pytorch currently in your environment.
85
+
86
+ If the above instructions do not resolve this problem, please provide an environment (e.g. a dockerfile) that can reproduce the issue.
87
+ </details>
88
+
89
+ <details>
90
+ <summary>
91
+ Missing torch dynamic libraries, OR segmentation fault immediately when using detectron2.
92
+ </summary>
93
+ This usually happens when detectron2 or torchvision is not
94
+ compiled with the version of PyTorch you're running. See the previous common issue for the solution.
95
+ </details>
96
+
97
+ <details>
98
+ <summary>
99
+ Undefined C++ symbols (e.g. "GLIBCXX..") or C++ symbols not found.
100
+ </summary>
101
+ <br/>
102
+ Usually it's because the library is compiled with a newer C++ compiler but run with an old C++ runtime.
103
+
104
+ This often happens with an old Anaconda installation.
105
+ It may help to run `conda update libgcc` to upgrade its runtime.
106
+
107
+ The fundamental solution is to avoid the mismatch, either by compiling with an older version of the C++
108
+ compiler, or by running the code with the proper C++ runtime.
109
+ To run the code with a specific C++ runtime, set the environment variable `LD_PRELOAD=/path/to/libstdc++.so`.
110
+
111
+ </details>
112
+
113
+ <details>
114
+ <summary>
115
+ "nvcc not found" or "Not compiled with GPU support" or "Detectron2 CUDA Compiler: not available".
116
+ </summary>
117
+ <br/>
118
+ CUDA is not found when building detectron2.
119
+ You should make sure
120
+
121
+ ```
122
+ python -c 'import torch; from torch.utils.cpp_extension import CUDA_HOME; print(torch.cuda.is_available(), CUDA_HOME)'
123
+ ```
124
+
125
+ prints `(True, a directory with cuda)` at the time you build detectron2.
126
+
127
+ Most models can run inference (but not training) without GPU support. To use CPUs, set `MODEL.DEVICE='cpu'` in the config.
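+ 
+ For instance, a minimal CPU-only inference setup might look like the sketch below (the config name is just one example taken from the model zoo):
+ 
+ ```
+ from detectron2 import model_zoo
+ from detectron2.config import get_cfg
+ from detectron2.engine import DefaultPredictor
+ 
+ cfg = get_cfg()
+ cfg.merge_from_file(model_zoo.get_config_file("COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml"))
+ cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml")
+ cfg.MODEL.DEVICE = "cpu"  # run inference on the CPU; no CUDA build needed
+ predictor = DefaultPredictor(cfg)
+ ```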
128
+ </details>
129
+
130
+ <details>
131
+ <summary>
132
+ "invalid device function" or "no kernel image is available for execution".
133
+ </summary>
134
+ <br/>
135
+ Two possibilities:
136
+
137
+ * You build detectron2 with one version of CUDA but run it with a different version.
138
+
139
+ To check whether this is the case,
140
+ use `python -m detectron2.utils.collect_env` to find out inconsistent CUDA versions.
141
+ In the output of this command, you should expect "Detectron2 CUDA Compiler", "CUDA_HOME", "PyTorch built with - CUDA"
142
+ to contain cuda libraries of the same version.
143
+
144
+ When they are inconsistent,
145
+ you need to either install a different build of PyTorch (or build by yourself)
146
+ to match your local CUDA installation, or install a different version of CUDA to match PyTorch.
147
+
148
+ * PyTorch/torchvision/Detectron2 is not built for the correct GPU SM architecture (aka. compute capability).
149
+
150
+ The architecture included by PyTorch/detectron2/torchvision is available in the "architecture flags" in
151
+ `python -m detectron2.utils.collect_env`. It must include
152
+ the architecture of your GPU, which can be found at [developer.nvidia.com/cuda-gpus](https://developer.nvidia.com/cuda-gpus).
153
+
154
+ If you're using pre-built PyTorch/detectron2/torchvision, they have included support for most popular GPUs already.
155
+ If not supported, you need to build them from source.
156
+
157
+ When building detectron2/torchvision from source, they detect the GPU device and build only for that device.
158
+ This means the compiled code may not work on a different GPU device.
159
+ To recompile them for the correct architecture, remove all installed/compiled files,
160
+ and rebuild them with the `TORCH_CUDA_ARCH_LIST` environment variable set properly.
161
+ For example, `export TORCH_CUDA_ARCH_LIST="6.0;7.0"` makes it compile for both P100s and V100s (a way to check both values is sketched below).
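+ 
+ Assuming a recent PyTorch that provides `torch.cuda.get_arch_list()`, that check can look like:
+ 
+ ```
+ import torch
+ 
+ # Compute capability of the local GPU, e.g. (7, 0) for a V100.
+ major, minor = torch.cuda.get_device_capability(0)
+ print("local GPU:", f"sm_{major}{minor}")
+ 
+ # Architectures the installed PyTorch build was compiled for.
+ print("PyTorch built for:", torch.cuda.get_arch_list())
+ ```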
162
+ </details>
163
+
164
+ <details>
165
+ <summary>
166
+ Undefined CUDA symbols; Cannot open libcudart.so
167
+ </summary>
168
+ <br/>
169
+ The version of NVCC you use to build detectron2 or torchvision does
170
+ not match the version of CUDA you are running with.
171
+ This often happens when using anaconda's CUDA runtime.
172
+
173
+ Use `python -m detectron2.utils.collect_env` to find out inconsistent CUDA versions.
174
+ In the output of this command, you should expect "Detectron2 CUDA Compiler", "CUDA_HOME", "PyTorch built with - CUDA"
175
+ to contain cuda libraries of the same version.
176
+
177
+ When they are inconsistent,
178
+ you need to either install a different build of PyTorch (or build by yourself)
179
+ to match your local CUDA installation, or install a different version of CUDA to match PyTorch.
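+ 
+ Besides `collect_env`, a quick sketch along these lines prints the CUDA version PyTorch was built with next to the NVCC found via `CUDA_HOME`:
+ 
+ ```
+ import os
+ import subprocess
+ 
+ import torch
+ from torch.utils.cpp_extension import CUDA_HOME
+ 
+ # CUDA version PyTorch was built with vs. the NVCC found via CUDA_HOME.
+ print("PyTorch built with CUDA:", torch.version.cuda)
+ if CUDA_HOME is not None:
+     print(subprocess.check_output([os.path.join(CUDA_HOME, "bin", "nvcc"), "--version"]).decode())
+ else:
+     print("CUDA_HOME is not set; no local CUDA toolkit was found.")
+ ```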
180
+ </details>
181
+
182
+
183
+ <details>
184
+ <summary>
185
+ C++ compilation errors from NVCC / NVRTC, or "Unsupported gpu architecture"
186
+ </summary>
187
+ <br/>
188
+ A few possibilities:
189
+
190
+ 1. Local CUDA/NVCC version has to match the CUDA version of your PyTorch. Both can be found in `python collect_env.py`
191
+ (download from [here](./detectron2/utils/collect_env.py)).
192
+ When they are inconsistent, you need to either install a different build of PyTorch (or build by yourself)
193
+ to match your local CUDA installation, or install a different version of CUDA to match PyTorch.
194
+
195
+ 2. Local CUDA/NVCC version shall support the SM architecture (a.k.a. compute capability) of your GPU.
196
+ The capability of your GPU can be found at [developer.nvidia.com/cuda-gpus](https://developer.nvidia.com/cuda-gpus).
197
+ The capability supported by NVCC is listed [here](https://gist.github.com/ax3l/9489132).
198
+ If your NVCC version is too old, this can be worked around by setting the environment variable
199
+ `TORCH_CUDA_ARCH_LIST` to a lower, supported capability.
200
+
201
+ 3. The combination of NVCC and GCC you use is incompatible. You need to change one of their versions.
202
+ See [here](https://gist.github.com/ax3l/9489132) for some valid combinations.
203
+ Notably, CUDA<=10.1.105 doesn't support GCC>7.3.
204
+
205
+ The CUDA/GCC version used by PyTorch can be found by `print(torch.__config__.show())`.
206
+
207
+ </details>
208
+
209
+
210
+ <details>
211
+ <summary>
212
+ "ImportError: cannot import name '_C'".
213
+ </summary>
214
+ <br/>
215
+ Please build and install detectron2 following the instructions above.
216
+
217
+ Or, if you are running code from detectron2's root directory, `cd` to a different one.
218
+ Otherwise, Python may pick up the local source tree instead of the code that you installed.
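+ 
+ To see which copy of detectron2 is actually being imported, a one-line check such as this can help:
+ 
+ ```
+ import detectron2
+ 
+ # If this prints a path inside your source checkout rather than site-packages,
+ # Python is importing the uninstalled source tree, which has no compiled _C module.
+ print(detectron2.__file__)
+ ```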
219
+ </details>
220
+
221
+
222
+ <details>
223
+ <summary>
224
+ Any issue on windows.
225
+ </summary>
226
+ <br/>
227
+
228
+ Detectron2 is continuously built on Windows with [CircleCI](https://app.circleci.com/pipelines/github/facebookresearch/detectron2?branch=main).
229
+ However, we do not provide official support for it.
230
+ PRs that improve code compatibility on Windows are welcome.
231
+ </details>
232
+
233
+ <details>
234
+ <summary>
235
+ ONNX conversion segfault after some "TraceWarning".
236
+ </summary>
237
+ <br/>
238
+ The ONNX package was compiled with too old a compiler.
239
+
240
+ Please build and install ONNX from its source code using a compiler
241
+ whose version is closer to what's used by PyTorch (available in `torch.__config__.show()`).
242
+ </details>
243
+
244
+
245
+ <details>
246
+ <summary>
247
+ "library not found for -lstdc++" on older version of MacOS
248
+ </summary>
249
+ <br/>
250
+ See
251
+ [this stackoverflow answer](https://stackoverflow.com/questions/56083725/macos-build-issues-lstdc-not-found-while-building-python-package).
252
+
253
+ </details>
254
+
255
+
256
+ ### Installation inside specific environments:
257
+
258
+ * __Colab__: see our [Colab Tutorial](https://colab.research.google.com/drive/16jcaJoc6bCFAQ96jDe2HwtXj7BMD_-m5)
259
+ which has step-by-step instructions.
260
+
261
+ * __Docker__: The official [Dockerfile](docker) installs detectron2 with a few simple commands.
262
+
detectron2/LICENSE ADDED
@@ -0,0 +1,202 @@
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity. For the purposes of this definition,
18
+ "control" means (i) the power, direct or indirect, to cause the
19
+ direction or management of such entity, whether by contract or
20
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
+ outstanding shares, or (iii) beneficial ownership of such entity.
22
+
23
+ "You" (or "Your") shall mean an individual or Legal Entity
24
+ exercising permissions granted by this License.
25
+
26
+ "Source" form shall mean the preferred form for making modifications,
27
+ including but not limited to software source code, documentation
28
+ source, and configuration files.
29
+
30
+ "Object" form shall mean any form resulting from mechanical
31
+ transformation or translation of a Source form, including but
32
+ not limited to compiled object code, generated documentation,
33
+ and conversions to other media types.
34
+
35
+ "Work" shall mean the work of authorship, whether in Source or
36
+ Object form, made available under the License, as indicated by a
37
+ copyright notice that is included in or attached to the work
38
+ (an example is provided in the Appendix below).
39
+
40
+ "Derivative Works" shall mean any work, whether in Source or Object
41
+ form, that is based on (or derived from) the Work and for which the
42
+ editorial revisions, annotations, elaborations, or other modifications
43
+ represent, as a whole, an original work of authorship. For the purposes
44
+ of this License, Derivative Works shall not include works that remain
45
+ separable from, or merely link (or bind by name) to the interfaces of,
46
+ the Work and Derivative Works thereof.
47
+
48
+ "Contribution" shall mean any work of authorship, including
49
+ the original version of the Work and any modifications or additions
50
+ to that Work or Derivative Works thereof, that is intentionally
51
+ submitted to Licensor for inclusion in the Work by the copyright owner
52
+ or by an individual or Legal Entity authorized to submit on behalf of
53
+ the copyright owner. For the purposes of this definition, "submitted"
54
+ means any form of electronic, verbal, or written communication sent
55
+ to the Licensor or its representatives, including but not limited to
56
+ communication on electronic mailing lists, source code control systems,
57
+ and issue tracking systems that are managed by, or on behalf of, the
58
+ Licensor for the purpose of discussing and improving the Work, but
59
+ excluding communication that is conspicuously marked or otherwise
60
+ designated in writing by the copyright owner as "Not a Contribution."
61
+
62
+ "Contributor" shall mean Licensor and any individual or Legal Entity
63
+ on behalf of whom a Contribution has been received by Licensor and
64
+ subsequently incorporated within the Work.
65
+
66
+ 2. Grant of Copyright License. Subject to the terms and conditions of
67
+ this License, each Contributor hereby grants to You a perpetual,
68
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
+ copyright license to reproduce, prepare Derivative Works of,
70
+ publicly display, publicly perform, sublicense, and distribute the
71
+ Work and such Derivative Works in Source or Object form.
72
+
73
+ 3. Grant of Patent License. Subject to the terms and conditions of
74
+ this License, each Contributor hereby grants to You a perpetual,
75
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
+ (except as stated in this section) patent license to make, have made,
77
+ use, offer to sell, sell, import, and otherwise transfer the Work,
78
+ where such license applies only to those patent claims licensable
79
+ by such Contributor that are necessarily infringed by their
80
+ Contribution(s) alone or by combination of their Contribution(s)
81
+ with the Work to which such Contribution(s) was submitted. If You
82
+ institute patent litigation against any entity (including a
83
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
84
+ or a Contribution incorporated within the Work constitutes direct
85
+ or contributory patent infringement, then any patent licenses
86
+ granted to You under this License for that Work shall terminate
87
+ as of the date such litigation is filed.
88
+
89
+ 4. Redistribution. You may reproduce and distribute copies of the
90
+ Work or Derivative Works thereof in any medium, with or without
91
+ modifications, and in Source or Object form, provided that You
92
+ meet the following conditions:
93
+
94
+ (a) You must give any other recipients of the Work or
95
+ Derivative Works a copy of this License; and
96
+
97
+ (b) You must cause any modified files to carry prominent notices
98
+ stating that You changed the files; and
99
+
100
+ (c) You must retain, in the Source form of any Derivative Works
101
+ that You distribute, all copyright, patent, trademark, and
102
+ attribution notices from the Source form of the Work,
103
+ excluding those notices that do not pertain to any part of
104
+ the Derivative Works; and
105
+
106
+ (d) If the Work includes a "NOTICE" text file as part of its
107
+ distribution, then any Derivative Works that You distribute must
108
+ include a readable copy of the attribution notices contained
109
+ within such NOTICE file, excluding those notices that do not
110
+ pertain to any part of the Derivative Works, in at least one
111
+ of the following places: within a NOTICE text file distributed
112
+ as part of the Derivative Works; within the Source form or
113
+ documentation, if provided along with the Derivative Works; or,
114
+ within a display generated by the Derivative Works, if and
115
+ wherever such third-party notices normally appear. The contents
116
+ of the NOTICE file are for informational purposes only and
117
+ do not modify the License. You may add Your own attribution
118
+ notices within Derivative Works that You distribute, alongside
119
+ or as an addendum to the NOTICE text from the Work, provided
120
+ that such additional attribution notices cannot be construed
121
+ as modifying the License.
122
+
123
+ You may add Your own copyright statement to Your modifications and
124
+ may provide additional or different license terms and conditions
125
+ for use, reproduction, or distribution of Your modifications, or
126
+ for any such Derivative Works as a whole, provided Your use,
127
+ reproduction, and distribution of the Work otherwise complies with
128
+ the conditions stated in this License.
129
+
130
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
131
+ any Contribution intentionally submitted for inclusion in the Work
132
+ by You to the Licensor shall be under the terms and conditions of
133
+ this License, without any additional terms or conditions.
134
+ Notwithstanding the above, nothing herein shall supersede or modify
135
+ the terms of any separate license agreement you may have executed
136
+ with Licensor regarding such Contributions.
137
+
138
+ 6. Trademarks. This License does not grant permission to use the trade
139
+ names, trademarks, service marks, or product names of the Licensor,
140
+ except as required for reasonable and customary use in describing the
141
+ origin of the Work and reproducing the content of the NOTICE file.
142
+
143
+ 7. Disclaimer of Warranty. Unless required by applicable law or
144
+ agreed to in writing, Licensor provides the Work (and each
145
+ Contributor provides its Contributions) on an "AS IS" BASIS,
146
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
+ implied, including, without limitation, any warranties or conditions
148
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
+ PARTICULAR PURPOSE. You are solely responsible for determining the
150
+ appropriateness of using or redistributing the Work and assume any
151
+ risks associated with Your exercise of permissions under this License.
152
+
153
+ 8. Limitation of Liability. In no event and under no legal theory,
154
+ whether in tort (including negligence), contract, or otherwise,
155
+ unless required by applicable law (such as deliberate and grossly
156
+ negligent acts) or agreed to in writing, shall any Contributor be
157
+ liable to You for damages, including any direct, indirect, special,
158
+ incidental, or consequential damages of any character arising as a
159
+ result of this License or out of the use or inability to use the
160
+ Work (including but not limited to damages for loss of goodwill,
161
+ work stoppage, computer failure or malfunction, or any and all
162
+ other commercial damages or losses), even if such Contributor
163
+ has been advised of the possibility of such damages.
164
+
165
+ 9. Accepting Warranty or Additional Liability. While redistributing
166
+ the Work or Derivative Works thereof, You may choose to offer,
167
+ and charge a fee for, acceptance of support, warranty, indemnity,
168
+ or other liability obligations and/or rights consistent with this
169
+ License. However, in accepting such obligations, You may act only
170
+ on Your own behalf and on Your sole responsibility, not on behalf
171
+ of any other Contributor, and only if You agree to indemnify,
172
+ defend, and hold each Contributor harmless for any liability
173
+ incurred by, or claims asserted against, such Contributor by reason
174
+ of your accepting any such warranty or additional liability.
175
+
176
+ END OF TERMS AND CONDITIONS
177
+
178
+ APPENDIX: How to apply the Apache License to your work.
179
+
180
+ To apply the Apache License to your work, attach the following
181
+ boilerplate notice, with the fields enclosed by brackets "[]"
182
+ replaced with your own identifying information. (Don't include
183
+ the brackets!) The text should be enclosed in the appropriate
184
+ comment syntax for the file format. We also recommend that a
185
+ file or class name and description of purpose be included on the
186
+ same "printed page" as the copyright notice for easier
187
+ identification within third-party archives.
188
+
189
+ Copyright [yyyy] [name of copyright owner]
190
+
191
+
192
+ Licensed under the Apache License, Version 2.0 (the "License");
193
+ you may not use this file except in compliance with the License.
194
+ You may obtain a copy of the License at
195
+
196
+ http://www.apache.org/licenses/LICENSE-2.0
197
+
198
+ Unless required by applicable law or agreed to in writing, software
199
+ distributed under the License is distributed on an "AS IS" BASIS,
200
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
201
+ See the License for the specific language governing permissions and
202
+ limitations under the License.
detectron2/MODEL_ZOO.md ADDED
@@ -0,0 +1,1052 @@
1
+ # Detectron2 Model Zoo and Baselines
2
+
3
+ ## Introduction
4
+
5
+ This file documents a large collection of baselines trained
6
+ with detectron2 in Sep-Oct, 2019.
7
+ All numbers were obtained on [Big Basin](https://engineering.fb.com/data-center-engineering/introducing-big-basin-our-next-generation-ai-hardware/)
8
+ servers with 8 NVIDIA V100 GPUs & NVLink. The speed numbers are periodically updated with the latest PyTorch/CUDA/cuDNN versions.
9
+ You can access these models from code using [detectron2.model_zoo](https://detectron2.readthedocs.io/modules/model_zoo.html) APIs.
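+ 
+ For example, a minimal sketch of that API (the config name is just one of the baselines listed below):
+ 
+ ```
+ from detectron2 import model_zoo
+ 
+ # Build one of the baselines on this page together with its pre-trained weights.
+ model = model_zoo.get("COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml", trained=True)
+ ```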
10
+
11
+ In addition to these official baseline models, you can find more models in [projects/](projects/).
12
+
13
+ #### How to Read the Tables
14
+ * The "Name" column contains a link to the config file. Models can be reproduced using `tools/train_net.py` with the corresponding yaml config file,
15
+ or `tools/lazyconfig_train_net.py` for python config files.
16
+ * Training speed is averaged across the entire training.
17
+ We keep updating the speed with the latest version of detectron2/pytorch/etc.,
18
+ so it might differ from the numbers in the `metrics` file.
19
+ Training speed for multi-machine jobs is not provided.
20
+ * Inference speed is measured by `tools/train_net.py --eval-only`, or [inference_on_dataset()](https://detectron2.readthedocs.io/modules/evaluation.html#detectron2.evaluation.inference_on_dataset),
21
+ with batch size 1 in detectron2 directly.
22
+ Measuring it with custom code may introduce other overhead.
23
+ Actual deployment in production should in general be faster than the given inference
24
+ speed due to more optimizations.
25
+ * The *model id* column is provided for ease of reference.
26
+ To check downloaded file integrity, note that the file name of any model on this page contains its md5 prefix.
27
+ * Training curves and other statistics can be found in `metrics` for each model.
28
+
29
+ #### Common Settings for COCO Models
30
+ * All COCO models were trained on `train2017` and evaluated on `val2017`.
31
+ * The default settings are __not directly comparable__ with Detectron's standard settings.
32
+ For example, our default training data augmentation uses scale jittering in addition to horizontal flipping.
33
+
34
+ To make fair comparisons with Detectron's settings, see
35
+ [Detectron1-Comparisons](configs/Detectron1-Comparisons/) for accuracy comparison,
36
+ and [benchmarks](https://detectron2.readthedocs.io/notes/benchmarks.html)
37
+ for speed comparison.
38
+ * For Faster/Mask R-CNN, we provide baselines based on __3 different backbone combinations__:
39
+ * __FPN__: Use a ResNet+FPN backbone with standard conv and FC heads for mask and box prediction,
40
+ respectively. It obtains the best
41
+ speed/accuracy tradeoff, but the other two are still useful for research.
42
+ * __C4__: Use a ResNet conv4 backbone with conv5 head. The original baseline in the Faster R-CNN paper.
43
+ * __DC5__ (Dilated-C5): Use a ResNet conv5 backbone with dilations in conv5, and standard conv and FC heads
44
+ for mask and box prediction, respectively.
45
+ This is used by the Deformable ConvNet paper.
46
+ * Most models are trained with the 3x schedule (~37 COCO epochs).
47
+ Although 1x models are heavily under-trained, we provide some ResNet-50 models with the 1x (~12 COCO epochs)
48
+ training schedule for comparison when doing quick research iteration.
49
+
50
+ #### ImageNet Pretrained Models
51
+
52
+ It's common to initialize from backbone models pre-trained on ImageNet classification tasks. The following backbone models are available:
53
+
54
+ * [R-50.pkl](https://dl.fbaipublicfiles.com/detectron2/ImageNetPretrained/MSRA/R-50.pkl): converted copy of [MSRA's original ResNet-50](https://github.com/KaimingHe/deep-residual-networks) model.
55
+ * [R-101.pkl](https://dl.fbaipublicfiles.com/detectron2/ImageNetPretrained/MSRA/R-101.pkl): converted copy of [MSRA's original ResNet-101](https://github.com/KaimingHe/deep-residual-networks) model.
56
+ * [X-101-32x8d.pkl](https://dl.fbaipublicfiles.com/detectron2/ImageNetPretrained/FAIR/X-101-32x8d.pkl): ResNeXt-101-32x8d model trained with Caffe2 at FB.
57
+ * [R-50.pkl (torchvision)](https://dl.fbaipublicfiles.com/detectron2/ImageNetPretrained/torchvision/R-50.pkl): converted copy of [torchvision's ResNet-50](https://pytorch.org/docs/stable/torchvision/models.html#torchvision.models.resnet50) model.
58
+ More details can be found in [the conversion script](tools/convert-torchvision-to-d2.py).
59
+
60
+ Note that the above models have a __different__ format from those provided in Detectron: we do not fuse BatchNorm into an affine layer.
61
+ Pretrained models in Detectron's format can still be used. For example:
62
+ * [X-152-32x8d-IN5k.pkl](https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/25093814/X-152-32x8d-IN5k.pkl):
63
+ ResNeXt-152-32x8d model trained on ImageNet-5k with Caffe2 at FB (see ResNeXt paper for details on ImageNet-5k).
64
+ * [R-50-GN.pkl](https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/47261647/R-50-GN.pkl):
65
+ ResNet-50 with Group Normalization.
66
+ * [R-101-GN.pkl](https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/47592356/R-101-GN.pkl):
67
+ ResNet-101 with Group Normalization.
68
+
69
+ These models require slightly different settings regarding normalization and architecture. See the model zoo configs for reference.
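+ 
+ In a config, these backbones are typically referenced through the `detectron2://` prefix, which resolves to the URLs above; a minimal Python sketch:
+ 
+ ```
+ from detectron2.config import get_cfg
+ 
+ cfg = get_cfg()
+ # "detectron2://" resolves to https://dl.fbaipublicfiles.com/detectron2/.
+ cfg.MODEL.WEIGHTS = "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ ```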
70
+
71
+ #### License
72
+
73
+ All models available for download through this document are licensed under the
74
+ [Creative Commons Attribution-ShareAlike 3.0 license](https://creativecommons.org/licenses/by-sa/3.0/).
75
+
76
+ ### COCO Object Detection Baselines
77
+
78
+ #### Faster R-CNN:
79
+ <!--
80
+ (fb only) To update the table in vim:
81
+ 1. Remove the old table: d}
82
+ 2. Copy the below command to the place of the table
83
+ 3. :.!bash
84
+
85
+ ./gen_html_table.py --config 'COCO-Detection/faster*50*'{1x,3x}'*' 'COCO-Detection/faster*101*' --name R50-C4 R50-DC5 R50-FPN R50-C4 R50-DC5 R50-FPN R101-C4 R101-DC5 R101-FPN X101-FPN --fields lr_sched train_speed inference_speed mem box_AP
86
+ -->
87
+
88
+
89
+ <table><tbody>
90
+ <!-- START TABLE -->
91
+ <!-- TABLE HEADER -->
92
+ <th valign="bottom">Name</th>
93
+ <th valign="bottom">lr<br/>sched</th>
94
+ <th valign="bottom">train<br/>time<br/>(s/iter)</th>
95
+ <th valign="bottom">inference<br/>time<br/>(s/im)</th>
96
+ <th valign="bottom">train<br/>mem<br/>(GB)</th>
97
+ <th valign="bottom">box<br/>AP</th>
98
+ <th valign="bottom">model id</th>
99
+ <th valign="bottom">download</th>
100
+ <!-- TABLE BODY -->
101
+ <!-- ROW: faster_rcnn_R_50_C4_1x -->
102
+ <tr><td align="left"><a href="configs/COCO-Detection/faster_rcnn_R_50_C4_1x.yaml">R50-C4</a></td>
103
+ <td align="center">1x</td>
104
+ <td align="center">0.551</td>
105
+ <td align="center">0.102</td>
106
+ <td align="center">4.8</td>
107
+ <td align="center">35.7</td>
108
+ <td align="center">137257644</td>
109
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_R_50_C4_1x/137257644/model_final_721ade.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_R_50_C4_1x/137257644/metrics.json">metrics</a></td>
110
+ </tr>
111
+ <!-- ROW: faster_rcnn_R_50_DC5_1x -->
112
+ <tr><td align="left"><a href="configs/COCO-Detection/faster_rcnn_R_50_DC5_1x.yaml">R50-DC5</a></td>
113
+ <td align="center">1x</td>
114
+ <td align="center">0.380</td>
115
+ <td align="center">0.068</td>
116
+ <td align="center">5.0</td>
117
+ <td align="center">37.3</td>
118
+ <td align="center">137847829</td>
119
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_R_50_DC5_1x/137847829/model_final_51d356.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_R_50_DC5_1x/137847829/metrics.json">metrics</a></td>
120
+ </tr>
121
+ <!-- ROW: faster_rcnn_R_50_FPN_1x -->
122
+ <tr><td align="left"><a href="configs/COCO-Detection/faster_rcnn_R_50_FPN_1x.yaml">R50-FPN</a></td>
123
+ <td align="center">1x</td>
124
+ <td align="center">0.210</td>
125
+ <td align="center">0.038</td>
126
+ <td align="center">3.0</td>
127
+ <td align="center">37.9</td>
128
+ <td align="center">137257794</td>
129
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_R_50_FPN_1x/137257794/model_final_b275ba.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_R_50_FPN_1x/137257794/metrics.json">metrics</a></td>
130
+ </tr>
131
+ <!-- ROW: faster_rcnn_R_50_C4_3x -->
132
+ <tr><td align="left"><a href="configs/COCO-Detection/faster_rcnn_R_50_C4_3x.yaml">R50-C4</a></td>
133
+ <td align="center">3x</td>
134
+ <td align="center">0.543</td>
135
+ <td align="center">0.104</td>
136
+ <td align="center">4.8</td>
137
+ <td align="center">38.4</td>
138
+ <td align="center">137849393</td>
139
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_R_50_C4_3x/137849393/model_final_f97cb7.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_R_50_C4_3x/137849393/metrics.json">metrics</a></td>
140
+ </tr>
141
+ <!-- ROW: faster_rcnn_R_50_DC5_3x -->
142
+ <tr><td align="left"><a href="configs/COCO-Detection/faster_rcnn_R_50_DC5_3x.yaml">R50-DC5</a></td>
143
+ <td align="center">3x</td>
144
+ <td align="center">0.378</td>
145
+ <td align="center">0.070</td>
146
+ <td align="center">5.0</td>
147
+ <td align="center">39.0</td>
148
+ <td align="center">137849425</td>
149
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_R_50_DC5_3x/137849425/model_final_68d202.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_R_50_DC5_3x/137849425/metrics.json">metrics</a></td>
150
+ </tr>
151
+ <!-- ROW: faster_rcnn_R_50_FPN_3x -->
152
+ <tr><td align="left"><a href="configs/COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml">R50-FPN</a></td>
153
+ <td align="center">3x</td>
154
+ <td align="center">0.209</td>
155
+ <td align="center">0.038</td>
156
+ <td align="center">3.0</td>
157
+ <td align="center">40.2</td>
158
+ <td align="center">137849458</td>
159
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_R_50_FPN_3x/137849458/model_final_280758.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_R_50_FPN_3x/137849458/metrics.json">metrics</a></td>
160
+ </tr>
161
+ <!-- ROW: faster_rcnn_R_101_C4_3x -->
162
+ <tr><td align="left"><a href="configs/COCO-Detection/faster_rcnn_R_101_C4_3x.yaml">R101-C4</a></td>
163
+ <td align="center">3x</td>
164
+ <td align="center">0.619</td>
165
+ <td align="center">0.139</td>
166
+ <td align="center">5.9</td>
167
+ <td align="center">41.1</td>
168
+ <td align="center">138204752</td>
169
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_R_101_C4_3x/138204752/model_final_298dad.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_R_101_C4_3x/138204752/metrics.json">metrics</a></td>
170
+ </tr>
171
+ <!-- ROW: faster_rcnn_R_101_DC5_3x -->
172
+ <tr><td align="left"><a href="configs/COCO-Detection/faster_rcnn_R_101_DC5_3x.yaml">R101-DC5</a></td>
173
+ <td align="center">3x</td>
174
+ <td align="center">0.452</td>
175
+ <td align="center">0.086</td>
176
+ <td align="center">6.1</td>
177
+ <td align="center">40.6</td>
178
+ <td align="center">138204841</td>
179
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_R_101_DC5_3x/138204841/model_final_3e0943.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_R_101_DC5_3x/138204841/metrics.json">metrics</a></td>
180
+ </tr>
181
+ <!-- ROW: faster_rcnn_R_101_FPN_3x -->
182
+ <tr><td align="left"><a href="configs/COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml">R101-FPN</a></td>
183
+ <td align="center">3x</td>
184
+ <td align="center">0.286</td>
185
+ <td align="center">0.051</td>
186
+ <td align="center">4.1</td>
187
+ <td align="center">42.0</td>
188
+ <td align="center">137851257</td>
189
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_R_101_FPN_3x/137851257/model_final_f6e8b1.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_R_101_FPN_3x/137851257/metrics.json">metrics</a></td>
190
+ </tr>
191
+ <!-- ROW: faster_rcnn_X_101_32x8d_FPN_3x -->
192
+ <tr><td align="left"><a href="configs/COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml">X101-FPN</a></td>
193
+ <td align="center">3x</td>
194
+ <td align="center">0.638</td>
195
+ <td align="center">0.098</td>
196
+ <td align="center">6.7</td>
197
+ <td align="center">43.0</td>
198
+ <td align="center">139173657</td>
199
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x/139173657/model_final_68b088.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x/139173657/metrics.json">metrics</a></td>
200
+ </tr>
201
+ </tbody></table>
202
+
203
+ #### RetinaNet:
204
+ <!--
205
+ ./gen_html_table.py --config 'COCO-Detection/retina*50*' 'COCO-Detection/retina*101*' --name R50 R50 R101 --fields lr_sched train_speed inference_speed mem box_AP
206
+ -->
207
+
208
+ <table><tbody>
209
+ <!-- START TABLE -->
210
+ <!-- TABLE HEADER -->
211
+ <th valign="bottom">Name</th>
212
+ <th valign="bottom">lr<br/>sched</th>
213
+ <th valign="bottom">train<br/>time<br/>(s/iter)</th>
214
+ <th valign="bottom">inference<br/>time<br/>(s/im)</th>
215
+ <th valign="bottom">train<br/>mem<br/>(GB)</th>
216
+ <th valign="bottom">box<br/>AP</th>
217
+ <th valign="bottom">model id</th>
218
+ <th valign="bottom">download</th>
219
+ <!-- TABLE BODY -->
220
+ <!-- ROW: retinanet_R_50_FPN_1x -->
221
+ <tr><td align="left"><a href="configs/COCO-Detection/retinanet_R_50_FPN_1x.yaml">R50</a></td>
222
+ <td align="center">1x</td>
223
+ <td align="center">0.205</td>
224
+ <td align="center">0.041</td>
225
+ <td align="center">4.1</td>
226
+ <td align="center">37.4</td>
227
+ <td align="center">190397773</td>
228
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/retinanet_R_50_FPN_1x/190397773/model_final_bfca0b.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/retinanet_R_50_FPN_1x/190397773/metrics.json">metrics</a></td>
229
+ </tr>
230
+ <!-- ROW: retinanet_R_50_FPN_3x -->
231
+ <tr><td align="left"><a href="configs/COCO-Detection/retinanet_R_50_FPN_3x.yaml">R50</a></td>
232
+ <td align="center">3x</td>
233
+ <td align="center">0.205</td>
234
+ <td align="center">0.041</td>
235
+ <td align="center">4.1</td>
236
+ <td align="center">38.7</td>
237
+ <td align="center">190397829</td>
238
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/retinanet_R_50_FPN_3x/190397829/model_final_5bd44e.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/retinanet_R_50_FPN_3x/190397829/metrics.json">metrics</a></td>
239
+ </tr>
240
+ <!-- ROW: retinanet_R_101_FPN_3x -->
241
+ <tr><td align="left"><a href="configs/COCO-Detection/retinanet_R_101_FPN_3x.yaml">R101</a></td>
242
+ <td align="center">3x</td>
243
+ <td align="center">0.291</td>
244
+ <td align="center">0.054</td>
245
+ <td align="center">5.2</td>
246
+ <td align="center">40.4</td>
247
+ <td align="center">190397697</td>
248
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/retinanet_R_101_FPN_3x/190397697/model_final_971ab9.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/retinanet_R_101_FPN_3x/190397697/metrics.json">metrics</a></td>
249
+ </tr>
250
+ </tbody></table>
251
+
252
+
253
+ #### RPN & Fast R-CNN:
254
+ <!--
255
+ ./gen_html_table.py --config 'COCO-Detection/rpn*' 'COCO-Detection/fast_rcnn*' --name "RPN R50-C4" "RPN R50-FPN" "Fast R-CNN R50-FPN" --fields lr_sched train_speed inference_speed mem box_AP prop_AR
256
+ -->
257
+
258
+ <table><tbody>
259
+ <!-- START TABLE -->
260
+ <!-- TABLE HEADER -->
261
+ <th valign="bottom">Name</th>
262
+ <th valign="bottom">lr<br/>sched</th>
263
+ <th valign="bottom">train<br/>time<br/>(s/iter)</th>
264
+ <th valign="bottom">inference<br/>time<br/>(s/im)</th>
265
+ <th valign="bottom">train<br/>mem<br/>(GB)</th>
266
+ <th valign="bottom">box<br/>AP</th>
267
+ <th valign="bottom">prop.<br/>AR</th>
268
+ <th valign="bottom">model id</th>
269
+ <th valign="bottom">download</th>
270
+ <!-- TABLE BODY -->
271
+ <!-- ROW: rpn_R_50_C4_1x -->
272
+ <tr><td align="left"><a href="configs/COCO-Detection/rpn_R_50_C4_1x.yaml">RPN R50-C4</a></td>
273
+ <td align="center">1x</td>
274
+ <td align="center">0.130</td>
275
+ <td align="center">0.034</td>
276
+ <td align="center">1.5</td>
277
+ <td align="center"></td>
278
+ <td align="center">51.6</td>
279
+ <td align="center">137258005</td>
280
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/rpn_R_50_C4_1x/137258005/model_final_450694.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/rpn_R_50_C4_1x/137258005/metrics.json">metrics</a></td>
281
+ </tr>
282
+ <!-- ROW: rpn_R_50_FPN_1x -->
283
+ <tr><td align="left"><a href="configs/COCO-Detection/rpn_R_50_FPN_1x.yaml">RPN R50-FPN</a></td>
284
+ <td align="center">1x</td>
285
+ <td align="center">0.186</td>
286
+ <td align="center">0.032</td>
287
+ <td align="center">2.7</td>
288
+ <td align="center"></td>
289
+ <td align="center">58.0</td>
290
+ <td align="center">137258492</td>
291
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/rpn_R_50_FPN_1x/137258492/model_final_02ce48.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/rpn_R_50_FPN_1x/137258492/metrics.json">metrics</a></td>
292
+ </tr>
293
+ <!-- ROW: fast_rcnn_R_50_FPN_1x -->
294
+ <tr><td align="left"><a href="configs/COCO-Detection/fast_rcnn_R_50_FPN_1x.yaml">Fast R-CNN R50-FPN</a></td>
295
+ <td align="center">1x</td>
296
+ <td align="center">0.140</td>
297
+ <td align="center">0.029</td>
298
+ <td align="center">2.6</td>
299
+ <td align="center">37.8</td>
300
+ <td align="center"></td>
301
+ <td align="center">137635226</td>
302
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/fast_rcnn_R_50_FPN_1x/137635226/model_final_e5f7ce.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/fast_rcnn_R_50_FPN_1x/137635226/metrics.json">metrics</a></td>
303
+ </tr>
304
+ </tbody></table>
305
+
306
+ ### COCO Instance Segmentation Baselines with Mask R-CNN
307
+ <!--
308
+ ./gen_html_table.py --config 'COCO-InstanceSegmentation/mask*50*'{1x,3x}'*' 'COCO-InstanceSegmentation/mask*101*' --name R50-C4 R50-DC5 R50-FPN R50-C4 R50-DC5 R50-FPN R101-C4 R101-DC5 R101-FPN X101-FPN --fields lr_sched train_speed inference_speed mem box_AP mask_AP
309
+ -->
310
+
311
+
312
+
313
+ <table><tbody>
314
+ <!-- START TABLE -->
315
+ <!-- TABLE HEADER -->
316
+ <th valign="bottom">Name</th>
317
+ <th valign="bottom">lr<br/>sched</th>
318
+ <th valign="bottom">train<br/>time<br/>(s/iter)</th>
319
+ <th valign="bottom">inference<br/>time<br/>(s/im)</th>
320
+ <th valign="bottom">train<br/>mem<br/>(GB)</th>
321
+ <th valign="bottom">box<br/>AP</th>
322
+ <th valign="bottom">mask<br/>AP</th>
323
+ <th valign="bottom">model id</th>
324
+ <th valign="bottom">download</th>
325
+ <!-- TABLE BODY -->
326
+ <!-- ROW: mask_rcnn_R_50_C4_1x -->
327
+ <tr><td align="left"><a href="configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_1x.yaml">R50-C4</a></td>
328
+ <td align="center">1x</td>
329
+ <td align="center">0.584</td>
330
+ <td align="center">0.110</td>
331
+ <td align="center">5.2</td>
332
+ <td align="center">36.8</td>
333
+ <td align="center">32.2</td>
334
+ <td align="center">137259246</td>
335
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_1x/137259246/model_final_9243eb.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_1x/137259246/metrics.json">metrics</a></td>
336
+ </tr>
337
+ <!-- ROW: mask_rcnn_R_50_DC5_1x -->
338
+ <tr><td align="left"><a href="configs/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_1x.yaml">R50-DC5</a></td>
339
+ <td align="center">1x</td>
340
+ <td align="center">0.471</td>
341
+ <td align="center">0.076</td>
342
+ <td align="center">6.5</td>
343
+ <td align="center">38.3</td>
344
+ <td align="center">34.2</td>
345
+ <td align="center">137260150</td>
346
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_1x/137260150/model_final_4f86c3.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_1x/137260150/metrics.json">metrics</a></td>
347
+ </tr>
348
+ <!-- ROW: mask_rcnn_R_50_FPN_1x -->
349
+ <tr><td align="left"><a href="configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml">R50-FPN</a></td>
350
+ <td align="center">1x</td>
351
+ <td align="center">0.261</td>
352
+ <td align="center">0.043</td>
353
+ <td align="center">3.4</td>
354
+ <td align="center">38.6</td>
355
+ <td align="center">35.2</td>
356
+ <td align="center">137260431</td>
357
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x/137260431/model_final_a54504.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x/137260431/metrics.json">metrics</a></td>
358
+ </tr>
359
+ <!-- ROW: mask_rcnn_R_50_C4_3x -->
360
+ <tr><td align="left"><a href="configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x.yaml">R50-C4</a></td>
361
+ <td align="center">3x</td>
362
+ <td align="center">0.575</td>
363
+ <td align="center">0.111</td>
364
+ <td align="center">5.2</td>
365
+ <td align="center">39.8</td>
366
+ <td align="center">34.4</td>
367
+ <td align="center">137849525</td>
368
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x/137849525/model_final_4ce675.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x/137849525/metrics.json">metrics</a></td>
369
+ </tr>
370
+ <!-- ROW: mask_rcnn_R_50_DC5_3x -->
371
+ <tr><td align="left"><a href="configs/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x.yaml">R50-DC5</a></td>
372
+ <td align="center">3x</td>
373
+ <td align="center">0.470</td>
374
+ <td align="center">0.076</td>
375
+ <td align="center">6.5</td>
376
+ <td align="center">40.0</td>
377
+ <td align="center">35.9</td>
378
+ <td align="center">137849551</td>
379
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x/137849551/model_final_84107b.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x/137849551/metrics.json">metrics</a></td>
380
+ </tr>
381
+ <!-- ROW: mask_rcnn_R_50_FPN_3x -->
382
+ <tr><td align="left"><a href="configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml">R50-FPN</a></td>
383
+ <td align="center">3x</td>
384
+ <td align="center">0.261</td>
385
+ <td align="center">0.043</td>
386
+ <td align="center">3.4</td>
387
+ <td align="center">41.0</td>
388
+ <td align="center">37.2</td>
389
+ <td align="center">137849600</td>
390
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/metrics.json">metrics</a></td>
391
+ </tr>
392
+ <!-- ROW: mask_rcnn_R_101_C4_3x -->
393
+ <tr><td align="left"><a href="configs/COCO-InstanceSegmentation/mask_rcnn_R_101_C4_3x.yaml">R101-C4</a></td>
394
+ <td align="center">3x</td>
395
+ <td align="center">0.652</td>
396
+ <td align="center">0.145</td>
397
+ <td align="center">6.3</td>
398
+ <td align="center">42.6</td>
399
+ <td align="center">36.7</td>
400
+ <td align="center">138363239</td>
401
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_101_C4_3x/138363239/model_final_a2914c.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_101_C4_3x/138363239/metrics.json">metrics</a></td>
402
+ </tr>
403
+ <!-- ROW: mask_rcnn_R_101_DC5_3x -->
404
+ <tr><td align="left"><a href="configs/COCO-InstanceSegmentation/mask_rcnn_R_101_DC5_3x.yaml">R101-DC5</a></td>
405
+ <td align="center">3x</td>
406
+ <td align="center">0.545</td>
407
+ <td align="center">0.092</td>
408
+ <td align="center">7.6</td>
409
+ <td align="center">41.9</td>
410
+ <td align="center">37.3</td>
411
+ <td align="center">138363294</td>
412
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_101_DC5_3x/138363294/model_final_0464b7.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_101_DC5_3x/138363294/metrics.json">metrics</a></td>
413
+ </tr>
414
+ <!-- ROW: mask_rcnn_R_101_FPN_3x -->
415
+ <tr><td align="left"><a href="configs/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml">R101-FPN</a></td>
416
+ <td align="center">3x</td>
417
+ <td align="center">0.340</td>
418
+ <td align="center">0.056</td>
419
+ <td align="center">4.6</td>
420
+ <td align="center">42.9</td>
421
+ <td align="center">38.6</td>
422
+ <td align="center">138205316</td>
423
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x/138205316/model_final_a3ec72.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x/138205316/metrics.json">metrics</a></td>
424
+ </tr>
425
+ <!-- ROW: mask_rcnn_X_101_32x8d_FPN_3x -->
426
+ <tr><td align="left"><a href="configs/COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x.yaml">X101-FPN</a></td>
427
+ <td align="center">3x</td>
428
+ <td align="center">0.690</td>
429
+ <td align="center">0.103</td>
430
+ <td align="center">7.2</td>
431
+ <td align="center">44.3</td>
432
+ <td align="center">39.5</td>
433
+ <td align="center">139653917</td>
434
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x/139653917/model_final_2d9806.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x/139653917/metrics.json">metrics</a></td>
435
+ </tr>
436
+ </tbody></table>
437
+
438
+
439
+
440
+ #### New baselines using Large-Scale Jitter and Longer Training Schedule
441
+
442
+ The following baselines of COCO Instance Segmentation with Mask R-CNN are generated
443
+ using a longer training schedule and large-scale jitter as described in Google's
444
+ [Simple Copy-Paste Data Augmentation](https://arxiv.org/pdf/2012.07177.pdf) paper. These
445
+ models are trained from scratch using random initialization. These baselines exceed the
446
+ previous Mask R-CNN baselines.
447
+
448
+ In the following table, one epoch consists of training on 118000 COCO images.
449
+
450
+ <table><tbody>
451
+ <!-- START TABLE -->
452
+ <!-- TABLE HEADER -->
453
+ <th valign="bottom">Name</th>
454
+ <th valign="bottom">epochs</th>
455
+ <th valign="bottom">train<br/>time<br/>(s/im)</th>
456
+ <th valign="bottom">inference<br/>time<br/>(s/im)</th>
457
+ <th valign="bottom">box<br/>AP</th>
458
+ <th valign="bottom">mask<br/>AP</th>
459
+ <th valign="bottom">model id</th>
460
+ <th valign="bottom">download</th>
461
+ <!-- TABLE BODY -->
462
+ <!-- ROW: mask_rcnn_R_50_FPN_100ep_LSJ -->
463
+ <tr><td align="left"><a href="configs/new_baselines/mask_rcnn_R_50_FPN_100ep_LSJ.py">R50-FPN</a></td>
464
+ <td align="center">100</td>
465
+ <td align="center">0.376</td>
466
+ <td align="center">0.069</td>
467
+ <td align="center">44.6</td>
468
+ <td align="center">40.3</td>
469
+ <td align="center">42047764</td>
470
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/new_baselines/mask_rcnn_R_50_FPN_100ep_LSJ/42047764/model_final_bb69de.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/new_baselines/mask_rcnn_R_50_FPN_100ep_LSJ/42047764/metrics.json">metrics</a></td>
471
+ </tr>
472
+ <!-- ROW: mask_rcnn_R_50_FPN_200ep_LSJ -->
473
+ <tr><td align="left"><a href="configs/new_baselines/mask_rcnn_R_50_FPN_200ep_LSJ.py">R50-FPN</a></td>
474
+ <td align="center">200</td>
475
+ <td align="center">0.376</td>
476
+ <td align="center">0.069</td>
477
+ <td align="center">46.3</td>
478
+ <td align="center">41.7</td>
479
+ <td align="center">42047638</td>
480
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/new_baselines/mask_rcnn_R_50_FPN_200ep_LSJ/42047638/model_final_89a8d3.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/new_baselines/mask_rcnn_R_50_FPN_200ep_LSJ/42047638/metrics.json">metrics</a></td>
481
+ </tr>
482
+ <!-- ROW: mask_rcnn_R_50_FPN_400ep_LSJ -->
483
+ <tr><td align="left"><a href="configs/new_baselines/mask_rcnn_R_50_FPN_400ep_LSJ.py">R50-FPN</a></td>
484
+ <td align="center">400</td>
485
+ <td align="center">0.376</td>
486
+ <td align="center">0.069</td>
487
+ <td align="center">47.4</td>
488
+ <td align="center">42.5</td>
489
+ <td align="center">42019571</td>
490
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/new_baselines/mask_rcnn_R_50_FPN_400ep_LSJ/42019571/model_final_14d201.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/new_baselines/mask_rcnn_R_50_FPN_400ep_LSJ/42019571/metrics.json">metrics</a></td>
491
+ </tr>
492
+ <!-- ROW: mask_rcnn_R_101_FPN_100ep_LSJ -->
493
+ <tr><td align="left"><a href="configs/new_baselines/mask_rcnn_R_101_FPN_100ep_LSJ.py">R101-FPN</a></td>
494
+ <td align="center">100</td>
495
+ <td align="center">0.518</td>
496
+ <td align="center">0.073</td>
497
+ <td align="center">46.4</td>
498
+ <td align="center">41.6</td>
499
+ <td align="center">42025812</td>
500
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/new_baselines/mask_rcnn_R_101_FPN_100ep_LSJ/42025812/model_final_4f7b58.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/new_baselines/mask_rcnn_R_101_FPN_100ep_LSJ/42025812/metrics.json">metrics</a></td>
501
+ </tr>
502
+ <!-- ROW: mask_rcnn_R_101_FPN_200ep_LSJ -->
503
+ <tr><td align="left"><a href="configs/new_baselines/mask_rcnn_R_101_FPN_200ep_LSJ.py">R101-FPN</a></td>
504
+ <td align="center">200</td>
505
+ <td align="center">0.518</td>
506
+ <td align="center">0.073</td>
507
+ <td align="center">48.0</td>
508
+ <td align="center">43.1</td>
509
+ <td align="center">42131867</td>
510
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/new_baselines/mask_rcnn_R_101_FPN_200ep_LSJ/42131867/model_final_0bb7ae.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/new_baselines/mask_rcnn_R_101_FPN_200ep_LSJ/42131867/metrics.json">metrics</a></td>
511
+ </tr>
512
+ <!-- ROW: mask_rcnn_R_101_FPN_400ep_LSJ -->
513
+ <tr><td align="left"><a href="configs/new_baselines/mask_rcnn_R_101_FPN_400ep_LSJ.py">R101-FPN</a></td>
514
+ <td align="center">400</td>
515
+ <td align="center">0.518</td>
516
+ <td align="center">0.073</td>
517
+ <td align="center">48.9</td>
518
+ <td align="center">43.7</td>
519
+ <td align="center">42073830</td>
520
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/new_baselines/mask_rcnn_R_101_FPN_400ep_LSJ/42073830/model_final_f96b26.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/new_baselines/mask_rcnn_R_101_FPN_400ep_LSJ/42073830/metrics.json">metrics</a></td>
521
+ </tr>
522
+ <!-- ROW: mask_rcnn_regnetx_4gf_dds_FPN_100ep_LSJ -->
523
+ <tr><td align="left"><a href="configs/new_baselines/mask_rcnn_regnetx_4gf_dds_FPN_100ep_LSJ.py">regnetx_4gf_dds_FPN</a></td>
524
+ <td align="center">100</td>
525
+ <td align="center">0.474</td>
526
+ <td align="center">0.071</td>
527
+ <td align="center">46.0</td>
528
+ <td align="center">41.3</td>
529
+ <td align="center">42047771</td>
530
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/new_baselines/mask_rcnn_regnetx_4gf_dds_FPN_100ep_LSJ/42047771/model_final_b7fbab.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/new_baselines/mask_rcnn_regnetx_4gf_dds_FPN_100ep_LSJ/42047771/metrics.json">metrics</a></td>
531
+ </tr>
532
+ <!-- ROW: mask_rcnn_regnetx_4gf_dds_FPN_200ep_LSJ -->
533
+ <tr><td align="left"><a href="configs/new_baselines/mask_rcnn_regnetx_4gf_dds_FPN_200ep_LSJ.py">regnetx_4gf_dds_FPN</a></td>
534
+ <td align="center">200</td>
535
+ <td align="center">0.474</td>
536
+ <td align="center">0.071</td>
537
+ <td align="center">48.1</td>
538
+ <td align="center">43.1</td>
539
+ <td align="center">42132721</td>
540
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/new_baselines/mask_rcnn_regnetx_4gf_dds_FPN_200ep_LSJ/42132721/model_final_5d87c1.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/new_baselines/mask_rcnn_regnetx_4gf_dds_FPN_200ep_LSJ/42132721/metrics.json">metrics</a></td>
541
+ </tr>
542
+ <!-- ROW: mask_rcnn_regnetx_4gf_dds_FPN_400ep_LSJ -->
543
+ <tr><td align="left"><a href="configs/new_baselines/mask_rcnn_regnetx_4gf_dds_FPN_400ep_LSJ.py">regnetx_4gf_dds_FPN</a></td>
544
+ <td align="center">400</td>
545
+ <td align="center">0.474</td>
546
+ <td align="center">0.071</td>
547
+ <td align="center">48.6</td>
548
+ <td align="center">43.5</td>
549
+ <td align="center">42025447</td>
550
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/new_baselines/mask_rcnn_regnetx_4gf_dds_FPN_400ep_LSJ/42025447/model_final_f1362d.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/new_baselines/mask_rcnn_regnetx_4gf_dds_FPN_400ep_LSJ/42025447/metrics.json">metrics</a></td>
551
+ </tr>
552
+ <!-- ROW: mask_rcnn_regnety_4gf_dds_FPN_100ep_LSJ -->
553
+ <tr><td align="left"><a href="configs/new_baselines/mask_rcnn_regnety_4gf_dds_FPN_100ep_LSJ.py">regnety_4gf_dds_FPN</a></td>
554
+ <td align="center">100</td>
555
+ <td align="center">0.487</td>
556
+ <td align="center">0.073</td>
557
+ <td align="center">46.1</td>
558
+ <td align="center">41.6</td>
559
+ <td align="center">42047784</td>
560
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/new_baselines/mask_rcnn_regnety_4gf_dds_FPN_100ep_LSJ/42047784/model_final_6ba57e.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/new_baselines/mask_rcnn_regnety_4gf_dds_FPN_100ep_LSJ/42047784/metrics.json">metrics</a></td>
561
+ </tr>
562
+ <!-- ROW: mask_rcnn_regnety_4gf_dds_FPN_200ep_LSJ -->
563
+ <tr><td align="left"><a href="configs/new_baselines/mask_rcnn_regnety_4gf_dds_FPN_200ep_LSJ.py">regnety_4gf_dds_FPN</a></td>
564
+ <td align="center">200</td>
565
+ <td align="center">0.487</td>
566
+ <td align="center">0.072</td>
567
+ <td align="center">47.8</td>
568
+ <td align="center">43.0</td>
569
+ <td align="center">42047642</td>
570
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/new_baselines/mask_rcnn_regnety_4gf_dds_FPN_200ep_LSJ/42047642/model_final_27b9c1.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/new_baselines/mask_rcnn_regnety_4gf_dds_FPN_200ep_LSJ/42047642/metrics.json">metrics</a></td>
571
+ </tr>
572
+ <!-- ROW: mask_rcnn_regnety_4gf_dds_FPN_400ep_LSJ -->
573
+ <tr><td align="left"><a href="configs/new_baselines/mask_rcnn_regnety_4gf_dds_FPN_400ep_LSJ.py">regnety_4gf_dds_FPN</a></td>
574
+ <td align="center">400</td>
575
+ <td align="center">0.487</td>
576
+ <td align="center">0.072</td>
577
+ <td align="center">48.2</td>
578
+ <td align="center">43.3</td>
579
+ <td align="center">42045954</td>
580
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/new_baselines/mask_rcnn_regnety_4gf_dds_FPN_400ep_LSJ/42045954/model_final_ef3a80.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/new_baselines/mask_rcnn_regnety_4gf_dds_FPN_400ep_LSJ/42045954/metrics.json">metrics</a></td>
581
+ </tr>
582
+ </tbody></table>
583
+
584
+ ### COCO Person Keypoint Detection Baselines with Keypoint R-CNN
585
+ <!--
586
+ ./gen_html_table.py --config 'COCO-Keypoints/*50*' 'COCO-Keypoints/*101*' --name R50-FPN R50-FPN R101-FPN X101-FPN --fields lr_sched train_speed inference_speed mem box_AP keypoint_AP
587
+ -->
588
+
589
+
590
+ <table><tbody>
591
+ <!-- START TABLE -->
592
+ <!-- TABLE HEADER -->
593
+ <th valign="bottom">Name</th>
594
+ <th valign="bottom">lr<br/>sched</th>
595
+ <th valign="bottom">train<br/>time<br/>(s/iter)</th>
596
+ <th valign="bottom">inference<br/>time<br/>(s/im)</th>
597
+ <th valign="bottom">train<br/>mem<br/>(GB)</th>
598
+ <th valign="bottom">box<br/>AP</th>
599
+ <th valign="bottom">kp.<br/>AP</th>
600
+ <th valign="bottom">model id</th>
601
+ <th valign="bottom">download</th>
602
+ <!-- TABLE BODY -->
603
+ <!-- ROW: keypoint_rcnn_R_50_FPN_1x -->
604
+ <tr><td align="left"><a href="configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_1x.yaml">R50-FPN</a></td>
605
+ <td align="center">1x</td>
606
+ <td align="center">0.315</td>
607
+ <td align="center">0.072</td>
608
+ <td align="center">5.0</td>
609
+ <td align="center">53.6</td>
610
+ <td align="center">64.0</td>
611
+ <td align="center">137261548</td>
612
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Keypoints/keypoint_rcnn_R_50_FPN_1x/137261548/model_final_04e291.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Keypoints/keypoint_rcnn_R_50_FPN_1x/137261548/metrics.json">metrics</a></td>
613
+ </tr>
614
+ <!-- ROW: keypoint_rcnn_R_50_FPN_3x -->
615
+ <tr><td align="left"><a href="configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml">R50-FPN</a></td>
616
+ <td align="center">3x</td>
617
+ <td align="center">0.316</td>
618
+ <td align="center">0.066</td>
619
+ <td align="center">5.0</td>
620
+ <td align="center">55.4</td>
621
+ <td align="center">65.5</td>
622
+ <td align="center">137849621</td>
623
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x/137849621/model_final_a6e10b.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x/137849621/metrics.json">metrics</a></td>
624
+ </tr>
625
+ <!-- ROW: keypoint_rcnn_R_101_FPN_3x -->
626
+ <tr><td align="left"><a href="configs/COCO-Keypoints/keypoint_rcnn_R_101_FPN_3x.yaml">R101-FPN</a></td>
627
+ <td align="center">3x</td>
628
+ <td align="center">0.390</td>
629
+ <td align="center">0.076</td>
630
+ <td align="center">6.1</td>
631
+ <td align="center">56.4</td>
632
+ <td align="center">66.1</td>
633
+ <td align="center">138363331</td>
634
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Keypoints/keypoint_rcnn_R_101_FPN_3x/138363331/model_final_997cc7.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Keypoints/keypoint_rcnn_R_101_FPN_3x/138363331/metrics.json">metrics</a></td>
635
+ </tr>
636
+ <!-- ROW: keypoint_rcnn_X_101_32x8d_FPN_3x -->
637
+ <tr><td align="left"><a href="configs/COCO-Keypoints/keypoint_rcnn_X_101_32x8d_FPN_3x.yaml">X101-FPN</a></td>
638
+ <td align="center">3x</td>
639
+ <td align="center">0.738</td>
640
+ <td align="center">0.121</td>
641
+ <td align="center">8.7</td>
642
+ <td align="center">57.3</td>
643
+ <td align="center">66.0</td>
644
+ <td align="center">139686956</td>
645
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Keypoints/keypoint_rcnn_X_101_32x8d_FPN_3x/139686956/model_final_5ad38f.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Keypoints/keypoint_rcnn_X_101_32x8d_FPN_3x/139686956/metrics.json">metrics</a></td>
646
+ </tr>
647
+ </tbody></table>
648
+
649
+ ### COCO Panoptic Segmentation Baselines with Panoptic FPN
650
+ <!--
651
+ ./gen_html_table.py --config 'COCO-PanopticSegmentation/*50*' 'COCO-PanopticSegmentation/*101*' --name R50-FPN R50-FPN R101-FPN --fields lr_sched train_speed inference_speed mem box_AP mask_AP PQ
652
+ -->
653
+
654
+
655
+ <table><tbody>
656
+ <!-- START TABLE -->
657
+ <!-- TABLE HEADER -->
658
+ <th valign="bottom">Name</th>
659
+ <th valign="bottom">lr<br/>sched</th>
660
+ <th valign="bottom">train<br/>time<br/>(s/iter)</th>
661
+ <th valign="bottom">inference<br/>time<br/>(s/im)</th>
662
+ <th valign="bottom">train<br/>mem<br/>(GB)</th>
663
+ <th valign="bottom">box<br/>AP</th>
664
+ <th valign="bottom">mask<br/>AP</th>
665
+ <th valign="bottom">PQ</th>
666
+ <th valign="bottom">model id</th>
667
+ <th valign="bottom">download</th>
668
+ <!-- TABLE BODY -->
669
+ <!-- ROW: panoptic_fpn_R_50_1x -->
670
+ <tr><td align="left"><a href="configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_1x.yaml">R50-FPN</a></td>
671
+ <td align="center">1x</td>
672
+ <td align="center">0.304</td>
673
+ <td align="center">0.053</td>
674
+ <td align="center">4.8</td>
675
+ <td align="center">37.6</td>
676
+ <td align="center">34.7</td>
677
+ <td align="center">39.4</td>
678
+ <td align="center">139514544</td>
679
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-PanopticSegmentation/panoptic_fpn_R_50_1x/139514544/model_final_dbfeb4.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-PanopticSegmentation/panoptic_fpn_R_50_1x/139514544/metrics.json">metrics</a></td>
680
+ </tr>
681
+ <!-- ROW: panoptic_fpn_R_50_3x -->
682
+ <tr><td align="left"><a href="configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml">R50-FPN</a></td>
683
+ <td align="center">3x</td>
684
+ <td align="center">0.302</td>
685
+ <td align="center">0.053</td>
686
+ <td align="center">4.8</td>
687
+ <td align="center">40.0</td>
688
+ <td align="center">36.5</td>
689
+ <td align="center">41.5</td>
690
+ <td align="center">139514569</td>
691
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-PanopticSegmentation/panoptic_fpn_R_50_3x/139514569/model_final_c10459.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-PanopticSegmentation/panoptic_fpn_R_50_3x/139514569/metrics.json">metrics</a></td>
692
+ </tr>
693
+ <!-- ROW: panoptic_fpn_R_101_3x -->
694
+ <tr><td align="left"><a href="configs/COCO-PanopticSegmentation/panoptic_fpn_R_101_3x.yaml">R101-FPN</a></td>
695
+ <td align="center">3x</td>
696
+ <td align="center">0.392</td>
697
+ <td align="center">0.066</td>
698
+ <td align="center">6.0</td>
699
+ <td align="center">42.4</td>
700
+ <td align="center">38.5</td>
701
+ <td align="center">43.0</td>
702
+ <td align="center">139514519</td>
703
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-PanopticSegmentation/panoptic_fpn_R_101_3x/139514519/model_final_cafdb1.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-PanopticSegmentation/panoptic_fpn_R_101_3x/139514519/metrics.json">metrics</a></td>
704
+ </tr>
705
+ </tbody></table>
706
+
707
+
708
+ ### LVIS Instance Segmentation Baselines with Mask R-CNN
709
+
710
+ Mask R-CNN baselines on the [LVIS dataset](https://lvisdataset.org), v0.5.
711
+ These baselines are described in Table 3(c) of the [LVIS paper](https://arxiv.org/abs/1908.03195).
712
+
713
+ NOTE: the 1x schedule here has the same number of __iterations__ as the COCO 1x baselines.
714
+ They are roughly 24 epochs of LVISv0.5 data.
715
+ The final results of these configs have large variance across different runs.
716
+
717
+ <!--
718
+ ./gen_html_table.py --config 'LVISv0.5-InstanceSegmentation/mask*50*' 'LVISv0.5-InstanceSegmentation/mask*101*' --name R50-FPN R101-FPN X101-FPN --fields lr_sched train_speed inference_speed mem box_AP mask_AP
719
+ -->
720
+
721
+
722
+ <table><tbody>
723
+ <!-- START TABLE -->
724
+ <!-- TABLE HEADER -->
725
+ <th valign="bottom">Name</th>
726
+ <th valign="bottom">lr<br/>sched</th>
727
+ <th valign="bottom">train<br/>time<br/>(s/iter)</th>
728
+ <th valign="bottom">inference<br/>time<br/>(s/im)</th>
729
+ <th valign="bottom">train<br/>mem<br/>(GB)</th>
730
+ <th valign="bottom">box<br/>AP</th>
731
+ <th valign="bottom">mask<br/>AP</th>
732
+ <th valign="bottom">model id</th>
733
+ <th valign="bottom">download</th>
734
+ <!-- TABLE BODY -->
735
+ <!-- ROW: mask_rcnn_R_50_FPN_1x -->
736
+ <tr><td align="left"><a href="configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml">R50-FPN</a></td>
737
+ <td align="center">1x</td>
738
+ <td align="center">0.292</td>
739
+ <td align="center">0.107</td>
740
+ <td align="center">7.1</td>
741
+ <td align="center">23.6</td>
742
+ <td align="center">24.4</td>
743
+ <td align="center">144219072</td>
744
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/LVISv0.5-InstanceSegmentation/mask_rcnn_R_50_FPN_1x/144219072/model_final_571f7c.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/LVISv0.5-InstanceSegmentation/mask_rcnn_R_50_FPN_1x/144219072/metrics.json">metrics</a></td>
745
+ </tr>
746
+ <!-- ROW: mask_rcnn_R_101_FPN_1x -->
747
+ <tr><td align="left"><a href="configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml">R101-FPN</a></td>
748
+ <td align="center">1x</td>
749
+ <td align="center">0.371</td>
750
+ <td align="center">0.114</td>
751
+ <td align="center">7.8</td>
752
+ <td align="center">25.6</td>
753
+ <td align="center">25.9</td>
754
+ <td align="center">144219035</td>
755
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/LVISv0.5-InstanceSegmentation/mask_rcnn_R_101_FPN_1x/144219035/model_final_824ab5.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/LVISv0.5-InstanceSegmentation/mask_rcnn_R_101_FPN_1x/144219035/metrics.json">metrics</a></td>
756
+ </tr>
757
+ <!-- ROW: mask_rcnn_X_101_32x8d_FPN_1x -->
758
+ <tr><td align="left"><a href="configs/LVISv0.5-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml">X101-FPN</a></td>
759
+ <td align="center">1x</td>
760
+ <td align="center">0.712</td>
761
+ <td align="center">0.151</td>
762
+ <td align="center">10.2</td>
763
+ <td align="center">26.7</td>
764
+ <td align="center">27.1</td>
765
+ <td align="center">144219108</td>
766
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/LVISv0.5-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x/144219108/model_final_5e3439.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/LVISv0.5-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x/144219108/metrics.json">metrics</a></td>
767
+ </tr>
768
+ </tbody></table>
769
+
770
+
771
+
772
+ ### Cityscapes & Pascal VOC Baselines
773
+
774
+ Simple baselines for
775
+ * Mask R-CNN on Cityscapes instance segmentation (initialized from COCO pre-training, then trained on Cityscapes fine annotations only)
776
+ * Faster R-CNN on PASCAL VOC object detection (trained on VOC 2007 train+val + VOC 2012 train+val, tested on VOC 2007 using 11-point interpolated AP)
777
+
778
+ <!--
779
+ ./gen_html_table.py --config 'Cityscapes/*' 'PascalVOC-Detection/*' --name "R50-FPN, Cityscapes" "R50-C4, VOC" --fields train_speed inference_speed mem box_AP box_AP50 mask_AP
780
+ -->
781
+
782
+
783
+ <table><tbody>
784
+ <!-- START TABLE -->
785
+ <!-- TABLE HEADER -->
786
+ <th valign="bottom">Name</th>
787
+ <th valign="bottom">train<br/>time<br/>(s/iter)</th>
788
+ <th valign="bottom">inference<br/>time<br/>(s/im)</th>
789
+ <th valign="bottom">train<br/>mem<br/>(GB)</th>
790
+ <th valign="bottom">box<br/>AP</th>
791
+ <th valign="bottom">box<br/>AP50</th>
792
+ <th valign="bottom">mask<br/>AP</th>
793
+ <th valign="bottom">model id</th>
794
+ <th valign="bottom">download</th>
795
+ <!-- TABLE BODY -->
796
+ <!-- ROW: mask_rcnn_R_50_FPN -->
797
+ <tr><td align="left"><a href="configs/Cityscapes/mask_rcnn_R_50_FPN.yaml">R50-FPN, Cityscapes</a></td>
798
+ <td align="center">0.240</td>
799
+ <td align="center">0.078</td>
800
+ <td align="center">4.4</td>
801
+ <td align="center"></td>
802
+ <td align="center"></td>
803
+ <td align="center">36.5</td>
804
+ <td align="center">142423278</td>
805
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/Cityscapes/mask_rcnn_R_50_FPN/142423278/model_final_af9cf5.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/Cityscapes/mask_rcnn_R_50_FPN/142423278/metrics.json">metrics</a></td>
806
+ </tr>
807
+ <!-- ROW: faster_rcnn_R_50_C4 -->
808
+ <tr><td align="left"><a href="configs/PascalVOC-Detection/faster_rcnn_R_50_C4.yaml">R50-C4, VOC</a></td>
809
+ <td align="center">0.537</td>
810
+ <td align="center">0.081</td>
811
+ <td align="center">4.8</td>
812
+ <td align="center">51.9</td>
813
+ <td align="center">80.3</td>
814
+ <td align="center"></td>
815
+ <td align="center">142202221</td>
816
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/PascalVOC-Detection/faster_rcnn_R_50_C4/142202221/model_final_b1acc2.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/PascalVOC-Detection/faster_rcnn_R_50_C4/142202221/metrics.json">metrics</a></td>
817
+ </tr>
818
+ </tbody></table>
819
+
820
+
821
+
822
+ ### Other Settings
823
+
824
+ Ablations for Deformable Conv and Cascade R-CNN:
825
+
826
+ <!--
827
+ ./gen_html_table.py --config 'COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml' 'Misc/*R_50_FPN_1x_dconv*' 'Misc/cascade*1x.yaml' 'COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml' 'Misc/*R_50_FPN_3x_dconv*' 'Misc/cascade*3x.yaml' --name "Baseline R50-FPN" "Deformable Conv" "Cascade R-CNN" "Baseline R50-FPN" "Deformable Conv" "Cascade R-CNN" --fields lr_sched train_speed inference_speed mem box_AP mask_AP
828
+ -->
829
+
830
+
831
+ <table><tbody>
832
+ <!-- START TABLE -->
833
+ <!-- TABLE HEADER -->
834
+ <th valign="bottom">Name</th>
835
+ <th valign="bottom">lr<br/>sched</th>
836
+ <th valign="bottom">train<br/>time<br/>(s/iter)</th>
837
+ <th valign="bottom">inference<br/>time<br/>(s/im)</th>
838
+ <th valign="bottom">train<br/>mem<br/>(GB)</th>
839
+ <th valign="bottom">box<br/>AP</th>
840
+ <th valign="bottom">mask<br/>AP</th>
841
+ <th valign="bottom">model id</th>
842
+ <th valign="bottom">download</th>
843
+ <!-- TABLE BODY -->
844
+ <!-- ROW: mask_rcnn_R_50_FPN_1x -->
845
+ <tr><td align="left"><a href="configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml">Baseline R50-FPN</a></td>
846
+ <td align="center">1x</td>
847
+ <td align="center">0.261</td>
848
+ <td align="center">0.043</td>
849
+ <td align="center">3.4</td>
850
+ <td align="center">38.6</td>
851
+ <td align="center">35.2</td>
852
+ <td align="center">137260431</td>
853
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x/137260431/model_final_a54504.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x/137260431/metrics.json">metrics</a></td>
854
+ </tr>
855
+ <!-- ROW: mask_rcnn_R_50_FPN_1x_dconv_c3-c5 -->
856
+ <tr><td align="left"><a href="configs/Misc/mask_rcnn_R_50_FPN_1x_dconv_c3-c5.yaml">Deformable Conv</a></td>
857
+ <td align="center">1x</td>
858
+ <td align="center">0.342</td>
859
+ <td align="center">0.048</td>
860
+ <td align="center">3.5</td>
861
+ <td align="center">41.5</td>
862
+ <td align="center">37.5</td>
863
+ <td align="center">138602867</td>
864
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/Misc/mask_rcnn_R_50_FPN_1x_dconv_c3-c5/138602867/model_final_65c703.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/Misc/mask_rcnn_R_50_FPN_1x_dconv_c3-c5/138602867/metrics.json">metrics</a></td>
865
+ </tr>
866
+ <!-- ROW: cascade_mask_rcnn_R_50_FPN_1x -->
867
+ <tr><td align="left"><a href="configs/Misc/cascade_mask_rcnn_R_50_FPN_1x.yaml">Cascade R-CNN</a></td>
868
+ <td align="center">1x</td>
869
+ <td align="center">0.317</td>
870
+ <td align="center">0.052</td>
871
+ <td align="center">4.0</td>
872
+ <td align="center">42.1</td>
873
+ <td align="center">36.4</td>
874
+ <td align="center">138602847</td>
875
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/Misc/cascade_mask_rcnn_R_50_FPN_1x/138602847/model_final_e9d89b.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/Misc/cascade_mask_rcnn_R_50_FPN_1x/138602847/metrics.json">metrics</a></td>
876
+ </tr>
877
+ <!-- ROW: mask_rcnn_R_50_FPN_3x -->
878
+ <tr><td align="left"><a href="configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml">Baseline R50-FPN</a></td>
879
+ <td align="center">3x</td>
880
+ <td align="center">0.261</td>
881
+ <td align="center">0.043</td>
882
+ <td align="center">3.4</td>
883
+ <td align="center">41.0</td>
884
+ <td align="center">37.2</td>
885
+ <td align="center">137849600</td>
886
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/metrics.json">metrics</a></td>
887
+ </tr>
888
+ <!-- ROW: mask_rcnn_R_50_FPN_3x_dconv_c3-c5 -->
889
+ <tr><td align="left"><a href="configs/Misc/mask_rcnn_R_50_FPN_3x_dconv_c3-c5.yaml">Deformable Conv</a></td>
890
+ <td align="center">3x</td>
891
+ <td align="center">0.349</td>
892
+ <td align="center">0.047</td>
893
+ <td align="center">3.5</td>
894
+ <td align="center">42.7</td>
895
+ <td align="center">38.5</td>
896
+ <td align="center">144998336</td>
897
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/Misc/mask_rcnn_R_50_FPN_3x_dconv_c3-c5/144998336/model_final_821d0b.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/Misc/mask_rcnn_R_50_FPN_3x_dconv_c3-c5/144998336/metrics.json">metrics</a></td>
898
+ </tr>
899
+ <!-- ROW: cascade_mask_rcnn_R_50_FPN_3x -->
900
+ <tr><td align="left"><a href="configs/Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml">Cascade R-CNN</a></td>
901
+ <td align="center">3x</td>
902
+ <td align="center">0.328</td>
903
+ <td align="center">0.053</td>
904
+ <td align="center">4.0</td>
905
+ <td align="center">44.3</td>
906
+ <td align="center">38.5</td>
907
+ <td align="center">144998488</td>
908
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/Misc/cascade_mask_rcnn_R_50_FPN_3x/144998488/model_final_480dd8.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/Misc/cascade_mask_rcnn_R_50_FPN_3x/144998488/metrics.json">metrics</a></td>
909
+ </tr>
910
+ </tbody></table>
911
+
912
+
913
+ Ablations for normalization methods, and a few models trained from scratch following [Rethinking ImageNet Pre-training](https://arxiv.org/abs/1811.08883).
914
+ (Note: the baseline uses a `2fc` head while the others use a [`4conv1fc` head](https://arxiv.org/abs/1803.08494).)
915
+ <!--
916
+ ./gen_html_table.py --config 'COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml' 'Misc/mask*50_FPN_3x_gn.yaml' 'Misc/mask*50_FPN_3x_syncbn.yaml' 'Misc/scratch*' --name "Baseline R50-FPN" "GN" "SyncBN" "GN (from scratch)" "GN (from scratch)" "SyncBN (from scratch)" --fields lr_sched train_speed inference_speed mem box_AP mask_AP
917
+ -->
918
+
919
+
920
+ <table><tbody>
921
+ <!-- START TABLE -->
922
+ <!-- TABLE HEADER -->
923
+ <th valign="bottom">Name</th>
924
+ <th valign="bottom">lr<br/>sched</th>
925
+ <th valign="bottom">train<br/>time<br/>(s/iter)</th>
926
+ <th valign="bottom">inference<br/>time<br/>(s/im)</th>
927
+ <th valign="bottom">train<br/>mem<br/>(GB)</th>
928
+ <th valign="bottom">box<br/>AP</th>
929
+ <th valign="bottom">mask<br/>AP</th>
930
+ <th valign="bottom">model id</th>
931
+ <th valign="bottom">download</th>
932
+ <!-- TABLE BODY -->
933
+ <!-- ROW: mask_rcnn_R_50_FPN_3x -->
934
+ <tr><td align="left"><a href="configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml">Baseline R50-FPN</a></td>
935
+ <td align="center">3x</td>
936
+ <td align="center">0.261</td>
937
+ <td align="center">0.043</td>
938
+ <td align="center">3.4</td>
939
+ <td align="center">41.0</td>
940
+ <td align="center">37.2</td>
941
+ <td align="center">137849600</td>
942
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/metrics.json">metrics</a></td>
943
+ </tr>
944
+ <!-- ROW: mask_rcnn_R_50_FPN_3x_gn -->
945
+ <tr><td align="left"><a href="configs/Misc/mask_rcnn_R_50_FPN_3x_gn.yaml">GN</a></td>
946
+ <td align="center">3x</td>
947
+ <td align="center">0.309</td>
948
+ <td align="center">0.060</td>
949
+ <td align="center">5.6</td>
950
+ <td align="center">42.6</td>
951
+ <td align="center">38.6</td>
952
+ <td align="center">138602888</td>
953
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/Misc/mask_rcnn_R_50_FPN_3x_gn/138602888/model_final_dc5d9e.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/Misc/mask_rcnn_R_50_FPN_3x_gn/138602888/metrics.json">metrics</a></td>
954
+ </tr>
955
+ <!-- ROW: mask_rcnn_R_50_FPN_3x_syncbn -->
956
+ <tr><td align="left"><a href="configs/Misc/mask_rcnn_R_50_FPN_3x_syncbn.yaml">SyncBN</a></td>
957
+ <td align="center">3x</td>
958
+ <td align="center">0.345</td>
959
+ <td align="center">0.053</td>
960
+ <td align="center">5.5</td>
961
+ <td align="center">41.9</td>
962
+ <td align="center">37.8</td>
963
+ <td align="center">169527823</td>
964
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/Misc/mask_rcnn_R_50_FPN_3x_syncbn/169527823/model_final_3b3c51.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/Misc/mask_rcnn_R_50_FPN_3x_syncbn/169527823/metrics.json">metrics</a></td>
965
+ </tr>
966
+ <!-- ROW: scratch_mask_rcnn_R_50_FPN_3x_gn -->
967
+ <tr><td align="left"><a href="configs/Misc/scratch_mask_rcnn_R_50_FPN_3x_gn.yaml">GN (from scratch)</a></td>
968
+ <td align="center">3x</td>
969
+ <td align="center">0.338</td>
970
+ <td align="center">0.061</td>
971
+ <td align="center">7.2</td>
972
+ <td align="center">39.9</td>
973
+ <td align="center">36.6</td>
974
+ <td align="center">138602908</td>
975
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/Misc/scratch_mask_rcnn_R_50_FPN_3x_gn/138602908/model_final_01ca85.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/Misc/scratch_mask_rcnn_R_50_FPN_3x_gn/138602908/metrics.json">metrics</a></td>
976
+ </tr>
977
+ <!-- ROW: scratch_mask_rcnn_R_50_FPN_9x_gn -->
978
+ <tr><td align="left"><a href="configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_gn.yaml">GN (from scratch)</a></td>
979
+ <td align="center">9x</td>
980
+ <td align="center">N/A</td>
981
+ <td align="center">0.061</td>
982
+ <td align="center">7.2</td>
983
+ <td align="center">43.7</td>
984
+ <td align="center">39.6</td>
985
+ <td align="center">183808979</td>
986
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/Misc/scratch_mask_rcnn_R_50_FPN_9x_gn/183808979/model_final_da7b4c.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/Misc/scratch_mask_rcnn_R_50_FPN_9x_gn/183808979/metrics.json">metrics</a></td>
987
+ </tr>
988
+ <!-- ROW: scratch_mask_rcnn_R_50_FPN_9x_syncbn -->
989
+ <tr><td align="left"><a href="configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_syncbn.yaml">SyncBN (from scratch)</a></td>
990
+ <td align="center">9x</td>
991
+ <td align="center">N/A</td>
992
+ <td align="center">0.055</td>
993
+ <td align="center">7.2</td>
994
+ <td align="center">43.6</td>
995
+ <td align="center">39.3</td>
996
+ <td align="center">184226666</td>
997
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/Misc/scratch_mask_rcnn_R_50_FPN_9x_syncbn/184226666/model_final_5ce33e.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/Misc/scratch_mask_rcnn_R_50_FPN_9x_syncbn/184226666/metrics.json">metrics</a></td>
998
+ </tr>
999
+ </tbody></table>
1000
+
1001
+
1002
+ A few very large models trained for a long time, for demo purposes. They are trained using multiple machines:
1003
+
1004
+ <!--
1005
+ ./gen_html_table.py --config 'Misc/panoptic_*dconv*' 'Misc/cascade_*152*' --name "Panoptic FPN R101" "Mask R-CNN X152" --fields inference_speed mem box_AP mask_AP PQ
1006
+ # manually add TTA results
1007
+ -->
1008
+
1009
+
1010
+ <table><tbody>
1011
+ <!-- START TABLE -->
1012
+ <!-- TABLE HEADER -->
1013
+ <th valign="bottom">Name</th>
1014
+ <th valign="bottom">inference<br/>time<br/>(s/im)</th>
1015
+ <th valign="bottom">train<br/>mem<br/>(GB)</th>
1016
+ <th valign="bottom">box<br/>AP</th>
1017
+ <th valign="bottom">mask<br/>AP</th>
1018
+ <th valign="bottom">PQ</th>
1019
+ <th valign="bottom">model id</th>
1020
+ <th valign="bottom">download</th>
1021
+ <!-- TABLE BODY -->
1022
+ <!-- ROW: panoptic_fpn_R_101_dconv_cascade_gn_3x -->
1023
+ <tr><td align="left"><a href="configs/Misc/panoptic_fpn_R_101_dconv_cascade_gn_3x.yaml">Panoptic FPN R101</a></td>
1024
+ <td align="center">0.098</td>
1025
+ <td align="center">11.4</td>
1026
+ <td align="center">47.4</td>
1027
+ <td align="center">41.3</td>
1028
+ <td align="center">46.1</td>
1029
+ <td align="center">139797668</td>
1030
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/Misc/panoptic_fpn_R_101_dconv_cascade_gn_3x/139797668/model_final_be35db.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/Misc/panoptic_fpn_R_101_dconv_cascade_gn_3x/139797668/metrics.json">metrics</a></td>
1031
+ </tr>
1032
+ <!-- ROW: cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv -->
1033
+ <tr><td align="left"><a href="configs/Misc/cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv.yaml">Mask R-CNN X152</a></td>
1034
+ <td align="center">0.234</td>
1035
+ <td align="center">15.1</td>
1036
+ <td align="center">50.2</td>
1037
+ <td align="center">44.0</td>
1038
+ <td align="center"></td>
1039
+ <td align="center">18131413</td>
1040
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/Misc/cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv/18131413/model_0039999_e76410.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/Misc/cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv/18131413/metrics.json">metrics</a></td>
1041
+ </tr>
1042
+ <!-- ROW: TTA cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv -->
1043
+ <tr><td align="left">above + test-time aug.</td>
1044
+ <td align="center"></td>
1045
+ <td align="center"></td>
1046
+ <td align="center">51.9</td>
1047
+ <td align="center">45.9</td>
1048
+ <td align="center"></td>
1049
+ <td align="center"></td>
1050
+ <td align="center"></td>
1051
+ </tr>
1052
+ </tbody></table>
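
The config paths and checkpoint links in the tables above can also be resolved programmatically. A minimal sketch, assuming detectron2 is installed and using one of the config paths listed above (this snippet is not part of the commit):

```python
# Minimal sketch: resolve a row of the tables above to its config file and
# pretrained checkpoint through detectron2's model_zoo API.
from detectron2 import model_zoo

config_path = "COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml"  # any config listed above

cfg_file = model_zoo.get_config_file(config_path)        # local path to the yaml config
weights_url = model_zoo.get_checkpoint_url(config_path)  # the "model" link from the table
model = model_zoo.get(config_path, trained=True)         # builds the model and loads the weights
print(weights_url)
```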
detectron2/README.md ADDED
@@ -0,0 +1,68 @@
1
+ <img src=".github/Detectron2-Logo-Horz.svg" width="300" >
2
+
3
+ <a href="https://opensource.facebook.com/support-ukraine">
4
+ <img src="https://img.shields.io/badge/Support-Ukraine-FFD500?style=flat&labelColor=005BBB" alt="Support Ukraine - Help Provide Humanitarian Aid to Ukraine." />
5
+ </a>
6
+
7
+ Detectron2 is Facebook AI Research's next generation library
8
+ that provides state-of-the-art detection and segmentation algorithms.
9
+ It is the successor of
10
+ [Detectron](https://github.com/facebookresearch/Detectron/)
11
+ and [maskrcnn-benchmark](https://github.com/facebookresearch/maskrcnn-benchmark/).
12
+ It supports a number of computer vision research projects and production applications at Facebook.
13
+
14
+ <div align="center">
15
+ <img src="https://user-images.githubusercontent.com/1381301/66535560-d3422200-eace-11e9-9123-5535d469db19.png"/>
16
+ </div>
17
+ <br>
18
+
19
+ ## Learn More about Detectron2
20
+
21
+ Explain Like I’m 5: Detectron2 | Using Machine Learning with Detectron2
22
+ :-------------------------:|:-------------------------:
23
+ [![Explain Like I’m 5: Detectron2](https://img.youtube.com/vi/1oq1Ye7dFqc/0.jpg)](https://www.youtube.com/watch?v=1oq1Ye7dFqc) | [![Using Machine Learning with Detectron2](https://img.youtube.com/vi/eUSgtfK4ivk/0.jpg)](https://www.youtube.com/watch?v=eUSgtfK4ivk)
24
+
25
+ ## What's New
26
+ * Includes new capabilities such as panoptic segmentation, DensePose, Cascade R-CNN, rotated bounding boxes, PointRend,
27
+ DeepLab, etc.
28
+ * Can be used as a library to support building [research projects](projects/) on top of it.
29
+ * Models can be exported to TorchScript format or Caffe2 format for deployment.
30
+ * It [trains much faster](https://detectron2.readthedocs.io/notes/benchmarks.html).
31
+
32
+ See our [blog post](https://ai.facebook.com/blog/-detectron2-a-pytorch-based-modular-object-detection-library-/)
33
+ for more demos and to learn about Detectron2.
34
+
35
+ ## Installation
36
+
37
+ See [installation instructions](https://detectron2.readthedocs.io/tutorials/install.html).
38
+
39
+ ## Getting Started
40
+
41
+ See [Getting Started with Detectron2](https://detectron2.readthedocs.io/tutorials/getting_started.html),
42
+ and the [Colab Notebook](https://colab.research.google.com/drive/16jcaJoc6bCFAQ96jDe2HwtXj7BMD_-m5)
43
+ to learn about basic usage.
44
+
45
+ Learn more at our [documentation](https://detectron2.readthedocs.org).
46
+ See [projects/](projects/) for some projects built on top of detectron2.
47
+
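
As a rough sketch of the basic usage referenced above (assuming detectron2 is installed; `input.jpg` is a placeholder image path), running a pretrained model for inference typically looks like this:

```python
# Rough inference sketch (assumes detectron2 is installed; "input.jpg" is a placeholder).
import cv2
from detectron2 import model_zoo
from detectron2.config import get_cfg
from detectron2.engine import DefaultPredictor

cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5  # confidence threshold for reported detections

predictor = DefaultPredictor(cfg)
image = cv2.imread("input.jpg")   # BGR image, the format DefaultPredictor expects
outputs = predictor(image)        # dict containing an "instances" field
print(outputs["instances"].pred_classes)
```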
48
+ ## Model Zoo and Baselines
49
+
50
+ We provide a large set of baseline results and trained models available for download in the [Detectron2 Model Zoo](MODEL_ZOO.md).
51
+
52
+ ## License
53
+
54
+ Detectron2 is released under the [Apache 2.0 license](LICENSE).
55
+
56
+ ## Citing Detectron2
57
+
58
+ If you use Detectron2 in your research or wish to refer to the baseline results published in the [Model Zoo](MODEL_ZOO.md), please use the following BibTeX entry.
59
+
60
+ ```BibTeX
61
+ @misc{wu2019detectron2,
62
+ author = {Yuxin Wu and Alexander Kirillov and Francisco Massa and
63
+ Wan-Yen Lo and Ross Girshick},
64
+ title = {Detectron2},
65
+ howpublished = {\url{https://github.com/facebookresearch/detectron2}},
66
+ year = {2019}
67
+ }
68
+ ```
detectron2/build/lib.linux-x86_64-3.10/detectron2/_C.cpython-310-x86_64-linux-gnu.so ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b144d19f93f6ecb19d3c658e41bbe146f5c4cb3ddfd7ae691b41fc62475aa8a
3
+ size 17491024
detectron2/build/lib.linux-x86_64-3.10/detectron2/__init__.py ADDED
@@ -0,0 +1,10 @@
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+
3
+ from .utils.env import setup_environment
4
+
5
+ setup_environment()
6
+
7
+
8
+ # This line will be programmatically read/written by setup.py.
9
+ # Leave it at the bottom of this file and don't touch it.
10
+ __version__ = "0.6"
detectron2/build/lib.linux-x86_64-3.10/detectron2/checkpoint/__init__.py ADDED
@@ -0,0 +1,10 @@
1
+ # -*- coding: utf-8 -*-
2
+ # Copyright (c) Facebook, Inc. and its affiliates.
3
+ # File:
4
+
5
+
6
+ from . import catalog as _UNUSED # register the handler
7
+ from .detection_checkpoint import DetectionCheckpointer
8
+ from fvcore.common.checkpoint import Checkpointer, PeriodicCheckpointer
9
+
10
+ __all__ = ["Checkpointer", "PeriodicCheckpointer", "DetectionCheckpointer"]
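
A minimal usage sketch of the classes exported here (not part of this file; the toy module stands in for a real detectron2 model, and the `detectron2://` path is one of the checkpoints listed in MODEL_ZOO.md):

```python
# Minimal sketch: loading and periodically saving weights with the exported classes.
# The toy nn.Linear stands in for a real detectron2 model built from a config.
import torch.nn as nn
from detectron2.checkpoint import DetectionCheckpointer, PeriodicCheckpointer

model = nn.Linear(4, 2)  # placeholder model
checkpointer = DetectionCheckpointer(model, save_dir="output")

# Accepts local paths, http(s) URLs, and detectron2:// / catalog:// URIs.
checkpointer.load("detectron2://COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x/137849621/model_final_a6e10b.pkl")

# PeriodicCheckpointer (re-exported from fvcore) saves every `period` iterations;
# periodic.step(iteration) would be called inside a training loop.
periodic = PeriodicCheckpointer(checkpointer, period=5000, max_iter=90000)
```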
detectron2/build/lib.linux-x86_64-3.10/detectron2/checkpoint/c2_model_loading.py ADDED
@@ -0,0 +1,407 @@
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+ import copy
3
+ import logging
4
+ import re
5
+ from typing import Dict, List
6
+ import torch
7
+ from tabulate import tabulate
8
+
9
+
10
+ def convert_basic_c2_names(original_keys):
11
+ """
12
+ Apply some basic name conversion to names in C2 weights.
13
+ It only deals with typical backbone models.
14
+
15
+ Args:
16
+ original_keys (list[str]):
17
+ Returns:
18
+ list[str]: The same number of strings matching those in original_keys.
19
+ """
20
+ layer_keys = copy.deepcopy(original_keys)
21
+ layer_keys = [
22
+ {"pred_b": "linear_b", "pred_w": "linear_w"}.get(k, k) for k in layer_keys
23
+ ] # some hard-coded mappings
24
+
25
+ layer_keys = [k.replace("_", ".") for k in layer_keys]
26
+ layer_keys = [re.sub("\\.b$", ".bias", k) for k in layer_keys]
27
+ layer_keys = [re.sub("\\.w$", ".weight", k) for k in layer_keys]
28
+ # Unify both bn and gn names to "norm"
29
+ layer_keys = [re.sub("bn\\.s$", "norm.weight", k) for k in layer_keys]
30
+ layer_keys = [re.sub("bn\\.bias$", "norm.bias", k) for k in layer_keys]
31
+ layer_keys = [re.sub("bn\\.rm", "norm.running_mean", k) for k in layer_keys]
32
+ layer_keys = [re.sub("bn\\.running.mean$", "norm.running_mean", k) for k in layer_keys]
33
+ layer_keys = [re.sub("bn\\.riv$", "norm.running_var", k) for k in layer_keys]
34
+ layer_keys = [re.sub("bn\\.running.var$", "norm.running_var", k) for k in layer_keys]
35
+ layer_keys = [re.sub("bn\\.gamma$", "norm.weight", k) for k in layer_keys]
36
+ layer_keys = [re.sub("bn\\.beta$", "norm.bias", k) for k in layer_keys]
37
+ layer_keys = [re.sub("gn\\.s$", "norm.weight", k) for k in layer_keys]
38
+ layer_keys = [re.sub("gn\\.bias$", "norm.bias", k) for k in layer_keys]
39
+
40
+ # stem
41
+ layer_keys = [re.sub("^res\\.conv1\\.norm\\.", "conv1.norm.", k) for k in layer_keys]
42
+ # to avoid mis-matching with "conv1" in other components (e.g. detection head)
43
+ layer_keys = [re.sub("^conv1\\.", "stem.conv1.", k) for k in layer_keys]
44
+
45
+ # layer1-4 are used by torchvision; however, we follow the C2 naming strategy (res2-5)
46
+ # layer_keys = [re.sub("^res2.", "layer1.", k) for k in layer_keys]
47
+ # layer_keys = [re.sub("^res3.", "layer2.", k) for k in layer_keys]
48
+ # layer_keys = [re.sub("^res4.", "layer3.", k) for k in layer_keys]
49
+ # layer_keys = [re.sub("^res5.", "layer4.", k) for k in layer_keys]
50
+
51
+ # blocks
52
+ layer_keys = [k.replace(".branch1.", ".shortcut.") for k in layer_keys]
53
+ layer_keys = [k.replace(".branch2a.", ".conv1.") for k in layer_keys]
54
+ layer_keys = [k.replace(".branch2b.", ".conv2.") for k in layer_keys]
55
+ layer_keys = [k.replace(".branch2c.", ".conv3.") for k in layer_keys]
56
+
57
+ # DensePose substitutions
58
+ layer_keys = [re.sub("^body.conv.fcn", "body_conv_fcn", k) for k in layer_keys]
59
+ layer_keys = [k.replace("AnnIndex.lowres", "ann_index_lowres") for k in layer_keys]
60
+ layer_keys = [k.replace("Index.UV.lowres", "index_uv_lowres") for k in layer_keys]
61
+ layer_keys = [k.replace("U.lowres", "u_lowres") for k in layer_keys]
62
+ layer_keys = [k.replace("V.lowres", "v_lowres") for k in layer_keys]
63
+ return layer_keys
64
+
65
+
66
+ def convert_c2_detectron_names(weights):
67
+ """
68
+ Map Caffe2 Detectron weight names to Detectron2 names.
69
+
70
+ Args:
71
+ weights (dict): name -> tensor
72
+
73
+ Returns:
74
+ dict: detectron2 names -> tensor
75
+ dict: detectron2 names -> C2 names
76
+ """
77
+ logger = logging.getLogger(__name__)
78
+ logger.info("Renaming Caffe2 weights ......")
79
+ original_keys = sorted(weights.keys())
80
+ layer_keys = copy.deepcopy(original_keys)
81
+
82
+ layer_keys = convert_basic_c2_names(layer_keys)
83
+
84
+ # --------------------------------------------------------------------------
85
+ # RPN hidden representation conv
86
+ # --------------------------------------------------------------------------
87
+ # FPN case
88
+ # In the C2 model, the RPN hidden layer conv is defined for FPN level 2 and then
89
+ # shared for all other levels, hence the appearance of "fpn2"
90
+ layer_keys = [
91
+ k.replace("conv.rpn.fpn2", "proposal_generator.rpn_head.conv") for k in layer_keys
92
+ ]
93
+ # Non-FPN case
94
+ layer_keys = [k.replace("conv.rpn", "proposal_generator.rpn_head.conv") for k in layer_keys]
95
+
96
+ # --------------------------------------------------------------------------
97
+ # RPN box transformation conv
98
+ # --------------------------------------------------------------------------
99
+ # FPN case (see note above about "fpn2")
100
+ layer_keys = [
101
+ k.replace("rpn.bbox.pred.fpn2", "proposal_generator.rpn_head.anchor_deltas")
102
+ for k in layer_keys
103
+ ]
104
+ layer_keys = [
105
+ k.replace("rpn.cls.logits.fpn2", "proposal_generator.rpn_head.objectness_logits")
106
+ for k in layer_keys
107
+ ]
108
+ # Non-FPN case
109
+ layer_keys = [
110
+ k.replace("rpn.bbox.pred", "proposal_generator.rpn_head.anchor_deltas") for k in layer_keys
111
+ ]
112
+ layer_keys = [
113
+ k.replace("rpn.cls.logits", "proposal_generator.rpn_head.objectness_logits")
114
+ for k in layer_keys
115
+ ]
116
+
117
+ # --------------------------------------------------------------------------
118
+ # Fast R-CNN box head
119
+ # --------------------------------------------------------------------------
120
+ layer_keys = [re.sub("^bbox\\.pred", "bbox_pred", k) for k in layer_keys]
121
+ layer_keys = [re.sub("^cls\\.score", "cls_score", k) for k in layer_keys]
122
+ layer_keys = [re.sub("^fc6\\.", "box_head.fc1.", k) for k in layer_keys]
123
+ layer_keys = [re.sub("^fc7\\.", "box_head.fc2.", k) for k in layer_keys]
124
+ # 4conv1fc head tensor names: head_conv1_w, head_conv1_gn_s
125
+ layer_keys = [re.sub("^head\\.conv", "box_head.conv", k) for k in layer_keys]
126
+
127
+ # --------------------------------------------------------------------------
128
+ # FPN lateral and output convolutions
129
+ # --------------------------------------------------------------------------
130
+ def fpn_map(name):
131
+ """
132
+ Look for keys with the following patterns:
133
+ 1) Starts with "fpn.inner."
134
+ Example: "fpn.inner.res2.2.sum.lateral.weight"
135
+ Meaning: These are lateral pathway convolutions
136
+ 2) Starts with "fpn.res"
137
+ Example: "fpn.res2.2.sum.weight"
138
+ Meaning: These are FPN output convolutions
139
+ """
140
+ splits = name.split(".")
141
+ norm = ".norm" if "norm" in splits else ""
142
+ if name.startswith("fpn.inner."):
143
+ # splits example: ['fpn', 'inner', 'res2', '2', 'sum', 'lateral', 'weight']
144
+ stage = int(splits[2][len("res") :])
145
+ return "fpn_lateral{}{}.{}".format(stage, norm, splits[-1])
146
+ elif name.startswith("fpn.res"):
147
+ # splits example: ['fpn', 'res2', '2', 'sum', 'weight']
148
+ stage = int(splits[1][len("res") :])
149
+ return "fpn_output{}{}.{}".format(stage, norm, splits[-1])
150
+ return name
151
+
152
+ layer_keys = [fpn_map(k) for k in layer_keys]
153
+
154
+ # --------------------------------------------------------------------------
155
+ # Mask R-CNN mask head
156
+ # --------------------------------------------------------------------------
157
+ # roi_heads.StandardROIHeads case
158
+ layer_keys = [k.replace(".[mask].fcn", "mask_head.mask_fcn") for k in layer_keys]
159
+ layer_keys = [re.sub("^\\.mask\\.fcn", "mask_head.mask_fcn", k) for k in layer_keys]
160
+ layer_keys = [k.replace("mask.fcn.logits", "mask_head.predictor") for k in layer_keys]
161
+ # roi_heads.Res5ROIHeads case
162
+ layer_keys = [k.replace("conv5.mask", "mask_head.deconv") for k in layer_keys]
163
+
164
+ # --------------------------------------------------------------------------
165
+ # Keypoint R-CNN head
166
+ # --------------------------------------------------------------------------
167
+ # interestingly, the keypoint head convs have blob names that are simply "conv_fcnX"
168
+ layer_keys = [k.replace("conv.fcn", "roi_heads.keypoint_head.conv_fcn") for k in layer_keys]
169
+ layer_keys = [
170
+ k.replace("kps.score.lowres", "roi_heads.keypoint_head.score_lowres") for k in layer_keys
171
+ ]
172
+ layer_keys = [k.replace("kps.score.", "roi_heads.keypoint_head.score.") for k in layer_keys]
173
+
174
+ # --------------------------------------------------------------------------
175
+ # Done with replacements
176
+ # --------------------------------------------------------------------------
177
+ assert len(set(layer_keys)) == len(layer_keys)
178
+ assert len(original_keys) == len(layer_keys)
179
+
180
+ new_weights = {}
181
+ new_keys_to_original_keys = {}
182
+ for orig, renamed in zip(original_keys, layer_keys):
183
+ new_keys_to_original_keys[renamed] = orig
184
+ if renamed.startswith("bbox_pred.") or renamed.startswith("mask_head.predictor."):
185
+ # remove the meaningless prediction weight for background class
186
+ new_start_idx = 4 if renamed.startswith("bbox_pred.") else 1
187
+ new_weights[renamed] = weights[orig][new_start_idx:]
188
+ logger.info(
189
+ "Remove prediction weight for background class in {}. The shape changes from "
190
+ "{} to {}.".format(
191
+ renamed, tuple(weights[orig].shape), tuple(new_weights[renamed].shape)
192
+ )
193
+ )
194
+ elif renamed.startswith("cls_score."):
195
+ # move weights of bg class from original index 0 to last index
196
+ logger.info(
197
+ "Move classification weights for background class in {} from index 0 to "
198
+ "index {}.".format(renamed, weights[orig].shape[0] - 1)
199
+ )
200
+ new_weights[renamed] = torch.cat([weights[orig][1:], weights[orig][:1]])
201
+ else:
202
+ new_weights[renamed] = weights[orig]
203
+
204
+ return new_weights, new_keys_to_original_keys
205
+
206
+
207
+ # Note the current matching is not symmetric.
208
+ # It assumes model_state_dict will have longer names.
209
+ def align_and_update_state_dicts(model_state_dict, ckpt_state_dict, c2_conversion=True):
210
+ """
211
+ Match names between the two state dicts, and return a new ckpt_state_dict with names
212
+ converted to match model_state_dict with heuristics. The returned dict can be later
213
+ loaded with fvcore checkpointer.
214
+ If `c2_conversion==True`, `ckpt_state_dict` is assumed to be a Caffe2
215
+ model and will be renamed at first.
216
+
217
+ Strategy: suppose that the models that we will create will have prefixes appended
218
+ to each of their keys, for example due to an extra level of nesting that the original
219
+ pre-trained weights from ImageNet won't contain. For example, model.state_dict()
220
+ might return backbone[0].body.res2.conv1.weight, while the pre-trained model contains
221
+ res2.conv1.weight. We thus want to match both parameters together.
222
+ For that, we look for each model weight, look among all loaded keys if there is one
223
+ that is a suffix of the current weight name, and use it if that's the case.
224
+ If multiple matches exist, take the one with longest size
225
+ of the corresponding name. For example, for the same model as before, the pretrained
226
+ weight file can contain both res2.conv1.weight, as well as conv1.weight. In this case,
227
+ we want to match backbone[0].body.conv1.weight to conv1.weight, and
228
+ backbone[0].body.res2.conv1.weight to res2.conv1.weight.
229
+ """
230
+ model_keys = sorted(model_state_dict.keys())
231
+ if c2_conversion:
232
+ ckpt_state_dict, original_keys = convert_c2_detectron_names(ckpt_state_dict)
233
+ # original_keys: the name in the original dict (before renaming)
234
+ else:
235
+ original_keys = {x: x for x in ckpt_state_dict.keys()}
236
+ ckpt_keys = sorted(ckpt_state_dict.keys())
237
+
238
+ def match(a, b):
239
+ # Matched ckpt_key should be a complete (starts with '.') suffix.
240
+ # For example, roi_heads.mesh_head.whatever_conv1 does not match conv1,
241
+ # but matches whatever_conv1 or mesh_head.whatever_conv1.
242
+ return a == b or a.endswith("." + b)
243
+
244
+ # get a matrix of string matches, where each (i, j) entry correspond to the size of the
245
+ # ckpt_key string, if it matches
246
+ match_matrix = [len(j) if match(i, j) else 0 for i in model_keys for j in ckpt_keys]
247
+ match_matrix = torch.as_tensor(match_matrix).view(len(model_keys), len(ckpt_keys))
248
+ # use the matched one with longest size in case of multiple matches
249
+ max_match_size, idxs = match_matrix.max(1)
250
+ # remove indices that correspond to no-match
251
+ idxs[max_match_size == 0] = -1
252
+
253
+ logger = logging.getLogger(__name__)
254
+ # matched_pairs (matched checkpoint key --> matched model key)
255
+ matched_keys = {}
256
+ result_state_dict = {}
257
+ for idx_model, idx_ckpt in enumerate(idxs.tolist()):
258
+ if idx_ckpt == -1:
259
+ continue
260
+ key_model = model_keys[idx_model]
261
+ key_ckpt = ckpt_keys[idx_ckpt]
262
+ value_ckpt = ckpt_state_dict[key_ckpt]
263
+ shape_in_model = model_state_dict[key_model].shape
264
+
265
+ if shape_in_model != value_ckpt.shape:
266
+ logger.warning(
267
+ "Shape of {} in checkpoint is {}, while shape of {} in model is {}.".format(
268
+ key_ckpt, value_ckpt.shape, key_model, shape_in_model
269
+ )
270
+ )
271
+ logger.warning(
272
+ "{} will not be loaded. Please double check and see if this is desired.".format(
273
+ key_ckpt
274
+ )
275
+ )
276
+ continue
277
+
278
+ assert key_model not in result_state_dict
279
+ result_state_dict[key_model] = value_ckpt
280
+ if key_ckpt in matched_keys: # already added to matched_keys
281
+ logger.error(
282
+ "Ambiguity found for {} in checkpoint!"
283
+ "It matches at least two keys in the model ({} and {}).".format(
284
+ key_ckpt, key_model, matched_keys[key_ckpt]
285
+ )
286
+ )
287
+ raise ValueError("Cannot match one checkpoint key to multiple keys in the model.")
288
+
289
+ matched_keys[key_ckpt] = key_model
290
+
291
+ # logging:
292
+ matched_model_keys = sorted(matched_keys.values())
293
+ if len(matched_model_keys) == 0:
294
+ logger.warning("No weights in checkpoint matched with model.")
295
+ return ckpt_state_dict
296
+ common_prefix = _longest_common_prefix(matched_model_keys)
297
+ rev_matched_keys = {v: k for k, v in matched_keys.items()}
298
+ original_keys = {k: original_keys[rev_matched_keys[k]] for k in matched_model_keys}
299
+
300
+ model_key_groups = _group_keys_by_module(matched_model_keys, original_keys)
301
+ table = []
302
+ memo = set()
303
+ for key_model in matched_model_keys:
304
+ if key_model in memo:
305
+ continue
306
+ if key_model in model_key_groups:
307
+ group = model_key_groups[key_model]
308
+ memo |= set(group)
309
+ shapes = [tuple(model_state_dict[k].shape) for k in group]
310
+ table.append(
311
+ (
312
+ _longest_common_prefix([k[len(common_prefix) :] for k in group]) + "*",
313
+ _group_str([original_keys[k] for k in group]),
314
+ " ".join([str(x).replace(" ", "") for x in shapes]),
315
+ )
316
+ )
317
+ else:
318
+ key_checkpoint = original_keys[key_model]
319
+ shape = str(tuple(model_state_dict[key_model].shape))
320
+ table.append((key_model[len(common_prefix) :], key_checkpoint, shape))
321
+ table_str = tabulate(
322
+ table, tablefmt="pipe", headers=["Names in Model", "Names in Checkpoint", "Shapes"]
323
+ )
324
+ logger.info(
325
+ "Following weights matched with "
326
+ + (f"submodule {common_prefix[:-1]}" if common_prefix else "model")
327
+ + ":\n"
328
+ + table_str
329
+ )
330
+
331
+ unmatched_ckpt_keys = [k for k in ckpt_keys if k not in set(matched_keys.keys())]
332
+ for k in unmatched_ckpt_keys:
333
+ result_state_dict[k] = ckpt_state_dict[k]
334
+ return result_state_dict
335
+
336
+
337
+ def _group_keys_by_module(keys: List[str], original_names: Dict[str, str]):
338
+ """
339
+ Params in the same submodule are grouped together.
340
+
341
+ Args:
342
+ keys: names of all parameters
343
+ original_names: mapping from parameter name to their name in the checkpoint
344
+
345
+ Returns:
346
+ dict[name -> all other names in the same group]
347
+ """
348
+
349
+ def _submodule_name(key):
350
+ pos = key.rfind(".")
351
+ if pos < 0:
352
+ return None
353
+ prefix = key[: pos + 1]
354
+ return prefix
355
+
356
+ all_submodules = [_submodule_name(k) for k in keys]
357
+ all_submodules = [x for x in all_submodules if x]
358
+ all_submodules = sorted(all_submodules, key=len)
359
+
360
+ ret = {}
361
+ for prefix in all_submodules:
362
+ group = [k for k in keys if k.startswith(prefix)]
363
+ if len(group) <= 1:
364
+ continue
365
+ original_name_lcp = _longest_common_prefix_str([original_names[k] for k in group])
366
+ if len(original_name_lcp) == 0:
367
+ # don't group weights if original names don't share prefix
368
+ continue
369
+
370
+ for k in group:
371
+ if k in ret:
372
+ continue
373
+ ret[k] = group
374
+ return ret
375
+
376
+
377
+ def _longest_common_prefix(names: List[str]) -> str:
378
+ """
379
+ ["abc.zfg", "abc.zef"] -> "abc."
380
+ """
381
+ names = [n.split(".") for n in names]
382
+ m1, m2 = min(names), max(names)
383
+ ret = [a for a, b in zip(m1, m2) if a == b]
384
+ ret = ".".join(ret) + "." if len(ret) else ""
385
+ return ret
386
+
387
+
388
+ def _longest_common_prefix_str(names: List[str]) -> str:
389
+ m1, m2 = min(names), max(names)
390
+ lcp = [a for a, b in zip(m1, m2) if a == b]
391
+ lcp = "".join(lcp)
392
+ return lcp
393
+
394
+
395
+ def _group_str(names: List[str]) -> str:
396
+ """
397
+ Turn "common1", "common2", "common3" into "common{1,2,3}"
398
+ """
399
+ lcp = _longest_common_prefix_str(names)
400
+ rest = [x[len(lcp) :] for x in names]
401
+ rest = "{" + ",".join(rest) + "}"
402
+ ret = lcp + rest
403
+
404
+ # add some simplification for BN specifically
405
+ ret = ret.replace("bn_{beta,running_mean,running_var,gamma}", "bn_*")
406
+ ret = ret.replace("bn_beta,bn_running_mean,bn_running_var,bn_gamma", "bn_*")
407
+ return ret
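
A toy illustration of the suffix-matching heuristic described in the `align_and_update_state_dicts` docstring (not part of this file; the key names are made up, and `c2_conversion=False` skips the Caffe2 renaming step):

```python
# Toy illustration of the suffix-matching heuristic with made-up key names.
import torch
from detectron2.checkpoint.c2_model_loading import align_and_update_state_dicts

model_sd = {
    "backbone.bottom_up.stem.conv1.weight": torch.zeros(64, 3, 7, 7),
    "backbone.bottom_up.res2.0.conv1.weight": torch.zeros(64, 64, 1, 1),
}
ckpt_sd = {
    "stem.conv1.weight": torch.ones(64, 3, 7, 7),
    "res2.0.conv1.weight": torch.ones(64, 64, 1, 1),
}

# Each checkpoint key is matched to the model key for which it is a "."-separated suffix.
aligned = align_and_update_state_dicts(model_sd, ckpt_sd, c2_conversion=False)
print(sorted(aligned.keys()))
# ['backbone.bottom_up.res2.0.conv1.weight', 'backbone.bottom_up.stem.conv1.weight']
```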
detectron2/build/lib.linux-x86_64-3.10/detectron2/checkpoint/catalog.py ADDED
@@ -0,0 +1,115 @@
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+ import logging
3
+
4
+ from detectron2.utils.file_io import PathHandler, PathManager
5
+
6
+
7
+ class ModelCatalog(object):
8
+ """
9
+ Store mappings from names to third-party models.
10
+ """
11
+
12
+ S3_C2_DETECTRON_PREFIX = "https://dl.fbaipublicfiles.com/detectron"
13
+
14
+ # MSRA models have STRIDE_IN_1X1=True. False otherwise.
15
+ # NOTE: all BN models here have fused BN into an affine layer.
16
+ # As a result, you should only load them to a model with "FrozenBN".
17
+ # Loading them to a model with regular BN or SyncBN is wrong.
18
+ # Even when loaded to FrozenBN, it is still different from affine by an epsilon,
19
+ # which should be negligible for training.
20
+ # NOTE: all models here use PIXEL_STD=[1,1,1]
21
+ # NOTE: Most of the BN models here are no longer used. We use the
22
+ # re-converted pre-trained models under detectron2 model zoo instead.
23
+ C2_IMAGENET_MODELS = {
24
+ "MSRA/R-50": "ImageNetPretrained/MSRA/R-50.pkl",
25
+ "MSRA/R-101": "ImageNetPretrained/MSRA/R-101.pkl",
26
+ "FAIR/R-50-GN": "ImageNetPretrained/47261647/R-50-GN.pkl",
27
+ "FAIR/R-101-GN": "ImageNetPretrained/47592356/R-101-GN.pkl",
28
+ "FAIR/X-101-32x8d": "ImageNetPretrained/20171220/X-101-32x8d.pkl",
29
+ "FAIR/X-101-64x4d": "ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl",
30
+ "FAIR/X-152-32x8d-IN5k": "ImageNetPretrained/25093814/X-152-32x8d-IN5k.pkl",
31
+ }
32
+
33
+ C2_DETECTRON_PATH_FORMAT = (
34
+ "{prefix}/{url}/output/train/{dataset}/{type}/model_final.pkl" # noqa B950
35
+ )
36
+
37
+ C2_DATASET_COCO = "coco_2014_train%3Acoco_2014_valminusminival"
38
+ C2_DATASET_COCO_KEYPOINTS = "keypoints_coco_2014_train%3Akeypoints_coco_2014_valminusminival"
39
+
40
+ # format: {model_name} -> part of the url
41
+ C2_DETECTRON_MODELS = {
42
+ "35857197/e2e_faster_rcnn_R-50-C4_1x": "35857197/12_2017_baselines/e2e_faster_rcnn_R-50-C4_1x.yaml.01_33_49.iAX0mXvW", # noqa B950
43
+ "35857345/e2e_faster_rcnn_R-50-FPN_1x": "35857345/12_2017_baselines/e2e_faster_rcnn_R-50-FPN_1x.yaml.01_36_30.cUF7QR7I", # noqa B950
44
+ "35857890/e2e_faster_rcnn_R-101-FPN_1x": "35857890/12_2017_baselines/e2e_faster_rcnn_R-101-FPN_1x.yaml.01_38_50.sNxI7sX7", # noqa B950
45
+ "36761737/e2e_faster_rcnn_X-101-32x8d-FPN_1x": "36761737/12_2017_baselines/e2e_faster_rcnn_X-101-32x8d-FPN_1x.yaml.06_31_39.5MIHi1fZ", # noqa B950
46
+ "35858791/e2e_mask_rcnn_R-50-C4_1x": "35858791/12_2017_baselines/e2e_mask_rcnn_R-50-C4_1x.yaml.01_45_57.ZgkA7hPB", # noqa B950
47
+ "35858933/e2e_mask_rcnn_R-50-FPN_1x": "35858933/12_2017_baselines/e2e_mask_rcnn_R-50-FPN_1x.yaml.01_48_14.DzEQe4wC", # noqa B950
48
+ "35861795/e2e_mask_rcnn_R-101-FPN_1x": "35861795/12_2017_baselines/e2e_mask_rcnn_R-101-FPN_1x.yaml.02_31_37.KqyEK4tT", # noqa B950
49
+ "36761843/e2e_mask_rcnn_X-101-32x8d-FPN_1x": "36761843/12_2017_baselines/e2e_mask_rcnn_X-101-32x8d-FPN_1x.yaml.06_35_59.RZotkLKI", # noqa B950
50
+ "48616381/e2e_mask_rcnn_R-50-FPN_2x_gn": "GN/48616381/04_2018_gn_baselines/e2e_mask_rcnn_R-50-FPN_2x_gn_0416.13_23_38.bTlTI97Q", # noqa B950
51
+ "37697547/e2e_keypoint_rcnn_R-50-FPN_1x": "37697547/12_2017_baselines/e2e_keypoint_rcnn_R-50-FPN_1x.yaml.08_42_54.kdzV35ao", # noqa B950
52
+ "35998355/rpn_R-50-C4_1x": "35998355/12_2017_baselines/rpn_R-50-C4_1x.yaml.08_00_43.njH5oD9L", # noqa B950
53
+ "35998814/rpn_R-50-FPN_1x": "35998814/12_2017_baselines/rpn_R-50-FPN_1x.yaml.08_06_03.Axg0r179", # noqa B950
54
+ "36225147/fast_R-50-FPN_1x": "36225147/12_2017_baselines/fast_rcnn_R-50-FPN_1x.yaml.08_39_09.L3obSdQ2", # noqa B950
55
+ }
56
+
57
+ @staticmethod
58
+ def get(name):
59
+ if name.startswith("Caffe2Detectron/COCO"):
60
+ return ModelCatalog._get_c2_detectron_baseline(name)
61
+ if name.startswith("ImageNetPretrained/"):
62
+ return ModelCatalog._get_c2_imagenet_pretrained(name)
63
+ raise RuntimeError("model not present in the catalog: {}".format(name))
64
+
65
+ @staticmethod
66
+ def _get_c2_imagenet_pretrained(name):
67
+ prefix = ModelCatalog.S3_C2_DETECTRON_PREFIX
68
+ name = name[len("ImageNetPretrained/") :]
69
+ name = ModelCatalog.C2_IMAGENET_MODELS[name]
70
+ url = "/".join([prefix, name])
71
+ return url
72
+
73
+ @staticmethod
74
+ def _get_c2_detectron_baseline(name):
75
+ name = name[len("Caffe2Detectron/COCO/") :]
76
+ url = ModelCatalog.C2_DETECTRON_MODELS[name]
77
+ if "keypoint_rcnn" in name:
78
+ dataset = ModelCatalog.C2_DATASET_COCO_KEYPOINTS
79
+ else:
80
+ dataset = ModelCatalog.C2_DATASET_COCO
81
+
82
+ if "35998355/rpn_R-50-C4_1x" in name:
83
+ # this one model is somehow different from the others
84
+ type = "rpn"
85
+ else:
86
+ type = "generalized_rcnn"
87
+
88
+ # Detectron C2 models are stored in the structure defined in `C2_DETECTRON_PATH_FORMAT`.
89
+ url = ModelCatalog.C2_DETECTRON_PATH_FORMAT.format(
90
+ prefix=ModelCatalog.S3_C2_DETECTRON_PREFIX, url=url, type=type, dataset=dataset
91
+ )
92
+ return url
93
+
94
+
95
+ class ModelCatalogHandler(PathHandler):
96
+ """
97
+ Resolve URLs of the form catalog://.
98
+ """
99
+
100
+ PREFIX = "catalog://"
101
+
102
+ def _get_supported_prefixes(self):
103
+ return [self.PREFIX]
104
+
105
+ def _get_local_path(self, path, **kwargs):
106
+ logger = logging.getLogger(__name__)
107
+ catalog_path = ModelCatalog.get(path[len(self.PREFIX) :])
108
+ logger.info("Catalog entry {} points to {}".format(path, catalog_path))
109
+ return PathManager.get_local_path(catalog_path, **kwargs)
110
+
111
+ def _open(self, path, mode="r", **kwargs):
112
+ return PathManager.open(self._get_local_path(path), mode, **kwargs)
113
+
114
+
115
+ PathManager.register_handler(ModelCatalogHandler())
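As an illustrative sketch only (assuming the import path mirrors the file location, i.e. detectron2.checkpoint.catalog; the keys used below come from the C2_IMAGENET_MODELS and C2_DETECTRON_MODELS tables above), the handler registered above lets catalog:// names resolve to concrete download URLs:

from detectron2.checkpoint.catalog import ModelCatalog

# ImageNet-pretrained entries are resolved by _get_c2_imagenet_pretrained()
url = ModelCatalog.get("ImageNetPretrained/FAIR/X-101-32x8d")

# COCO baseline entries are resolved by _get_c2_detectron_baseline() and then
# expanded with C2_DETECTRON_PATH_FORMAT (prefix + dataset + type + model_final.pkl)
url = ModelCatalog.get("Caffe2Detectron/COCO/35857345/e2e_faster_rcnn_R-50-FPN_1x")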
detectron2/build/lib.linux-x86_64-3.10/detectron2/checkpoint/detection_checkpoint.py ADDED
@@ -0,0 +1,121 @@
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+ import logging
3
+ import os
4
+ import pickle
5
+ import torch
6
+ from fvcore.common.checkpoint import Checkpointer
7
+ from torch.nn.parallel import DistributedDataParallel
8
+
9
+ import detectron2.utils.comm as comm
10
+ from detectron2.utils.file_io import PathManager
11
+
12
+ from .c2_model_loading import align_and_update_state_dicts
13
+
14
+
15
+ class DetectionCheckpointer(Checkpointer):
16
+ """
17
+ Same as :class:`Checkpointer`, but is able to:
18
+ 1. handle models in detectron & detectron2 model zoo, and apply conversions for legacy models.
19
+ 2. correctly load checkpoints that are only available on the master worker
20
+ """
21
+
22
+ def __init__(self, model, save_dir="", *, save_to_disk=None, **checkpointables):
23
+ is_main_process = comm.is_main_process()
24
+ super().__init__(
25
+ model,
26
+ save_dir,
27
+ save_to_disk=is_main_process if save_to_disk is None else save_to_disk,
28
+ **checkpointables,
29
+ )
30
+ self.path_manager = PathManager
31
+
32
+ def load(self, path, *args, **kwargs):
33
+ need_sync = False
34
+
35
+ if path and isinstance(self.model, DistributedDataParallel):
36
+ logger = logging.getLogger(__name__)
37
+ path = self.path_manager.get_local_path(path)
38
+ has_file = os.path.isfile(path)
39
+ all_has_file = comm.all_gather(has_file)
40
+ if not all_has_file[0]:
41
+ raise OSError(f"File {path} not found on main worker.")
42
+ if not all(all_has_file):
43
+ logger.warning(
44
+ f"Not all workers can read checkpoint {path}. "
45
+ "Training may fail to fully resume."
46
+ )
47
+ # TODO: broadcast the checkpoint file contents from main
48
+ # worker, and load from it instead.
49
+ need_sync = True
50
+ if not has_file:
51
+ path = None # don't load if not readable
52
+ ret = super().load(path, *args, **kwargs)
53
+
54
+ if need_sync:
55
+ logger.info("Broadcasting model states from main worker ...")
56
+ self.model._sync_params_and_buffers()
57
+ return ret
58
+
59
+ def _load_file(self, filename):
60
+ if filename.endswith(".pkl"):
61
+ with PathManager.open(filename, "rb") as f:
62
+ data = pickle.load(f, encoding="latin1")
63
+ if "model" in data and "__author__" in data:
64
+ # file is in Detectron2 model zoo format
65
+ self.logger.info("Reading a file from '{}'".format(data["__author__"]))
66
+ return data
67
+ else:
68
+ # assume file is from Caffe2 / Detectron1 model zoo
69
+ if "blobs" in data:
70
+ # Detection models have "blobs", but ImageNet models don't
71
+ data = data["blobs"]
72
+ data = {k: v for k, v in data.items() if not k.endswith("_momentum")}
73
+ return {"model": data, "__author__": "Caffe2", "matching_heuristics": True}
74
+ elif filename.endswith(".pyth"):
75
+ # assume file is from pycls; no one else seems to use the ".pyth" extension
76
+ with PathManager.open(filename, "rb") as f:
77
+ data = torch.load(f)
78
+ assert (
79
+ "model_state" in data
80
+ ), f"Cannot load .pyth file {filename}; pycls checkpoints must contain 'model_state'."
81
+ model_state = {
82
+ k: v
83
+ for k, v in data["model_state"].items()
84
+ if not k.endswith("num_batches_tracked")
85
+ }
86
+ return {"model": model_state, "__author__": "pycls", "matching_heuristics": True}
87
+
88
+ loaded = super()._load_file(filename) # load native pth checkpoint
89
+ if "model" not in loaded:
90
+ loaded = {"model": loaded}
91
+ loaded["matching_heuristics"] = True
92
+ return loaded
93
+
94
+ def _load_model(self, checkpoint):
95
+ if checkpoint.get("matching_heuristics", False):
96
+ self._convert_ndarray_to_tensor(checkpoint["model"])
97
+ # convert weights by name-matching heuristics
98
+ checkpoint["model"] = align_and_update_state_dicts(
99
+ self.model.state_dict(),
100
+ checkpoint["model"],
101
+ c2_conversion=checkpoint.get("__author__", None) == "Caffe2",
102
+ )
103
+ # for non-caffe2 models, use standard ways to load it
104
+ incompatible = super()._load_model(checkpoint)
105
+
106
+ model_buffers = dict(self.model.named_buffers(recurse=False))
107
+ for k in ["pixel_mean", "pixel_std"]:
108
+ # Ignore missing key message about pixel_mean/std.
109
+ # Though they may be missing in old checkpoints, they will be correctly
110
+ # initialized from config anyway.
111
+ if k in model_buffers:
112
+ try:
113
+ incompatible.missing_keys.remove(k)
114
+ except ValueError:
115
+ pass
116
+ for k in incompatible.unexpected_keys[:]:
117
+ # Ignore unexpected keys about cell anchors. They exist in old checkpoints
118
+ # but now they are non-persistent buffers and will not be in new checkpoints.
119
+ if "anchor_generator.cell_anchors" in k:
120
+ incompatible.unexpected_keys.remove(k)
121
+ return incompatible
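A minimal usage sketch of the checkpointer above (illustrative; "model" stands for any torch.nn.Module built elsewhere, the checkpoint path is a placeholder, and the import assumes the package __init__ re-exports DetectionCheckpointer as in upstream detectron2):

from detectron2.checkpoint import DetectionCheckpointer

checkpointer = DetectionCheckpointer(model, save_dir="./output")
# .pkl files are routed through the Caffe2 / Detectron2 model zoo branch of _load_file();
# native .pth checkpoints fall through to the fvcore Checkpointer loader.
checkpointer.load("path/to/model_final.pkl")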
detectron2/build/lib.linux-x86_64-3.10/detectron2/config/__init__.py ADDED
@@ -0,0 +1,24 @@
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+ from .compat import downgrade_config, upgrade_config
3
+ from .config import CfgNode, get_cfg, global_cfg, set_global_cfg, configurable
4
+ from .instantiate import instantiate
5
+ from .lazy import LazyCall, LazyConfig
6
+
7
+ __all__ = [
8
+ "CfgNode",
9
+ "get_cfg",
10
+ "global_cfg",
11
+ "set_global_cfg",
12
+ "downgrade_config",
13
+ "upgrade_config",
14
+ "configurable",
15
+ "instantiate",
16
+ "LazyCall",
17
+ "LazyConfig",
18
+ ]
19
+
20
+
21
+ from detectron2.utils.env import fixup_module_metadata
22
+
23
+ fixup_module_metadata(__name__, globals(), __all__)
24
+ del fixup_module_metadata
detectron2/build/lib.linux-x86_64-3.10/detectron2/config/compat.py ADDED
@@ -0,0 +1,229 @@
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+ """
3
+ Backward compatibility of configs.
4
+
5
+ Instructions to bump version:
6
+ + It's not needed to bump version if new keys are added.
7
+ It's only needed when backward-incompatible changes happen
8
+ (i.e., some existing keys disappear, or the meaning of a key changes)
9
+ + To bump version, do the following:
10
+ 1. Increment _C.VERSION in defaults.py
11
+ 2. Add a converter in this file.
12
+
13
+ Each ConverterVX has a function "upgrade" which in-place upgrades config from X-1 to X,
14
+ and a function "downgrade" which in-place downgrades config from X to X-1
15
+
16
+ In each function, VERSION is left unchanged.
17
+
18
+ Each converter assumes that its input has the relevant keys
19
+ (i.e., the input is not a partial config).
20
+ 3. Run the tests (test_config.py) to make sure the upgrade & downgrade
21
+ functions are consistent.
22
+ """
23
+
24
+ import logging
25
+ from typing import List, Optional, Tuple
26
+
27
+ from .config import CfgNode as CN
28
+ from .defaults import _C
29
+
30
+ __all__ = ["upgrade_config", "downgrade_config"]
31
+
32
+
33
+ def upgrade_config(cfg: CN, to_version: Optional[int] = None) -> CN:
34
+ """
35
+ Upgrade a config from its current version to a newer version.
36
+
37
+ Args:
38
+ cfg (CfgNode):
39
+ to_version (int): defaults to the latest version.
40
+ """
41
+ cfg = cfg.clone()
42
+ if to_version is None:
43
+ to_version = _C.VERSION
44
+
45
+ assert cfg.VERSION <= to_version, "Cannot upgrade from v{} to v{}!".format(
46
+ cfg.VERSION, to_version
47
+ )
48
+ for k in range(cfg.VERSION, to_version):
49
+ converter = globals()["ConverterV" + str(k + 1)]
50
+ converter.upgrade(cfg)
51
+ cfg.VERSION = k + 1
52
+ return cfg
53
+
54
+
55
+ def downgrade_config(cfg: CN, to_version: int) -> CN:
56
+ """
57
+ Downgrade a config from its current version to an older version.
58
+
59
+ Args:
60
+ cfg (CfgNode):
61
+ to_version (int):
62
+
63
+ Note:
64
+ A general downgrade of arbitrary configs is not always possible due to the
65
+ different functionalities in different versions.
66
+ The purpose of downgrade is only to recover the defaults in old versions,
67
+ allowing it to load an old partial yaml config.
68
+ Therefore, the implementation only needs to fill in the default values
69
+ in the old version when a general downgrade is not possible.
70
+ """
71
+ cfg = cfg.clone()
72
+ assert cfg.VERSION >= to_version, "Cannot downgrade from v{} to v{}!".format(
73
+ cfg.VERSION, to_version
74
+ )
75
+ for k in range(cfg.VERSION, to_version, -1):
76
+ converter = globals()["ConverterV" + str(k)]
77
+ converter.downgrade(cfg)
78
+ cfg.VERSION = k - 1
79
+ return cfg
80
+
81
+
82
+ def guess_version(cfg: CN, filename: str) -> int:
83
+ """
84
+ Guess the version of a partial config where the VERSION field is not specified.
85
+ Returns the version, or the latest if cannot make a guess.
86
+
87
+ This makes it easier for users to migrate.
88
+ """
89
+ logger = logging.getLogger(__name__)
90
+
91
+ def _has(name: str) -> bool:
92
+ cur = cfg
93
+ for n in name.split("."):
94
+ if n not in cur:
95
+ return False
96
+ cur = cur[n]
97
+ return True
98
+
99
+ # Most users' partial configs have "MODEL.WEIGHT", so guess on it
100
+ ret = None
101
+ if _has("MODEL.WEIGHT") or _has("TEST.AUG_ON"):
102
+ ret = 1
103
+
104
+ if ret is not None:
105
+ logger.warning("Config '{}' has no VERSION. Assuming it to be v{}.".format(filename, ret))
106
+ else:
107
+ ret = _C.VERSION
108
+ logger.warning(
109
+ "Config '{}' has no VERSION. Assuming it to be compatible with latest v{}.".format(
110
+ filename, ret
111
+ )
112
+ )
113
+ return ret
114
+
115
+
116
+ def _rename(cfg: CN, old: str, new: str) -> None:
117
+ old_keys = old.split(".")
118
+ new_keys = new.split(".")
119
+
120
+ def _set(key_seq: List[str], val: str) -> None:
121
+ cur = cfg
122
+ for k in key_seq[:-1]:
123
+ if k not in cur:
124
+ cur[k] = CN()
125
+ cur = cur[k]
126
+ cur[key_seq[-1]] = val
127
+
128
+ def _get(key_seq: List[str]) -> CN:
129
+ cur = cfg
130
+ for k in key_seq:
131
+ cur = cur[k]
132
+ return cur
133
+
134
+ def _del(key_seq: List[str]) -> None:
135
+ cur = cfg
136
+ for k in key_seq[:-1]:
137
+ cur = cur[k]
138
+ del cur[key_seq[-1]]
139
+ if len(cur) == 0 and len(key_seq) > 1:
140
+ _del(key_seq[:-1])
141
+
142
+ _set(new_keys, _get(old_keys))
143
+ _del(old_keys)
144
+
145
+
146
+ class _RenameConverter:
147
+ """
148
+ A converter that handles simple rename.
149
+ """
150
+
151
+ RENAME: List[Tuple[str, str]] = [] # list of tuples of (old name, new name)
152
+
153
+ @classmethod
154
+ def upgrade(cls, cfg: CN) -> None:
155
+ for old, new in cls.RENAME:
156
+ _rename(cfg, old, new)
157
+
158
+ @classmethod
159
+ def downgrade(cls, cfg: CN) -> None:
160
+ for old, new in cls.RENAME[::-1]:
161
+ _rename(cfg, new, old)
162
+
163
+
164
+ class ConverterV1(_RenameConverter):
165
+ RENAME = [("MODEL.RPN_HEAD.NAME", "MODEL.RPN.HEAD_NAME")]
166
+
167
+
168
+ class ConverterV2(_RenameConverter):
169
+ """
170
+ A large bulk of rename, before public release.
171
+ """
172
+
173
+ RENAME = [
174
+ ("MODEL.WEIGHT", "MODEL.WEIGHTS"),
175
+ ("MODEL.PANOPTIC_FPN.SEMANTIC_LOSS_SCALE", "MODEL.SEM_SEG_HEAD.LOSS_WEIGHT"),
176
+ ("MODEL.PANOPTIC_FPN.RPN_LOSS_SCALE", "MODEL.RPN.LOSS_WEIGHT"),
177
+ ("MODEL.PANOPTIC_FPN.INSTANCE_LOSS_SCALE", "MODEL.PANOPTIC_FPN.INSTANCE_LOSS_WEIGHT"),
178
+ ("MODEL.PANOPTIC_FPN.COMBINE_ON", "MODEL.PANOPTIC_FPN.COMBINE.ENABLED"),
179
+ (
180
+ "MODEL.PANOPTIC_FPN.COMBINE_OVERLAP_THRESHOLD",
181
+ "MODEL.PANOPTIC_FPN.COMBINE.OVERLAP_THRESH",
182
+ ),
183
+ (
184
+ "MODEL.PANOPTIC_FPN.COMBINE_STUFF_AREA_LIMIT",
185
+ "MODEL.PANOPTIC_FPN.COMBINE.STUFF_AREA_LIMIT",
186
+ ),
187
+ (
188
+ "MODEL.PANOPTIC_FPN.COMBINE_INSTANCES_CONFIDENCE_THRESHOLD",
189
+ "MODEL.PANOPTIC_FPN.COMBINE.INSTANCES_CONFIDENCE_THRESH",
190
+ ),
191
+ ("MODEL.ROI_HEADS.SCORE_THRESH", "MODEL.ROI_HEADS.SCORE_THRESH_TEST"),
192
+ ("MODEL.ROI_HEADS.NMS", "MODEL.ROI_HEADS.NMS_THRESH_TEST"),
193
+ ("MODEL.RETINANET.INFERENCE_SCORE_THRESHOLD", "MODEL.RETINANET.SCORE_THRESH_TEST"),
194
+ ("MODEL.RETINANET.INFERENCE_TOPK_CANDIDATES", "MODEL.RETINANET.TOPK_CANDIDATES_TEST"),
195
+ ("MODEL.RETINANET.INFERENCE_NMS_THRESHOLD", "MODEL.RETINANET.NMS_THRESH_TEST"),
196
+ ("TEST.DETECTIONS_PER_IMG", "TEST.DETECTIONS_PER_IMAGE"),
197
+ ("TEST.AUG_ON", "TEST.AUG.ENABLED"),
198
+ ("TEST.AUG_MIN_SIZES", "TEST.AUG.MIN_SIZES"),
199
+ ("TEST.AUG_MAX_SIZE", "TEST.AUG.MAX_SIZE"),
200
+ ("TEST.AUG_FLIP", "TEST.AUG.FLIP"),
201
+ ]
202
+
203
+ @classmethod
204
+ def upgrade(cls, cfg: CN) -> None:
205
+ super().upgrade(cfg)
206
+
207
+ if cfg.MODEL.META_ARCHITECTURE == "RetinaNet":
208
+ _rename(
209
+ cfg, "MODEL.RETINANET.ANCHOR_ASPECT_RATIOS", "MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS"
210
+ )
211
+ _rename(cfg, "MODEL.RETINANET.ANCHOR_SIZES", "MODEL.ANCHOR_GENERATOR.SIZES")
212
+ del cfg["MODEL"]["RPN"]["ANCHOR_SIZES"]
213
+ del cfg["MODEL"]["RPN"]["ANCHOR_ASPECT_RATIOS"]
214
+ else:
215
+ _rename(cfg, "MODEL.RPN.ANCHOR_ASPECT_RATIOS", "MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS")
216
+ _rename(cfg, "MODEL.RPN.ANCHOR_SIZES", "MODEL.ANCHOR_GENERATOR.SIZES")
217
+ del cfg["MODEL"]["RETINANET"]["ANCHOR_SIZES"]
218
+ del cfg["MODEL"]["RETINANET"]["ANCHOR_ASPECT_RATIOS"]
219
+ del cfg["MODEL"]["RETINANET"]["ANCHOR_STRIDES"]
220
+
221
+ @classmethod
222
+ def downgrade(cls, cfg: CN) -> None:
223
+ super().downgrade(cfg)
224
+
225
+ _rename(cfg, "MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS", "MODEL.RPN.ANCHOR_ASPECT_RATIOS")
226
+ _rename(cfg, "MODEL.ANCHOR_GENERATOR.SIZES", "MODEL.RPN.ANCHOR_SIZES")
227
+ cfg.MODEL.RETINANET.ANCHOR_ASPECT_RATIOS = cfg.MODEL.RPN.ANCHOR_ASPECT_RATIOS
228
+ cfg.MODEL.RETINANET.ANCHOR_SIZES = cfg.MODEL.RPN.ANCHOR_SIZES
229
+ cfg.MODEL.RETINANET.ANCHOR_STRIDES = [] # this is not used anywhere in any version
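A small sketch of how the converters above are exercised (illustrative only; get_cfg, upgrade_config and downgrade_config are the functions exported from detectron2.config above, and the latest defaults declare VERSION = 2 further below):

from detectron2.config import get_cfg, upgrade_config, downgrade_config

cfg = get_cfg()                                 # latest-version defaults
old_cfg = downgrade_config(cfg, to_version=1)   # e.g. MODEL.WEIGHTS -> MODEL.WEIGHT
new_cfg = upgrade_config(old_cfg)               # runs ConverterV2.upgrade again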
detectron2/build/lib.linux-x86_64-3.10/detectron2/config/config.py ADDED
@@ -0,0 +1,265 @@
1
+ # -*- coding: utf-8 -*-
2
+ # Copyright (c) Facebook, Inc. and its affiliates.
3
+
4
+ import functools
5
+ import inspect
6
+ import logging
7
+ from fvcore.common.config import CfgNode as _CfgNode
8
+
9
+ from detectron2.utils.file_io import PathManager
10
+
11
+
12
+ class CfgNode(_CfgNode):
13
+ """
14
+ The same as `fvcore.common.config.CfgNode`, but different in:
15
+
16
+ 1. Use unsafe yaml loading by default.
17
+ Note that this may lead to arbitrary code execution: you must not
18
+ load a config file from untrusted sources before manually inspecting
19
+ the content of the file.
20
+ 2. Support config versioning.
21
+ When attempting to merge an old config, it will convert the old config automatically.
22
+
23
+ .. automethod:: clone
24
+ .. automethod:: freeze
25
+ .. automethod:: defrost
26
+ .. automethod:: is_frozen
27
+ .. automethod:: load_yaml_with_base
28
+ .. automethod:: merge_from_list
29
+ .. automethod:: merge_from_other_cfg
30
+ """
31
+
32
+ @classmethod
33
+ def _open_cfg(cls, filename):
34
+ return PathManager.open(filename, "r")
35
+
36
+ # Note that the default value of allow_unsafe is changed to True
37
+ def merge_from_file(self, cfg_filename: str, allow_unsafe: bool = True) -> None:
38
+ """
39
+ Load content from the given config file and merge it into self.
40
+
41
+ Args:
42
+ cfg_filename: config filename
43
+ allow_unsafe: allow unsafe yaml syntax
44
+ """
45
+ assert PathManager.isfile(cfg_filename), f"Config file '{cfg_filename}' does not exist!"
46
+ loaded_cfg = self.load_yaml_with_base(cfg_filename, allow_unsafe=allow_unsafe)
47
+ loaded_cfg = type(self)(loaded_cfg)
48
+
49
+ # defaults.py needs to import CfgNode
50
+ from .defaults import _C
51
+
52
+ latest_ver = _C.VERSION
53
+ assert (
54
+ latest_ver == self.VERSION
55
+ ), "CfgNode.merge_from_file is only allowed on a config object of latest version!"
56
+
57
+ logger = logging.getLogger(__name__)
58
+
59
+ loaded_ver = loaded_cfg.get("VERSION", None)
60
+ if loaded_ver is None:
61
+ from .compat import guess_version
62
+
63
+ loaded_ver = guess_version(loaded_cfg, cfg_filename)
64
+ assert loaded_ver <= self.VERSION, "Cannot merge a v{} config into a v{} config.".format(
65
+ loaded_ver, self.VERSION
66
+ )
67
+
68
+ if loaded_ver == self.VERSION:
69
+ self.merge_from_other_cfg(loaded_cfg)
70
+ else:
71
+ # compat.py needs to import CfgNode
72
+ from .compat import upgrade_config, downgrade_config
73
+
74
+ logger.warning(
75
+ "Loading an old v{} config file '{}' by automatically upgrading to v{}. "
76
+ "See docs/CHANGELOG.md for instructions to update your files.".format(
77
+ loaded_ver, cfg_filename, self.VERSION
78
+ )
79
+ )
80
+ # To convert, first obtain a full config at an old version
81
+ old_self = downgrade_config(self, to_version=loaded_ver)
82
+ old_self.merge_from_other_cfg(loaded_cfg)
83
+ new_config = upgrade_config(old_self)
84
+ self.clear()
85
+ self.update(new_config)
86
+
87
+ def dump(self, *args, **kwargs):
88
+ """
89
+ Returns:
90
+ str: a yaml string representation of the config
91
+ """
92
+ # to make it show up in docs
93
+ return super().dump(*args, **kwargs)
94
+
95
+
96
+ global_cfg = CfgNode()
97
+
98
+
99
+ def get_cfg() -> CfgNode:
100
+ """
101
+ Get a copy of the default config.
102
+
103
+ Returns:
104
+ a detectron2 CfgNode instance.
105
+ """
106
+ from .defaults import _C
107
+
108
+ return _C.clone()
109
+
110
+
111
+ def set_global_cfg(cfg: CfgNode) -> None:
112
+ """
113
+ Let the global config point to the given cfg.
114
+
115
+ Assume that the given "cfg" has the key "KEY", after calling
116
+ `set_global_cfg(cfg)`, the key can be accessed by:
117
+ ::
118
+ from detectron2.config import global_cfg
119
+ print(global_cfg.KEY)
120
+
121
+ By using a hacky global config, you can access these configs anywhere,
122
+ without having to pass the config object or the values deep into the code.
123
+ This is a hacky feature introduced for quick prototyping / research exploration.
124
+ """
125
+ global global_cfg
126
+ global_cfg.clear()
127
+ global_cfg.update(cfg)
128
+
129
+
130
+ def configurable(init_func=None, *, from_config=None):
131
+ """
132
+ Decorate a function or a class's __init__ method so that it can be called
133
+ with a :class:`CfgNode` object using a :func:`from_config` function that translates
134
+ :class:`CfgNode` to arguments.
135
+
136
+ Examples:
137
+ ::
138
+ # Usage 1: Decorator on __init__:
139
+ class A:
140
+ @configurable
141
+ def __init__(self, a, b=2, c=3):
142
+ pass
143
+
144
+ @classmethod
145
+ def from_config(cls, cfg): # 'cfg' must be the first argument
146
+ # Returns kwargs to be passed to __init__
147
+ return {"a": cfg.A, "b": cfg.B}
148
+
149
+ a1 = A(a=1, b=2) # regular construction
150
+ a2 = A(cfg) # construct with a cfg
151
+ a3 = A(cfg, b=3, c=4) # construct with extra overwrite
152
+
153
+ # Usage 2: Decorator on any function. Needs an extra from_config argument:
154
+ @configurable(from_config=lambda cfg: {"a: cfg.A, "b": cfg.B})
155
+ def a_func(a, b=2, c=3):
156
+ pass
157
+
158
+ a1 = a_func(a=1, b=2) # regular call
159
+ a2 = a_func(cfg) # call with a cfg
160
+ a3 = a_func(cfg, b=3, c=4) # call with extra overwrite
161
+
162
+ Args:
163
+ init_func (callable): a class's ``__init__`` method in usage 1. The
164
+ class must have a ``from_config`` classmethod which takes `cfg` as
165
+ the first argument.
166
+ from_config (callable): the from_config function in usage 2. It must take `cfg`
167
+ as its first argument.
168
+ """
169
+
170
+ if init_func is not None:
171
+ assert (
172
+ inspect.isfunction(init_func)
173
+ and from_config is None
174
+ and init_func.__name__ == "__init__"
175
+ ), "Incorrect use of @configurable. Check API documentation for examples."
176
+
177
+ @functools.wraps(init_func)
178
+ def wrapped(self, *args, **kwargs):
179
+ try:
180
+ from_config_func = type(self).from_config
181
+ except AttributeError as e:
182
+ raise AttributeError(
183
+ "Class with @configurable must have a 'from_config' classmethod."
184
+ ) from e
185
+ if not inspect.ismethod(from_config_func):
186
+ raise TypeError("Class with @configurable must have a 'from_config' classmethod.")
187
+
188
+ if _called_with_cfg(*args, **kwargs):
189
+ explicit_args = _get_args_from_config(from_config_func, *args, **kwargs)
190
+ init_func(self, **explicit_args)
191
+ else:
192
+ init_func(self, *args, **kwargs)
193
+
194
+ return wrapped
195
+
196
+ else:
197
+ if from_config is None:
198
+ return configurable # @configurable() is made equivalent to @configurable
199
+ assert inspect.isfunction(
200
+ from_config
201
+ ), "from_config argument of configurable must be a function!"
202
+
203
+ def wrapper(orig_func):
204
+ @functools.wraps(orig_func)
205
+ def wrapped(*args, **kwargs):
206
+ if _called_with_cfg(*args, **kwargs):
207
+ explicit_args = _get_args_from_config(from_config, *args, **kwargs)
208
+ return orig_func(**explicit_args)
209
+ else:
210
+ return orig_func(*args, **kwargs)
211
+
212
+ wrapped.from_config = from_config
213
+ return wrapped
214
+
215
+ return wrapper
216
+
217
+
218
+ def _get_args_from_config(from_config_func, *args, **kwargs):
219
+ """
220
+ Use `from_config` to obtain explicit arguments.
221
+
222
+ Returns:
223
+ dict: arguments to be used for cls.__init__
224
+ """
225
+ signature = inspect.signature(from_config_func)
226
+ if list(signature.parameters.keys())[0] != "cfg":
227
+ if inspect.isfunction(from_config_func):
228
+ name = from_config_func.__name__
229
+ else:
230
+ name = f"{from_config_func.__self__}.from_config"
231
+ raise TypeError(f"{name} must take 'cfg' as the first argument!")
232
+ support_var_arg = any(
233
+ param.kind in [param.VAR_POSITIONAL, param.VAR_KEYWORD]
234
+ for param in signature.parameters.values()
235
+ )
236
+ if support_var_arg: # forward all arguments to from_config, if from_config accepts them
237
+ ret = from_config_func(*args, **kwargs)
238
+ else:
239
+ # forward supported arguments to from_config
240
+ supported_arg_names = set(signature.parameters.keys())
241
+ extra_kwargs = {}
242
+ for name in list(kwargs.keys()):
243
+ if name not in supported_arg_names:
244
+ extra_kwargs[name] = kwargs.pop(name)
245
+ ret = from_config_func(*args, **kwargs)
246
+ # forward the other arguments to __init__
247
+ ret.update(extra_kwargs)
248
+ return ret
249
+
250
+
251
+ def _called_with_cfg(*args, **kwargs):
252
+ """
253
+ Returns:
254
+ bool: whether the arguments contain CfgNode and should be considered
255
+ forwarded to from_config.
256
+ """
257
+ from omegaconf import DictConfig
258
+
259
+ if len(args) and isinstance(args[0], (_CfgNode, DictConfig)):
260
+ return True
261
+ if isinstance(kwargs.pop("cfg", None), (_CfgNode, DictConfig)):
262
+ return True
263
+ # `from_config`'s first argument is forced to be "cfg".
264
+ # So the above check covers all cases.
265
+ return False
detectron2/build/lib.linux-x86_64-3.10/detectron2/config/defaults.py ADDED
@@ -0,0 +1,646 @@
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+ from .config import CfgNode as CN
3
+
4
+ # NOTE: given the new config system
5
+ # (https://detectron2.readthedocs.io/en/latest/tutorials/lazyconfigs.html),
6
+ # we will stop adding new functionalities to default CfgNode.
7
+
8
+ # -----------------------------------------------------------------------------
9
+ # Convention about Training / Test specific parameters
10
+ # -----------------------------------------------------------------------------
11
+ # Whenever an argument can be either used for training or for testing, the
12
+ # corresponding name will be post-fixed by a _TRAIN for a training parameter,
13
+ # or _TEST for a test-specific parameter.
14
+ # For example, the number of images during training will be
15
+ # IMAGES_PER_BATCH_TRAIN, while the number of images for testing will be
16
+ # IMAGES_PER_BATCH_TEST
17
+
18
+ # -----------------------------------------------------------------------------
19
+ # Config definition
20
+ # -----------------------------------------------------------------------------
21
+
22
+ _C = CN()
23
+
24
+ # The version number, to upgrade from old configs to new ones if any
25
+ # changes happen. It's recommended to keep a VERSION in your config file.
26
+ _C.VERSION = 2
27
+
28
+ _C.MODEL = CN()
29
+ _C.MODEL.LOAD_PROPOSALS = False
30
+ _C.MODEL.MASK_ON = False
31
+ _C.MODEL.KEYPOINT_ON = False
32
+ _C.MODEL.DEVICE = "cuda"
33
+ _C.MODEL.META_ARCHITECTURE = "GeneralizedRCNN"
34
+
35
+ # Path (a file path, or URL like detectron2://.., https://..) to a checkpoint file
36
+ # to be loaded to the model. You can find available models in the model zoo.
37
+ _C.MODEL.WEIGHTS = ""
38
+
39
+ # Values to be used for image normalization (BGR order, since INPUT.FORMAT defaults to BGR).
40
+ # To train on images of different number of channels, just set different mean & std.
41
+ # Default values are the mean pixel value from ImageNet: [103.53, 116.28, 123.675]
42
+ _C.MODEL.PIXEL_MEAN = [103.530, 116.280, 123.675]
43
+ # When using pre-trained models in Detectron1 or any MSRA models,
44
+ # std has been absorbed into its conv1 weights, so the std needs to be set 1.
45
+ # Otherwise, you can use [57.375, 57.120, 58.395] (ImageNet std)
46
+ _C.MODEL.PIXEL_STD = [1.0, 1.0, 1.0]
47
+
48
+
49
+ # -----------------------------------------------------------------------------
50
+ # INPUT
51
+ # -----------------------------------------------------------------------------
52
+ _C.INPUT = CN()
53
+ # By default, {MIN,MAX}_SIZE options are used in transforms.ResizeShortestEdge.
54
+ # Please refer to ResizeShortestEdge for detailed definition.
55
+ # Size of the smallest side of the image during training
56
+ _C.INPUT.MIN_SIZE_TRAIN = (800,)
57
+ # Sample size of smallest side by choice or random selection from range given by
58
+ # INPUT.MIN_SIZE_TRAIN
59
+ _C.INPUT.MIN_SIZE_TRAIN_SAMPLING = "choice"
60
+ # Maximum size of the side of the image during training
61
+ _C.INPUT.MAX_SIZE_TRAIN = 1333
62
+ # Size of the smallest side of the image during testing. Set to zero to disable resize in testing.
63
+ _C.INPUT.MIN_SIZE_TEST = 800
64
+ # Maximum size of the side of the image during testing
65
+ _C.INPUT.MAX_SIZE_TEST = 1333
66
+ # Mode for flipping images used in data augmentation during training
67
+ # choose one of ["horizontal, "vertical", "none"]
68
+ _C.INPUT.RANDOM_FLIP = "horizontal"
69
+
70
+ # `True` if cropping is used for data augmentation during training
71
+ _C.INPUT.CROP = CN({"ENABLED": False})
72
+ # Cropping type. See documentation of `detectron2.data.transforms.RandomCrop` for explanation.
73
+ _C.INPUT.CROP.TYPE = "relative_range"
74
+ # Size of crop in range (0, 1] if CROP.TYPE is "relative" or "relative_range" and in number of
75
+ # pixels if CROP.TYPE is "absolute"
76
+ _C.INPUT.CROP.SIZE = [0.9, 0.9]
77
+
78
+
79
+ # Whether the model needs RGB, YUV, HSV etc.
80
+ # Should be one of the modes defined here, as we use PIL to read the image:
81
+ # https://pillow.readthedocs.io/en/stable/handbook/concepts.html#concept-modes
82
+ # with BGR being the one exception. One can set image format to BGR, we will
83
+ # internally use RGB for conversion and flip the channels over
84
+ _C.INPUT.FORMAT = "BGR"
85
+ # The ground truth mask format that the model will use.
86
+ # Mask R-CNN supports either "polygon" or "bitmask" as ground truth.
87
+ _C.INPUT.MASK_FORMAT = "polygon" # alternative: "bitmask"
88
+
89
+
90
+ # -----------------------------------------------------------------------------
91
+ # Dataset
92
+ # -----------------------------------------------------------------------------
93
+ _C.DATASETS = CN()
94
+ # List of the dataset names for training. Must be registered in DatasetCatalog
95
+ # Samples from these datasets will be merged and used as one dataset.
96
+ _C.DATASETS.TRAIN = ()
97
+ # List of the pre-computed proposal files for training, which must be consistent
98
+ # with datasets listed in DATASETS.TRAIN.
99
+ _C.DATASETS.PROPOSAL_FILES_TRAIN = ()
100
+ # Number of top scoring precomputed proposals to keep for training
101
+ _C.DATASETS.PRECOMPUTED_PROPOSAL_TOPK_TRAIN = 2000
102
+ # List of the dataset names for testing. Must be registered in DatasetCatalog
103
+ _C.DATASETS.TEST = ()
104
+ # List of the pre-computed proposal files for test, which must be consistent
105
+ # with datasets listed in DATASETS.TEST.
106
+ _C.DATASETS.PROPOSAL_FILES_TEST = ()
107
+ # Number of top scoring precomputed proposals to keep for test
108
+ _C.DATASETS.PRECOMPUTED_PROPOSAL_TOPK_TEST = 1000
109
+
110
+ # -----------------------------------------------------------------------------
111
+ # DataLoader
112
+ # -----------------------------------------------------------------------------
113
+ _C.DATALOADER = CN()
114
+ # Number of data loading threads
115
+ _C.DATALOADER.NUM_WORKERS = 4
116
+ # If True, each batch should contain only images for which the aspect ratio
117
+ # is compatible. This groups portrait images together, and landscape images
118
+ # are not batched with portrait images.
119
+ _C.DATALOADER.ASPECT_RATIO_GROUPING = True
120
+ # Options: TrainingSampler, RepeatFactorTrainingSampler
121
+ _C.DATALOADER.SAMPLER_TRAIN = "TrainingSampler"
122
+ # Repeat threshold for RepeatFactorTrainingSampler
123
+ _C.DATALOADER.REPEAT_THRESHOLD = 0.0
124
+ # If True, when working on datasets that have instance annotations, the
125
+ # training dataloader will filter out images without associated annotations
126
+ _C.DATALOADER.FILTER_EMPTY_ANNOTATIONS = True
127
+
128
+ # ---------------------------------------------------------------------------- #
129
+ # Backbone options
130
+ # ---------------------------------------------------------------------------- #
131
+ _C.MODEL.BACKBONE = CN()
132
+
133
+ _C.MODEL.BACKBONE.NAME = "build_resnet_backbone"
134
+ # Freeze the first several stages so they are not trained.
135
+ # There are 5 stages in ResNet. The first is a convolution, and the following
136
+ # stages are each group of residual blocks.
137
+ _C.MODEL.BACKBONE.FREEZE_AT = 2
138
+
139
+
140
+ # ---------------------------------------------------------------------------- #
141
+ # FPN options
142
+ # ---------------------------------------------------------------------------- #
143
+ _C.MODEL.FPN = CN()
144
+ # Names of the input feature maps to be used by FPN
145
+ # They must have contiguous power of 2 strides
146
+ # e.g., ["res2", "res3", "res4", "res5"]
147
+ _C.MODEL.FPN.IN_FEATURES = []
148
+ _C.MODEL.FPN.OUT_CHANNELS = 256
149
+
150
+ # Options: "" (no norm), "GN"
151
+ _C.MODEL.FPN.NORM = ""
152
+
153
+ # Types for fusing the FPN top-down and lateral features. Can be either "sum" or "avg"
154
+ _C.MODEL.FPN.FUSE_TYPE = "sum"
155
+
156
+
157
+ # ---------------------------------------------------------------------------- #
158
+ # Proposal generator options
159
+ # ---------------------------------------------------------------------------- #
160
+ _C.MODEL.PROPOSAL_GENERATOR = CN()
161
+ # Current proposal generators include "RPN", "RRPN" and "PrecomputedProposals"
162
+ _C.MODEL.PROPOSAL_GENERATOR.NAME = "RPN"
163
+ # Proposal height and width both need to be greater than MIN_SIZE
164
+ # (at the scale used during training or inference)
165
+ _C.MODEL.PROPOSAL_GENERATOR.MIN_SIZE = 0
166
+
167
+
168
+ # ---------------------------------------------------------------------------- #
169
+ # Anchor generator options
170
+ # ---------------------------------------------------------------------------- #
171
+ _C.MODEL.ANCHOR_GENERATOR = CN()
172
+ # The generator can be any name in the ANCHOR_GENERATOR registry
173
+ _C.MODEL.ANCHOR_GENERATOR.NAME = "DefaultAnchorGenerator"
174
+ # Anchor sizes (i.e. sqrt of area) in absolute pixels w.r.t. the network input.
175
+ # Format: list[list[float]]. SIZES[i] specifies the list of sizes to use for
176
+ # IN_FEATURES[i]; len(SIZES) must be equal to len(IN_FEATURES) or 1.
177
+ # When len(SIZES) == 1, SIZES[0] is used for all IN_FEATURES.
178
+ _C.MODEL.ANCHOR_GENERATOR.SIZES = [[32, 64, 128, 256, 512]]
179
+ # Anchor aspect ratios. For each area given in `SIZES`, anchors with different aspect
180
+ # ratios are generated by an anchor generator.
181
+ # Format: list[list[float]]. ASPECT_RATIOS[i] specifies the list of aspect ratios (H/W)
182
+ # to use for IN_FEATURES[i]; len(ASPECT_RATIOS) == len(IN_FEATURES) must be true,
183
+ # or len(ASPECT_RATIOS) == 1 is true and aspect ratio list ASPECT_RATIOS[0] is used
184
+ # for all IN_FEATURES.
185
+ _C.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS = [[0.5, 1.0, 2.0]]
186
+ # Anchor angles.
187
+ # list[list[float]], the angle in degrees, for each input feature map.
188
+ # ANGLES[i] specifies the list of angles for IN_FEATURES[i].
189
+ _C.MODEL.ANCHOR_GENERATOR.ANGLES = [[-90, 0, 90]]
190
+ # Relative offset between the center of the first anchor and the top-left corner of the image
191
+ # Value has to be in [0, 1). Recommend to use 0.5, which means half stride.
192
+ # The value is not expected to affect model accuracy.
193
+ _C.MODEL.ANCHOR_GENERATOR.OFFSET = 0.0
194
+
195
+ # ---------------------------------------------------------------------------- #
196
+ # RPN options
197
+ # ---------------------------------------------------------------------------- #
198
+ _C.MODEL.RPN = CN()
199
+ _C.MODEL.RPN.HEAD_NAME = "StandardRPNHead" # used by RPN_HEAD_REGISTRY
200
+
201
+ # Names of the input feature maps to be used by RPN
202
+ # e.g., ["p2", "p3", "p4", "p5", "p6"] for FPN
203
+ _C.MODEL.RPN.IN_FEATURES = ["res4"]
204
+ # Remove RPN anchors that go outside the image by BOUNDARY_THRESH pixels
205
+ # Set to -1 or a large value, e.g. 100000, to disable pruning anchors
206
+ _C.MODEL.RPN.BOUNDARY_THRESH = -1
207
+ # IOU overlap ratios [BG_IOU_THRESHOLD, FG_IOU_THRESHOLD]
208
+ # Minimum overlap required between an anchor and ground-truth box for the
209
+ # (anchor, gt box) pair to be a positive example (IoU >= FG_IOU_THRESHOLD
210
+ # ==> positive RPN example: 1)
211
+ # Maximum overlap allowed between an anchor and ground-truth box for the
212
+ # (anchor, gt box) pair to be a negative examples (IoU < BG_IOU_THRESHOLD
213
+ # ==> negative RPN example: 0)
214
+ # Anchors with overlap in between (BG_IOU_THRESHOLD <= IoU < FG_IOU_THRESHOLD)
215
+ # are ignored (-1)
216
+ _C.MODEL.RPN.IOU_THRESHOLDS = [0.3, 0.7]
217
+ _C.MODEL.RPN.IOU_LABELS = [0, -1, 1]
218
+ # Number of regions per image used to train RPN
219
+ _C.MODEL.RPN.BATCH_SIZE_PER_IMAGE = 256
220
+ # Target fraction of foreground (positive) examples per RPN minibatch
221
+ _C.MODEL.RPN.POSITIVE_FRACTION = 0.5
222
+ # Options are: "smooth_l1", "giou", "diou", "ciou"
223
+ _C.MODEL.RPN.BBOX_REG_LOSS_TYPE = "smooth_l1"
224
+ _C.MODEL.RPN.BBOX_REG_LOSS_WEIGHT = 1.0
225
+ # Weights on (dx, dy, dw, dh) for normalizing RPN anchor regression targets
226
+ _C.MODEL.RPN.BBOX_REG_WEIGHTS = (1.0, 1.0, 1.0, 1.0)
227
+ # The transition point from L1 to L2 loss. Set to 0.0 to make the loss simply L1.
228
+ _C.MODEL.RPN.SMOOTH_L1_BETA = 0.0
229
+ _C.MODEL.RPN.LOSS_WEIGHT = 1.0
230
+ # Number of top scoring RPN proposals to keep before applying NMS
231
+ # When FPN is used, this is *per FPN level* (not total)
232
+ _C.MODEL.RPN.PRE_NMS_TOPK_TRAIN = 12000
233
+ _C.MODEL.RPN.PRE_NMS_TOPK_TEST = 6000
234
+ # Number of top scoring RPN proposals to keep after applying NMS
235
+ # When FPN is used, this limit is applied per level and then again to the union
236
+ # of proposals from all levels
237
+ # NOTE: When FPN is used, the meaning of this config is different from Detectron1.
238
+ # It means per-batch topk in Detectron1, but per-image topk here.
239
+ # See the "find_top_rpn_proposals" function for details.
240
+ _C.MODEL.RPN.POST_NMS_TOPK_TRAIN = 2000
241
+ _C.MODEL.RPN.POST_NMS_TOPK_TEST = 1000
242
+ # NMS threshold used on RPN proposals
243
+ _C.MODEL.RPN.NMS_THRESH = 0.7
244
+ # Set this to -1 to use the same number of output channels as input channels.
245
+ _C.MODEL.RPN.CONV_DIMS = [-1]
246
+
247
+ # ---------------------------------------------------------------------------- #
248
+ # ROI HEADS options
249
+ # ---------------------------------------------------------------------------- #
250
+ _C.MODEL.ROI_HEADS = CN()
251
+ _C.MODEL.ROI_HEADS.NAME = "Res5ROIHeads"
252
+ # Number of foreground classes
253
+ _C.MODEL.ROI_HEADS.NUM_CLASSES = 80
254
+ # Names of the input feature maps to be used by ROI heads
255
+ # Currently all heads (box, mask, ...) use the same input feature map list
256
+ # e.g., ["p2", "p3", "p4", "p5"] is commonly used for FPN
257
+ _C.MODEL.ROI_HEADS.IN_FEATURES = ["res4"]
258
+ # IOU overlap ratios [IOU_THRESHOLD]
259
+ # Overlap threshold for an RoI to be considered background (if < IOU_THRESHOLD)
260
+ # Overlap threshold for an RoI to be considered foreground (if >= IOU_THRESHOLD)
261
+ _C.MODEL.ROI_HEADS.IOU_THRESHOLDS = [0.5]
262
+ _C.MODEL.ROI_HEADS.IOU_LABELS = [0, 1]
263
+ # RoI minibatch size *per image* (number of regions of interest [ROIs]) during training
264
+ # Total number of RoIs per training minibatch =
265
+ # ROI_HEADS.BATCH_SIZE_PER_IMAGE * SOLVER.IMS_PER_BATCH
266
+ # E.g., a common configuration is: 512 * 16 = 8192
267
+ _C.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 512
268
+ # Target fraction of RoI minibatch that is labeled foreground (i.e. class > 0)
269
+ _C.MODEL.ROI_HEADS.POSITIVE_FRACTION = 0.25
270
+
271
+ # Only used on test mode
272
+
273
+ # Minimum score threshold (assuming scores in a [0, 1] range); a value chosen to
274
+ # balance obtaining high recall with not having too many low precision
275
+ # detections that will slow down inference post processing steps (like NMS)
276
+ # A default threshold of 0.0 increases AP by ~0.2-0.3 but significantly slows down
277
+ # inference.
278
+ _C.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.05
279
+ # Overlap threshold used for non-maximum suppression (suppress boxes with
280
+ # IoU >= this threshold)
281
+ _C.MODEL.ROI_HEADS.NMS_THRESH_TEST = 0.5
282
+ # If True, augment proposals with ground-truth boxes before sampling proposals to
283
+ # train ROI heads.
284
+ _C.MODEL.ROI_HEADS.PROPOSAL_APPEND_GT = True
285
+
286
+ # ---------------------------------------------------------------------------- #
287
+ # Box Head
288
+ # ---------------------------------------------------------------------------- #
289
+ _C.MODEL.ROI_BOX_HEAD = CN()
290
+ # C4 models don't use the head name option
291
+ # Options for non-C4 models: FastRCNNConvFCHead,
292
+ _C.MODEL.ROI_BOX_HEAD.NAME = ""
293
+ # Options are: "smooth_l1", "giou", "diou", "ciou"
294
+ _C.MODEL.ROI_BOX_HEAD.BBOX_REG_LOSS_TYPE = "smooth_l1"
295
+ # The final scaling coefficient on the box regression loss, used to balance the magnitude of its
296
+ # gradients with other losses in the model. See also `MODEL.ROI_KEYPOINT_HEAD.LOSS_WEIGHT`.
297
+ _C.MODEL.ROI_BOX_HEAD.BBOX_REG_LOSS_WEIGHT = 1.0
298
+ # Default weights on (dx, dy, dw, dh) for normalizing bbox regression targets
299
+ # These are empirically chosen to approximately lead to unit variance targets
300
+ _C.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS = (10.0, 10.0, 5.0, 5.0)
301
+ # The transition point from L1 to L2 loss. Set to 0.0 to make the loss simply L1.
302
+ _C.MODEL.ROI_BOX_HEAD.SMOOTH_L1_BETA = 0.0
303
+ _C.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION = 14
304
+ _C.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO = 0
305
+ # Type of pooling operation applied to the incoming feature map for each RoI
306
+ _C.MODEL.ROI_BOX_HEAD.POOLER_TYPE = "ROIAlignV2"
307
+
308
+ _C.MODEL.ROI_BOX_HEAD.NUM_FC = 0
309
+ # Hidden layer dimension for FC layers in the RoI box head
310
+ _C.MODEL.ROI_BOX_HEAD.FC_DIM = 1024
311
+ _C.MODEL.ROI_BOX_HEAD.NUM_CONV = 0
312
+ # Channel dimension for Conv layers in the RoI box head
313
+ _C.MODEL.ROI_BOX_HEAD.CONV_DIM = 256
314
+ # Normalization method for the convolution layers.
315
+ # Options: "" (no norm), "GN", "SyncBN".
316
+ _C.MODEL.ROI_BOX_HEAD.NORM = ""
317
+ # Whether to use class agnostic for bbox regression
318
+ _C.MODEL.ROI_BOX_HEAD.CLS_AGNOSTIC_BBOX_REG = False
319
+ # If true, RoI heads use bounding boxes predicted by the box head rather than proposal boxes.
320
+ _C.MODEL.ROI_BOX_HEAD.TRAIN_ON_PRED_BOXES = False
321
+
322
+ # Federated loss can be used to improve the training of LVIS
323
+ _C.MODEL.ROI_BOX_HEAD.USE_FED_LOSS = False
324
+ # Sigmoid cross entropy is used with federated loss
325
+ _C.MODEL.ROI_BOX_HEAD.USE_SIGMOID_CE = False
326
+ # The power value applied to image_count when calculating frequency weight
327
+ _C.MODEL.ROI_BOX_HEAD.FED_LOSS_FREQ_WEIGHT_POWER = 0.5
328
+ # Number of classes to keep in total
329
+ _C.MODEL.ROI_BOX_HEAD.FED_LOSS_NUM_CLASSES = 50
330
+
331
+ # ---------------------------------------------------------------------------- #
332
+ # Cascaded Box Head
333
+ # ---------------------------------------------------------------------------- #
334
+ _C.MODEL.ROI_BOX_CASCADE_HEAD = CN()
335
+ # The number of cascade stages is implicitly defined by the length of the following two configs.
336
+ _C.MODEL.ROI_BOX_CASCADE_HEAD.BBOX_REG_WEIGHTS = (
337
+ (10.0, 10.0, 5.0, 5.0),
338
+ (20.0, 20.0, 10.0, 10.0),
339
+ (30.0, 30.0, 15.0, 15.0),
340
+ )
341
+ _C.MODEL.ROI_BOX_CASCADE_HEAD.IOUS = (0.5, 0.6, 0.7)
342
+
343
+
344
+ # ---------------------------------------------------------------------------- #
345
+ # Mask Head
346
+ # ---------------------------------------------------------------------------- #
347
+ _C.MODEL.ROI_MASK_HEAD = CN()
348
+ _C.MODEL.ROI_MASK_HEAD.NAME = "MaskRCNNConvUpsampleHead"
349
+ _C.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION = 14
350
+ _C.MODEL.ROI_MASK_HEAD.POOLER_SAMPLING_RATIO = 0
351
+ _C.MODEL.ROI_MASK_HEAD.NUM_CONV = 0 # The number of convs in the mask head
352
+ _C.MODEL.ROI_MASK_HEAD.CONV_DIM = 256
353
+ # Normalization method for the convolution layers.
354
+ # Options: "" (no norm), "GN", "SyncBN".
355
+ _C.MODEL.ROI_MASK_HEAD.NORM = ""
356
+ # Whether to use class agnostic for mask prediction
357
+ _C.MODEL.ROI_MASK_HEAD.CLS_AGNOSTIC_MASK = False
358
+ # Type of pooling operation applied to the incoming feature map for each RoI
359
+ _C.MODEL.ROI_MASK_HEAD.POOLER_TYPE = "ROIAlignV2"
360
+
361
+
362
+ # ---------------------------------------------------------------------------- #
363
+ # Keypoint Head
364
+ # ---------------------------------------------------------------------------- #
365
+ _C.MODEL.ROI_KEYPOINT_HEAD = CN()
366
+ _C.MODEL.ROI_KEYPOINT_HEAD.NAME = "KRCNNConvDeconvUpsampleHead"
367
+ _C.MODEL.ROI_KEYPOINT_HEAD.POOLER_RESOLUTION = 14
368
+ _C.MODEL.ROI_KEYPOINT_HEAD.POOLER_SAMPLING_RATIO = 0
369
+ _C.MODEL.ROI_KEYPOINT_HEAD.CONV_DIMS = tuple(512 for _ in range(8))
370
+ _C.MODEL.ROI_KEYPOINT_HEAD.NUM_KEYPOINTS = 17 # 17 is the number of keypoints in COCO.
371
+
372
+ # Images with too few (or no) keypoints are excluded from training.
373
+ _C.MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE = 1
374
+ # Normalize by the total number of visible keypoints in the minibatch if True.
375
+ # Otherwise, normalize by the total number of keypoints that could ever exist
376
+ # in the minibatch.
377
+ # The keypoint softmax loss is only calculated on visible keypoints.
378
+ # Since the number of visible keypoints can vary significantly between
379
+ # minibatches, this has the effect of up-weighting the importance of
380
+ # minibatches with few visible keypoints. (Imagine the extreme case of
381
+ # only one visible keypoint versus N: in the case of N, each one
382
+ # contributes 1/N to the gradient compared to the single keypoint
383
+ # determining the gradient direction). Instead, we can normalize the
384
+ # loss by the total number of keypoints, if it were the case that all
385
+ # keypoints were visible in a full minibatch. (Returning to the example,
386
+ # this means that the one visible keypoint contributes as much as each
387
+ # of the N keypoints.)
388
+ _C.MODEL.ROI_KEYPOINT_HEAD.NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS = True
389
+ # Multi-task loss weight to use for keypoints
390
+ # Recommended values:
391
+ # - use 1.0 if NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS is True
392
+ # - use 4.0 if NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS is False
393
+ _C.MODEL.ROI_KEYPOINT_HEAD.LOSS_WEIGHT = 1.0
394
+ # Type of pooling operation applied to the incoming feature map for each RoI
395
+ _C.MODEL.ROI_KEYPOINT_HEAD.POOLER_TYPE = "ROIAlignV2"
396
+
397
+ # ---------------------------------------------------------------------------- #
398
+ # Semantic Segmentation Head
399
+ # ---------------------------------------------------------------------------- #
400
+ _C.MODEL.SEM_SEG_HEAD = CN()
401
+ _C.MODEL.SEM_SEG_HEAD.NAME = "SemSegFPNHead"
402
+ _C.MODEL.SEM_SEG_HEAD.IN_FEATURES = ["p2", "p3", "p4", "p5"]
403
+ # Label in the semantic segmentation ground truth that is ignored, i.e., no loss is calculated for
404
+ # the corresponding pixel.
405
+ _C.MODEL.SEM_SEG_HEAD.IGNORE_VALUE = 255
406
+ # Number of classes in the semantic segmentation head
407
+ _C.MODEL.SEM_SEG_HEAD.NUM_CLASSES = 54
408
+ # Number of channels in the 3x3 convs inside semantic-FPN heads.
409
+ _C.MODEL.SEM_SEG_HEAD.CONVS_DIM = 128
410
+ # Outputs from semantic-FPN heads are up-scaled to the COMMON_STRIDE stride.
411
+ _C.MODEL.SEM_SEG_HEAD.COMMON_STRIDE = 4
412
+ # Normalization method for the convolution layers. Options: "" (no norm), "GN".
413
+ _C.MODEL.SEM_SEG_HEAD.NORM = "GN"
414
+ _C.MODEL.SEM_SEG_HEAD.LOSS_WEIGHT = 1.0
415
+
416
+ _C.MODEL.PANOPTIC_FPN = CN()
417
+ # Scaling of all losses from instance detection / segmentation head.
418
+ _C.MODEL.PANOPTIC_FPN.INSTANCE_LOSS_WEIGHT = 1.0
419
+
420
+ # options when combining instance & semantic segmentation outputs
421
+ _C.MODEL.PANOPTIC_FPN.COMBINE = CN({"ENABLED": True}) # "COMBINE.ENABLED" is deprecated & not used
422
+ _C.MODEL.PANOPTIC_FPN.COMBINE.OVERLAP_THRESH = 0.5
423
+ _C.MODEL.PANOPTIC_FPN.COMBINE.STUFF_AREA_LIMIT = 4096
424
+ _C.MODEL.PANOPTIC_FPN.COMBINE.INSTANCES_CONFIDENCE_THRESH = 0.5
425
+
426
+
427
+ # ---------------------------------------------------------------------------- #
428
+ # RetinaNet Head
429
+ # ---------------------------------------------------------------------------- #
430
+ _C.MODEL.RETINANET = CN()
431
+
432
+ # This is the number of foreground classes.
433
+ _C.MODEL.RETINANET.NUM_CLASSES = 80
434
+
435
+ _C.MODEL.RETINANET.IN_FEATURES = ["p3", "p4", "p5", "p6", "p7"]
436
+
437
+ # Convolutions to use in the cls and bbox tower
438
+ # NOTE: this doesn't include the last conv for logits
439
+ _C.MODEL.RETINANET.NUM_CONVS = 4
440
+
441
+ # IoU overlap ratio [bg, fg] for labeling anchors.
442
+ # Anchors with < bg are labeled negative (0)
443
+ # Anchors with >= bg and < fg are ignored (-1)
444
+ # Anchors with >= fg are labeled positive (1)
445
+ _C.MODEL.RETINANET.IOU_THRESHOLDS = [0.4, 0.5]
446
+ _C.MODEL.RETINANET.IOU_LABELS = [0, -1, 1]
447
+
448
+ # Prior prob for rare case (i.e. foreground) at the beginning of training.
449
+ # This is used to set the bias for the logits layer of the classifier subnet.
450
+ # This improves training stability in the case of heavy class imbalance.
451
+ _C.MODEL.RETINANET.PRIOR_PROB = 0.01
452
+
453
+ # Inference cls score threshold, only anchors with score > INFERENCE_TH are
454
+ # considered for inference (to improve speed)
455
+ _C.MODEL.RETINANET.SCORE_THRESH_TEST = 0.05
456
+ # Select topk candidates before NMS
457
+ _C.MODEL.RETINANET.TOPK_CANDIDATES_TEST = 1000
458
+ _C.MODEL.RETINANET.NMS_THRESH_TEST = 0.5
459
+
460
+ # Weights on (dx, dy, dw, dh) for normalizing Retinanet anchor regression targets
461
+ _C.MODEL.RETINANET.BBOX_REG_WEIGHTS = (1.0, 1.0, 1.0, 1.0)
462
+
463
+ # Loss parameters
464
+ _C.MODEL.RETINANET.FOCAL_LOSS_GAMMA = 2.0
465
+ _C.MODEL.RETINANET.FOCAL_LOSS_ALPHA = 0.25
466
+ _C.MODEL.RETINANET.SMOOTH_L1_LOSS_BETA = 0.1
467
+ # Options are: "smooth_l1", "giou", "diou", "ciou"
468
+ _C.MODEL.RETINANET.BBOX_REG_LOSS_TYPE = "smooth_l1"
469
+
470
+ # One of BN, SyncBN, FrozenBN, GN
471
+ # Only supports GN until unshared norm is implemented
472
+ _C.MODEL.RETINANET.NORM = ""
473
+
474
+
475
+ # ---------------------------------------------------------------------------- #
476
+ # ResNe[X]t options (ResNets = {ResNet, ResNeXt})
477
+ # Note that parts of a resnet may be used for both the backbone and the head
478
+ # These options apply to both
479
+ # ---------------------------------------------------------------------------- #
480
+ _C.MODEL.RESNETS = CN()
481
+
482
+ _C.MODEL.RESNETS.DEPTH = 50
483
+ _C.MODEL.RESNETS.OUT_FEATURES = ["res4"] # res4 for C4 backbone, res2..5 for FPN backbone
484
+
485
+ # Number of groups to use; 1 ==> ResNet; > 1 ==> ResNeXt
486
+ _C.MODEL.RESNETS.NUM_GROUPS = 1
487
+
488
+ # Options: FrozenBN, GN, "SyncBN", "BN"
489
+ _C.MODEL.RESNETS.NORM = "FrozenBN"
490
+
491
+ # Baseline width of each group.
492
+ # Scaling this parameter will scale the width of all bottleneck layers.
493
+ _C.MODEL.RESNETS.WIDTH_PER_GROUP = 64
494
+
495
+ # Place the stride 2 conv on the 1x1 filter
496
+ # Use True only for the original MSRA ResNet; use False for C2 and Torch models
497
+ _C.MODEL.RESNETS.STRIDE_IN_1X1 = True
498
+
499
+ # Apply dilation in stage "res5"
500
+ _C.MODEL.RESNETS.RES5_DILATION = 1
501
+
502
+ # Output width of res2. Scaling this parameter will scale the width of all 1x1 convs in ResNet
503
+ # For R18 and R34, this needs to be set to 64
504
+ _C.MODEL.RESNETS.RES2_OUT_CHANNELS = 256
505
+ _C.MODEL.RESNETS.STEM_OUT_CHANNELS = 64
506
+
507
+ # Apply Deformable Convolution in stages
508
+ # Specify if apply deform_conv on Res2, Res3, Res4, Res5
509
+ _C.MODEL.RESNETS.DEFORM_ON_PER_STAGE = [False, False, False, False]
510
+ # Use True to use modulated deform_conv (DeformableV2, https://arxiv.org/abs/1811.11168);
511
+ # Use False for DeformableV1.
512
+ _C.MODEL.RESNETS.DEFORM_MODULATED = False
513
+ # Number of groups in deformable conv.
514
+ _C.MODEL.RESNETS.DEFORM_NUM_GROUPS = 1
515
+
516
+
517
+ # ---------------------------------------------------------------------------- #
518
+ # Solver
519
+ # ---------------------------------------------------------------------------- #
520
+ _C.SOLVER = CN()
521
+
522
+ # Options: WarmupMultiStepLR, WarmupCosineLR.
523
+ # See detectron2/solver/build.py for definition.
524
+ _C.SOLVER.LR_SCHEDULER_NAME = "WarmupMultiStepLR"
525
+
526
+ _C.SOLVER.MAX_ITER = 40000
527
+
528
+ _C.SOLVER.BASE_LR = 0.001
529
+ # The end lr, only used by WarmupCosineLR
530
+ _C.SOLVER.BASE_LR_END = 0.0
531
+
532
+ _C.SOLVER.MOMENTUM = 0.9
533
+
534
+ _C.SOLVER.NESTEROV = False
535
+
536
+ _C.SOLVER.WEIGHT_DECAY = 0.0001
537
+ # The weight decay that's applied to parameters of normalization layers
538
+ # (typically the affine transformation)
539
+ _C.SOLVER.WEIGHT_DECAY_NORM = 0.0
540
+
541
+ _C.SOLVER.GAMMA = 0.1
542
+ # The iteration numbers at which to decrease the learning rate by GAMMA.
543
+ _C.SOLVER.STEPS = (30000,)
544
+
545
+ _C.SOLVER.WARMUP_FACTOR = 1.0 / 1000
546
+ _C.SOLVER.WARMUP_ITERS = 1000
547
+ _C.SOLVER.WARMUP_METHOD = "linear"
548
+
549
+ # Save a checkpoint after every this number of iterations
550
+ _C.SOLVER.CHECKPOINT_PERIOD = 5000
551
+
552
+ # Number of images per batch across all machines. This is also the number
553
+ # of training images per step (i.e. per iteration). If we use 16 GPUs
554
+ # and IMS_PER_BATCH = 32, each GPU will see 2 images per batch.
555
+ # May be adjusted automatically if REFERENCE_WORLD_SIZE is set.
556
+ _C.SOLVER.IMS_PER_BATCH = 16
557
+
558
+ # The reference number of workers (GPUs) this config is meant to train with.
559
+ # It has no effect when set to 0.
560
+ # With a non-zero value, it will be used by DefaultTrainer to compute a desired
561
+ # per-worker batch size, and then scale the other related configs (total batch size,
562
+ # learning rate, etc) to match the per-worker batch size.
563
+ # See documentation of `DefaultTrainer.auto_scale_workers` for details:
564
+ _C.SOLVER.REFERENCE_WORLD_SIZE = 0
565
+
566
+ # Detectron v1 (and previous detection code) used a 2x higher LR and 0 WD for
567
+ # biases. This is not useful (at least for recent models). You should avoid
568
+ # changing these and they exist only to reproduce Detectron v1 training if
569
+ # desired.
570
+ _C.SOLVER.BIAS_LR_FACTOR = 1.0
571
+ _C.SOLVER.WEIGHT_DECAY_BIAS = None # None means following WEIGHT_DECAY
572
+
573
+ # Gradient clipping
574
+ _C.SOLVER.CLIP_GRADIENTS = CN({"ENABLED": False})
575
+ # Type of gradient clipping, currently 2 values are supported:
576
+ # - "value": the absolute values of elements of each gradients are clipped
577
+ # - "norm": the norm of the gradient for each parameter is clipped thus
578
+ # affecting all elements in the parameter
579
+ _C.SOLVER.CLIP_GRADIENTS.CLIP_TYPE = "value"
580
+ # Maximum absolute value used for clipping gradients
581
+ _C.SOLVER.CLIP_GRADIENTS.CLIP_VALUE = 1.0
582
+ # Floating point number p for L-p norm to be used with the "norm"
583
+ # gradient clipping type; for L-inf, please specify .inf
584
+ _C.SOLVER.CLIP_GRADIENTS.NORM_TYPE = 2.0
585
+
586
+ # Enable automatic mixed precision for training
587
+ # Note that this does not change model's inference behavior.
588
+ # To use AMP in inference, run inference under autocast()
589
+ _C.SOLVER.AMP = CN({"ENABLED": False})
590
+
591
+ # ---------------------------------------------------------------------------- #
592
+ # Specific test options
593
+ # ---------------------------------------------------------------------------- #
594
+ _C.TEST = CN()
595
+ # For end-to-end tests to verify the expected accuracy.
596
+ # Each item is [task, metric, value, tolerance]
597
+ # e.g.: [['bbox', 'AP', 38.5, 0.2]]
598
+ _C.TEST.EXPECTED_RESULTS = []
599
+ # The period (in terms of steps) to evaluate the model during training.
600
+ # Set to 0 to disable.
601
+ _C.TEST.EVAL_PERIOD = 0
602
+ # The sigmas used to calculate keypoint OKS. See http://cocodataset.org/#keypoints-eval
603
+ # When empty, it will use the defaults in COCO.
604
+ # Otherwise it should be a list[float] with the same length as ROI_KEYPOINT_HEAD.NUM_KEYPOINTS.
605
+ _C.TEST.KEYPOINT_OKS_SIGMAS = []
606
+ # Maximum number of detections to return per image during inference (100 is
607
+ # based on the limit established for the COCO dataset).
608
+ _C.TEST.DETECTIONS_PER_IMAGE = 100
609
+
610
+ _C.TEST.AUG = CN({"ENABLED": False})
611
+ _C.TEST.AUG.MIN_SIZES = (400, 500, 600, 700, 800, 900, 1000, 1100, 1200)
612
+ _C.TEST.AUG.MAX_SIZE = 4000
613
+ _C.TEST.AUG.FLIP = True
614
+
615
+ _C.TEST.PRECISE_BN = CN({"ENABLED": False})
616
+ _C.TEST.PRECISE_BN.NUM_ITER = 200
617
+
618
+ # ---------------------------------------------------------------------------- #
619
+ # Misc options
620
+ # ---------------------------------------------------------------------------- #
621
+ # Directory where output files are written
622
+ _C.OUTPUT_DIR = "./output"
623
+ # Set seed to negative to fully randomize everything.
624
+ # Set seed to positive to use a fixed seed. Note that a fixed seed increases
625
+ # reproducibility but does not guarantee fully deterministic behavior.
626
+ # Disabling all parallelism further increases reproducibility.
627
+ _C.SEED = -1
628
+ # Benchmark different cudnn algorithms.
629
+ # If input images have very different sizes, this option will have large overhead
630
+ # for about 10k iterations. It usually hurts total time, but can benefit certain models.
631
+ # If input images have the same or similar sizes, benchmark is often helpful.
632
+ _C.CUDNN_BENCHMARK = False
633
+ # The period (in terms of steps) for minibatch visualization at train time.
634
+ # Set to 0 to disable.
635
+ _C.VIS_PERIOD = 0
636
+
637
+ # global config is for quick hack purposes.
638
+ # You can set them in command line or config files,
639
+ # and access it with:
640
+ #
641
+ # from detectron2.config import global_cfg
642
+ # print(global_cfg.HACK)
643
+ #
644
+ # Do not commit any configs into it.
645
+ _C.GLOBAL = CN()
646
+ _C.GLOBAL.HACK = 1.0
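A brief usage sketch of the solver/test defaults above (assuming the standard detectron2 get_cfg() entry point; the override values are illustrative only):

    from detectron2.config import get_cfg

    cfg = get_cfg()                      # starts from the _C defaults defined in this file
    cfg.SOLVER.IMS_PER_BATCH = 8         # total images per iteration across all GPUs
    cfg.SOLVER.BASE_LR = 0.0005
    cfg.SOLVER.MAX_ITER = 20000
    cfg.SOLVER.STEPS = (15000,)          # iterations at which the LR is multiplied by GAMMA
    cfg.TEST.EVAL_PERIOD = 2000          # evaluate every 2000 iterations (0 disables)
    cfg.freeze()
    print(cfg.SOLVER.LR_SCHEDULER_NAME)  # "WarmupMultiStepLR" unless overridden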
detectron2/build/lib.linux-x86_64-3.10/detectron2/config/instantiate.py ADDED
@@ -0,0 +1,83 @@
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+
3
+ import collections.abc as abc
4
+ import dataclasses
5
+ import logging
6
+ from typing import Any
7
+
8
+ from detectron2.utils.registry import _convert_target_to_string, locate
9
+
10
+ __all__ = ["dump_dataclass", "instantiate"]
11
+
12
+
13
+ def dump_dataclass(obj: Any):
14
+ """
15
+ Dump a dataclass recursively into a dict that can be later instantiated.
16
+
17
+ Args:
18
+ obj: a dataclass object
19
+
20
+ Returns:
21
+ dict
22
+ """
23
+ assert dataclasses.is_dataclass(obj) and not isinstance(
24
+ obj, type
25
+ ), "dump_dataclass() requires an instance of a dataclass."
26
+ ret = {"_target_": _convert_target_to_string(type(obj))}
27
+ for f in dataclasses.fields(obj):
28
+ v = getattr(obj, f.name)
29
+ if dataclasses.is_dataclass(v):
30
+ v = dump_dataclass(v)
31
+ if isinstance(v, (list, tuple)):
32
+ v = [dump_dataclass(x) if dataclasses.is_dataclass(x) else x for x in v]
33
+ ret[f.name] = v
34
+ return ret
35
+
36
+
37
+ def instantiate(cfg):
38
+ """
39
+ Recursively instantiate objects defined in dictionaries by
40
+ "_target_" and arguments.
41
+
42
+ Args:
43
+ cfg: a dict-like object with "_target_" that defines the caller, and
44
+ other keys that define the arguments
45
+
46
+ Returns:
47
+ object instantiated by cfg
48
+ """
49
+ from omegaconf import ListConfig
50
+
51
+ if isinstance(cfg, ListConfig):
52
+ lst = [instantiate(x) for x in cfg]
53
+ return ListConfig(lst, flags={"allow_objects": True})
54
+ if isinstance(cfg, list):
55
+ # Specialize for list, because many classes take
56
+ # list[objects] as arguments, such as ResNet, DatasetMapper
57
+ return [instantiate(x) for x in cfg]
58
+
59
+ if isinstance(cfg, abc.Mapping) and "_target_" in cfg:
60
+ # conceptually equivalent to hydra.utils.instantiate(cfg) with _convert_=all,
61
+ # but faster: https://github.com/facebookresearch/hydra/issues/1200
62
+ cfg = {k: instantiate(v) for k, v in cfg.items()}
63
+ cls = cfg.pop("_target_")
64
+ cls = instantiate(cls)
65
+
66
+ if isinstance(cls, str):
67
+ cls_name = cls
68
+ cls = locate(cls_name)
69
+ assert cls is not None, cls_name
70
+ else:
71
+ try:
72
+ cls_name = cls.__module__ + "." + cls.__qualname__
73
+ except Exception:
74
+ # target could be anything, so the above could fail
75
+ cls_name = str(cls)
76
+ assert callable(cls), f"_target_ {cls} does not define a callable object"
77
+ try:
78
+ return cls(**cfg)
79
+ except TypeError:
80
+ logger = logging.getLogger(__name__)
81
+ logger.error(f"Error when instantiating {cls_name}!")
82
+ raise
83
+ return cfg # return as-is if we don't know what to do
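A minimal sketch of instantiate() driven by a plain dict, as described in the docstring above (torch.nn.Conv2d is just an example target; any importable callable works):

    from detectron2.config import instantiate

    layer_cfg = {
        "_target_": "torch.nn.Conv2d",   # a string target is resolved via locate()
        "in_channels": 3,
        "out_channels": 16,
        "kernel_size": 3,
    }
    layer = instantiate(layer_cfg)       # equivalent to torch.nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3)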
detectron2/build/lib.linux-x86_64-3.10/detectron2/config/lazy.py ADDED
@@ -0,0 +1,400 @@
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+
3
+ import ast
4
+ import builtins
5
+ import collections.abc as abc
6
+ import importlib
7
+ import inspect
8
+ import logging
9
+ import os
10
+ import uuid
11
+ from contextlib import contextmanager
12
+ from copy import deepcopy
13
+ from dataclasses import is_dataclass
14
+ from typing import List, Tuple, Union
15
+ import cloudpickle
16
+ import yaml
17
+ from omegaconf import DictConfig, ListConfig, OmegaConf
18
+
19
+ from detectron2.utils.file_io import PathManager
20
+ from detectron2.utils.registry import _convert_target_to_string
21
+
22
+ __all__ = ["LazyCall", "LazyConfig"]
23
+
24
+
25
+ class LazyCall:
26
+ """
27
+ Wrap a callable so that when it's called, the call will not be executed,
28
+ but returns a dict that describes the call.
29
+
30
+ LazyCall object has to be called with only keyword arguments. Positional
31
+ arguments are not yet supported.
32
+
33
+ Examples:
34
+ ::
35
+ from detectron2.config import instantiate, LazyCall
36
+
37
+ layer_cfg = LazyCall(nn.Conv2d)(in_channels=32, out_channels=32)
38
+ layer_cfg.out_channels = 64 # can edit it afterwards
39
+ layer = instantiate(layer_cfg)
40
+ """
41
+
42
+ def __init__(self, target):
43
+ if not (callable(target) or isinstance(target, (str, abc.Mapping))):
44
+ raise TypeError(
45
+ f"target of LazyCall must be a callable or defines a callable! Got {target}"
46
+ )
47
+ self._target = target
48
+
49
+ def __call__(self, **kwargs):
50
+ if is_dataclass(self._target):
51
+ # omegaconf object cannot hold dataclass type
52
+ # https://github.com/omry/omegaconf/issues/784
53
+ target = _convert_target_to_string(self._target)
54
+ else:
55
+ target = self._target
56
+ kwargs["_target_"] = target
57
+
58
+ return DictConfig(content=kwargs, flags={"allow_objects": True})
59
+
60
+
61
+ def _visit_dict_config(cfg, func):
62
+ """
63
+ Apply func recursively to all DictConfig in cfg.
64
+ """
65
+ if isinstance(cfg, DictConfig):
66
+ func(cfg)
67
+ for v in cfg.values():
68
+ _visit_dict_config(v, func)
69
+ elif isinstance(cfg, ListConfig):
70
+ for v in cfg:
71
+ _visit_dict_config(v, func)
72
+
73
+
74
+ def _validate_py_syntax(filename):
75
+ # see also https://github.com/open-mmlab/mmcv/blob/master/mmcv/utils/config.py
76
+ with PathManager.open(filename, "r") as f:
77
+ content = f.read()
78
+ try:
79
+ ast.parse(content)
80
+ except SyntaxError as e:
81
+ raise SyntaxError(f"Config file {filename} has syntax error!") from e
82
+
83
+
84
+ def _cast_to_config(obj):
85
+ # if given a dict, return DictConfig instead
86
+ if isinstance(obj, dict):
87
+ return DictConfig(obj, flags={"allow_objects": True})
88
+ return obj
89
+
90
+
91
+ _CFG_PACKAGE_NAME = "detectron2._cfg_loader"
92
+ """
93
+ A namespace to put all imported config into.
94
+ """
95
+
96
+
97
+ def _random_package_name(filename):
98
+ # generate a random package name when loading config files
99
+ return _CFG_PACKAGE_NAME + str(uuid.uuid4())[:4] + "." + os.path.basename(filename)
100
+
101
+
102
+ @contextmanager
103
+ def _patch_import():
104
+ """
105
+ Enhance relative import statements in config files, so that they:
106
+ 1. locate files purely based on relative location, regardless of packages.
107
+ e.g. you can import a file without having an __init__
108
+ 2. do not cache modules globally; modifications of module state have no side effects
109
+ 3. support other storage systems through PathManager
110
+ 4. imported dicts are turned into omegaconf.DictConfig automatically
111
+ """
112
+ old_import = builtins.__import__
113
+
114
+ def find_relative_file(original_file, relative_import_path, level):
115
+ cur_file = os.path.dirname(original_file)
116
+ for _ in range(level - 1):
117
+ cur_file = os.path.dirname(cur_file)
118
+ cur_name = relative_import_path.lstrip(".")
119
+ for part in cur_name.split("."):
120
+ cur_file = os.path.join(cur_file, part)
121
+ # NOTE: directory import is not handled. Because then it's unclear
122
+ # if such import should produce python module or DictConfig. This can
123
+ # be discussed further if needed.
124
+ if not cur_file.endswith(".py"):
125
+ cur_file += ".py"
126
+ if not PathManager.isfile(cur_file):
127
+ raise ImportError(
128
+ f"Cannot import name {relative_import_path} from "
129
+ f"{original_file}: {cur_file} has to exist."
130
+ )
131
+ return cur_file
132
+
133
+ def new_import(name, globals=None, locals=None, fromlist=(), level=0):
134
+ if (
135
+ # Only deal with relative imports inside config files
136
+ level != 0
137
+ and globals is not None
138
+ and (globals.get("__package__", "") or "").startswith(_CFG_PACKAGE_NAME)
139
+ ):
140
+ cur_file = find_relative_file(globals["__file__"], name, level)
141
+ _validate_py_syntax(cur_file)
142
+ spec = importlib.machinery.ModuleSpec(
143
+ _random_package_name(cur_file), None, origin=cur_file
144
+ )
145
+ module = importlib.util.module_from_spec(spec)
146
+ module.__file__ = cur_file
147
+ with PathManager.open(cur_file) as f:
148
+ content = f.read()
149
+ exec(compile(content, cur_file, "exec"), module.__dict__)
150
+ for name in fromlist: # turn imported dict into DictConfig automatically
151
+ val = _cast_to_config(module.__dict__[name])
152
+ module.__dict__[name] = val
153
+ return module
154
+ return old_import(name, globals, locals, fromlist=fromlist, level=level)
155
+
156
+ builtins.__import__ = new_import
157
+ yield new_import
158
+ builtins.__import__ = old_import
159
+
160
+
161
+ class LazyConfig:
162
+ """
163
+ Provide methods to save, load, and override an omegaconf config object
164
+ which may contain definition of lazily-constructed objects.
165
+ """
166
+
167
+ @staticmethod
168
+ def load_rel(filename: str, keys: Union[None, str, Tuple[str, ...]] = None):
169
+ """
170
+ Similar to :meth:`load()`, but load path relative to the caller's
171
+ source file.
172
+
173
+ This has the same functionality as a relative import, except that this method
174
+ accepts filename as a string, so more characters are allowed in the filename.
175
+ """
176
+ caller_frame = inspect.stack()[1]
177
+ caller_fname = caller_frame[0].f_code.co_filename
178
+ assert caller_fname != "<string>", "load_rel Unable to find caller"
179
+ caller_dir = os.path.dirname(caller_fname)
180
+ filename = os.path.join(caller_dir, filename)
181
+ return LazyConfig.load(filename, keys)
182
+
183
+ @staticmethod
184
+ def load(filename: str, keys: Union[None, str, Tuple[str, ...]] = None):
185
+ """
186
+ Load a config file.
187
+
188
+ Args:
189
+ filename: absolute path or relative path w.r.t. the current working directory
190
+ keys: keys to load and return. If not given, return all keys
191
+ (whose values are config objects) in a dict.
192
+ """
193
+ has_keys = keys is not None
194
+ filename = filename.replace("/./", "/") # redundant
195
+ if os.path.splitext(filename)[1] not in [".py", ".yaml", ".yml"]:
196
+ raise ValueError(f"Config file {filename} has to be a python or yaml file.")
197
+ if filename.endswith(".py"):
198
+ _validate_py_syntax(filename)
199
+
200
+ with _patch_import():
201
+ # Record the filename
202
+ module_namespace = {
203
+ "__file__": filename,
204
+ "__package__": _random_package_name(filename),
205
+ }
206
+ with PathManager.open(filename) as f:
207
+ content = f.read()
208
+ # Compile first with filename to:
209
+ # 1. make the filename appear in stack traces
210
+ # 2. make load_rel able to find its parent's (possibly remote) location
211
+ exec(compile(content, filename, "exec"), module_namespace)
212
+
213
+ ret = module_namespace
214
+ else:
215
+ with PathManager.open(filename) as f:
216
+ obj = yaml.unsafe_load(f)
217
+ ret = OmegaConf.create(obj, flags={"allow_objects": True})
218
+
219
+ if has_keys:
220
+ if isinstance(keys, str):
221
+ return _cast_to_config(ret[keys])
222
+ else:
223
+ return tuple(_cast_to_config(ret[a]) for a in keys)
224
+ else:
225
+ if filename.endswith(".py"):
226
+ # when not specified, only load those that are config objects
227
+ ret = DictConfig(
228
+ {
229
+ name: _cast_to_config(value)
230
+ for name, value in ret.items()
231
+ if isinstance(value, (DictConfig, ListConfig, dict))
232
+ and not name.startswith("_")
233
+ },
234
+ flags={"allow_objects": True},
235
+ )
236
+ return ret
237
+
238
+ @staticmethod
239
+ def save(cfg, filename: str):
240
+ """
241
+ Save a config object to a yaml file.
242
+ Note that when the config dictionary contains complex objects (e.g. lambda),
243
+ it can't be saved to yaml. In that case we will print an error and
244
+ attempt to save to a pkl file instead.
245
+
246
+ Args:
247
+ cfg: an omegaconf config object
248
+ filename: yaml file name to save the config file
249
+ """
250
+ logger = logging.getLogger(__name__)
251
+ try:
252
+ cfg = deepcopy(cfg)
253
+ except Exception:
254
+ pass
255
+ else:
256
+ # if it's deep-copyable, then...
257
+ def _replace_type_by_name(x):
258
+ if "_target_" in x and callable(x._target_):
259
+ try:
260
+ x._target_ = _convert_target_to_string(x._target_)
261
+ except AttributeError:
262
+ pass
263
+
264
+ # not necessary, but makes the yaml look nicer
265
+ _visit_dict_config(cfg, _replace_type_by_name)
266
+
267
+ save_pkl = False
268
+ try:
269
+ cfg_dict = OmegaConf.to_container(cfg, resolve=False)  # avoid shadowing the builtin `dict`
270
+ dumped = yaml.dump(cfg_dict, default_flow_style=None, allow_unicode=True, width=9999)
271
+ with PathManager.open(filename, "w") as f:
272
+ f.write(dumped)
273
+
274
+ try:
275
+ _ = yaml.unsafe_load(dumped) # test that it is loadable
276
+ except Exception:
277
+ logger.warning(
278
+ "The config contains objects that cannot serialize to a valid yaml. "
279
+ f"{filename} is human-readable but cannot be loaded."
280
+ )
281
+ save_pkl = True
282
+ except Exception:
283
+ logger.exception("Unable to serialize the config to yaml. Error:")
284
+ save_pkl = True
285
+
286
+ if save_pkl:
287
+ new_filename = filename + ".pkl"
288
+ try:
289
+ # retry by pickle
290
+ with PathManager.open(new_filename, "wb") as f:
291
+ cloudpickle.dump(cfg, f)
292
+ logger.warning(f"Config is saved using cloudpickle at {new_filename}.")
293
+ except Exception:
294
+ pass
295
+
296
+ @staticmethod
297
+ def apply_overrides(cfg, overrides: List[str]):
298
+ """
299
+ In-place override contents of cfg.
300
+
301
+ Args:
302
+ cfg: an omegaconf config object
303
+ overrides: list of strings in the format of "a=b" to override configs.
304
+ See https://hydra.cc/docs/next/advanced/override_grammar/basic/
305
+ for syntax.
306
+
307
+ Returns:
308
+ the cfg object
309
+ """
310
+
311
+ def safe_update(cfg, key, value):
312
+ parts = key.split(".")
313
+ for idx in range(1, len(parts)):
314
+ prefix = ".".join(parts[:idx])
315
+ v = OmegaConf.select(cfg, prefix, default=None)
316
+ if v is None:
317
+ break
318
+ if not OmegaConf.is_config(v):
319
+ raise KeyError(
320
+ f"Trying to update key {key}, but {prefix} "
321
+ f"is not a config, but has type {type(v)}."
322
+ )
323
+ OmegaConf.update(cfg, key, value, merge=True)
324
+
325
+ from hydra.core.override_parser.overrides_parser import OverridesParser
326
+
327
+ parser = OverridesParser.create()
328
+ overrides = parser.parse_overrides(overrides)
329
+ for o in overrides:
330
+ key = o.key_or_group
331
+ value = o.value()
332
+ if o.is_delete():
333
+ # TODO support this
334
+ raise NotImplementedError("deletion is not yet a supported override")
335
+ safe_update(cfg, key, value)
336
+ return cfg
337
+
338
+ @staticmethod
339
+ def to_py(cfg, prefix: str = "cfg."):
340
+ """
341
+ Try to convert a config object into Python-like pseudo code.
342
+
343
+ Note that perfect conversion is not always possible. So the returned
344
+ results are mainly meant to be human-readable, and not meant to be executed.
345
+
346
+ Args:
347
+ cfg: an omegaconf config object
348
+ prefix: root name for the resulting code (default: "cfg.")
349
+
350
+
351
+ Returns:
352
+ str of formatted Python code
353
+ """
354
+ import black
355
+
356
+ cfg = OmegaConf.to_container(cfg, resolve=True)
357
+
358
+ def _to_str(obj, prefix=None, inside_call=False):
359
+ if prefix is None:
360
+ prefix = []
361
+ if isinstance(obj, abc.Mapping) and "_target_" in obj:
362
+ # Dict representing a function call
363
+ target = _convert_target_to_string(obj.pop("_target_"))
364
+ args = []
365
+ for k, v in sorted(obj.items()):
366
+ args.append(f"{k}={_to_str(v, inside_call=True)}")
367
+ args = ", ".join(args)
368
+ call = f"{target}({args})"
369
+ return "".join(prefix) + call
370
+ elif isinstance(obj, abc.Mapping) and not inside_call:
371
+ # Dict that is not inside a call is a list of top-level config objects that we
372
+ # render as one object per line with dot separated prefixes
373
+ key_list = []
374
+ for k, v in sorted(obj.items()):
375
+ if isinstance(v, abc.Mapping) and "_target_" not in v:
376
+ key_list.append(_to_str(v, prefix=prefix + [k + "."]))
377
+ else:
378
+ key = "".join(prefix) + k
379
+ key_list.append(f"{key}={_to_str(v)}")
380
+ return "\n".join(key_list)
381
+ elif isinstance(obj, abc.Mapping):
382
+ # Dict that is inside a call is rendered as a regular dict
383
+ return (
384
+ "{"
385
+ + ",".join(
386
+ f"{repr(k)}: {_to_str(v, inside_call=inside_call)}"
387
+ for k, v in sorted(obj.items())
388
+ )
389
+ + "}"
390
+ )
391
+ elif isinstance(obj, list):
392
+ return "[" + ",".join(_to_str(x, inside_call=inside_call) for x in obj) + "]"
393
+ else:
394
+ return repr(obj)
395
+
396
+ py_str = _to_str(cfg, prefix=[prefix])
397
+ try:
398
+ return black.format_str(py_str, mode=black.Mode())
399
+ except black.InvalidInput:
400
+ return py_str
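A hedged sketch of the LazyCall/LazyConfig workflow defined above. The config file path and its contents are hypothetical; only the API calls come from this module:

    # Contents of a hypothetical my_cfg.py:
    #     from detectron2.config import LazyCall as L
    #     import torch.nn as nn
    #     model = L(nn.Linear)(in_features=16, out_features=4)
    #     train = dict(max_iter=1000)

    from detectron2.config import LazyConfig, instantiate

    cfg = LazyConfig.load("my_cfg.py")        # DictConfig holding the top-level config objects
    cfg = LazyConfig.apply_overrides(cfg, ["train.max_iter=90000", "model.out_features=10"])
    model = instantiate(cfg.model)            # the nn.Linear is only built at this point
    LazyConfig.save(cfg, "my_cfg_dump.yaml")  # falls back to a .pkl if yaml serialization fails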
detectron2/build/lib.linux-x86_64-3.10/detectron2/data/__init__.py ADDED
@@ -0,0 +1,19 @@
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+ from . import transforms # isort:skip
3
+
4
+ from .build import (
5
+ build_batch_data_loader,
6
+ build_detection_test_loader,
7
+ build_detection_train_loader,
8
+ get_detection_dataset_dicts,
9
+ load_proposals_into_dataset,
10
+ print_instances_class_histogram,
11
+ )
12
+ from .catalog import DatasetCatalog, MetadataCatalog, Metadata
13
+ from .common import DatasetFromList, MapDataset, ToIterableDataset
14
+ from .dataset_mapper import DatasetMapper
15
+
16
+ # ensure the builtin datasets are registered
17
+ from . import datasets, samplers # isort:skip
18
+
19
+ __all__ = [k for k in globals().keys() if not k.startswith("_")]
detectron2/build/lib.linux-x86_64-3.10/detectron2/data/benchmark.py ADDED
@@ -0,0 +1,225 @@
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+ import logging
3
+ import numpy as np
4
+ from itertools import count
5
+ from typing import List, Tuple
6
+ import torch
7
+ import tqdm
8
+ from fvcore.common.timer import Timer
9
+
10
+ from detectron2.utils import comm
11
+
12
+ from .build import build_batch_data_loader
13
+ from .common import DatasetFromList, MapDataset
14
+ from .samplers import TrainingSampler
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
+ class _EmptyMapDataset(torch.utils.data.Dataset):
20
+ """
21
+ Map anything to emptiness.
22
+ """
23
+
24
+ def __init__(self, dataset):
25
+ self.ds = dataset
26
+
27
+ def __len__(self):
28
+ return len(self.ds)
29
+
30
+ def __getitem__(self, idx):
31
+ _ = self.ds[idx]
32
+ return [0]
33
+
34
+
35
+ def iter_benchmark(
36
+ iterator, num_iter: int, warmup: int = 5, max_time_seconds: float = 60
37
+ ) -> Tuple[float, List[float]]:
38
+ """
39
+ Benchmark an iterator/iterable for `num_iter` iterations with an extra
40
+ `warmup` iterations of warmup.
41
+ End early if `max_time_seconds` time is spent on iterations.
42
+
43
+ Returns:
44
+ float: average time (seconds) per iteration
45
+ list[float]: time spent on each iteration. Sometimes useful for further analysis.
46
+ """
47
+ num_iter, warmup = int(num_iter), int(warmup)
48
+
49
+ iterator = iter(iterator)
50
+ for _ in range(warmup):
51
+ next(iterator)
52
+ timer = Timer()
53
+ all_times = []
54
+ for curr_iter in tqdm.trange(num_iter):
55
+ start = timer.seconds()
56
+ if start > max_time_seconds:
57
+ num_iter = curr_iter
58
+ break
59
+ next(iterator)
60
+ all_times.append(timer.seconds() - start)
61
+ avg = timer.seconds() / num_iter
62
+ return avg, all_times
63
+
64
+
65
+ class DataLoaderBenchmark:
66
+ """
67
+ Some common benchmarks that help understand the performance bottlenecks of a standard dataloader
68
+ made of dataset, mapper and sampler.
69
+ """
70
+
71
+ def __init__(
72
+ self,
73
+ dataset,
74
+ *,
75
+ mapper,
76
+ sampler=None,
77
+ total_batch_size,
78
+ num_workers=0,
79
+ max_time_seconds: int = 90,
80
+ ):
81
+ """
82
+ Args:
83
+ max_time_seconds (int): maximum time to spend on each benchmark
84
+ other args: same as in `build.py:build_detection_train_loader`
85
+ """
86
+ if isinstance(dataset, list):
87
+ dataset = DatasetFromList(dataset, copy=False, serialize=True)
88
+ if sampler is None:
89
+ sampler = TrainingSampler(len(dataset))
90
+
91
+ self.dataset = dataset
92
+ self.mapper = mapper
93
+ self.sampler = sampler
94
+ self.total_batch_size = total_batch_size
95
+ self.num_workers = num_workers
96
+ self.per_gpu_batch_size = self.total_batch_size // comm.get_world_size()
97
+
98
+ self.max_time_seconds = max_time_seconds
99
+
100
+ def _benchmark(self, iterator, num_iter, warmup, msg=None):
101
+ avg, all_times = iter_benchmark(iterator, num_iter, warmup, self.max_time_seconds)
102
+ if msg is not None:
103
+ self._log_time(msg, avg, all_times)
104
+ return avg, all_times
105
+
106
+ def _log_time(self, msg, avg, all_times, distributed=False):
107
+ percentiles = [np.percentile(all_times, k, interpolation="nearest") for k in [1, 5, 95, 99]]
108
+ if not distributed:
109
+ logger.info(
110
+ f"{msg}: avg={1.0/avg:.1f} it/s, "
111
+ f"p1={percentiles[0]:.2g}s, p5={percentiles[1]:.2g}s, "
112
+ f"p95={percentiles[2]:.2g}s, p99={percentiles[3]:.2g}s."
113
+ )
114
+ return
115
+ avg_per_gpu = comm.all_gather(avg)
116
+ percentiles_per_gpu = comm.all_gather(percentiles)
117
+ if comm.get_rank() > 0:
118
+ return
119
+ for idx, avg, percentiles in zip(count(), avg_per_gpu, percentiles_per_gpu):
120
+ logger.info(
121
+ f"GPU{idx} {msg}: avg={1.0/avg:.1f} it/s, "
122
+ f"p1={percentiles[0]:.2g}s, p5={percentiles[1]:.2g}s, "
123
+ f"p95={percentiles[2]:.2g}s, p99={percentiles[3]:.2g}s."
124
+ )
125
+
126
+ def benchmark_dataset(self, num_iter, warmup=5):
127
+ """
128
+ Benchmark the speed of taking raw samples from the dataset.
129
+ """
130
+
131
+ def loader():
132
+ while True:
133
+ for k in self.sampler:
134
+ yield self.dataset[k]
135
+
136
+ self._benchmark(loader(), num_iter, warmup, "Dataset Alone")
137
+
138
+ def benchmark_mapper(self, num_iter, warmup=5):
139
+ """
140
+ Benchmark the speed of taking raw samples from the dataset and map
141
+ them in a single process.
142
+ """
143
+
144
+ def loader():
145
+ while True:
146
+ for k in self.sampler:
147
+ yield self.mapper(self.dataset[k])
148
+
149
+ self._benchmark(loader(), num_iter, warmup, "Single Process Mapper (sec/sample)")
150
+
151
+ def benchmark_workers(self, num_iter, warmup=10):
152
+ """
153
+ Benchmark the dataloader by tuning num_workers to [0, 1, self.num_workers].
154
+ """
155
+ candidates = [0, 1]
156
+ if self.num_workers not in candidates:
157
+ candidates.append(self.num_workers)
158
+
159
+ dataset = MapDataset(self.dataset, self.mapper)
160
+ for n in candidates:
161
+ loader = build_batch_data_loader(
162
+ dataset,
163
+ self.sampler,
164
+ self.total_batch_size,
165
+ num_workers=n,
166
+ )
167
+ self._benchmark(
168
+ iter(loader),
169
+ num_iter * max(n, 1),
170
+ warmup * max(n, 1),
171
+ f"DataLoader ({n} workers, bs={self.per_gpu_batch_size})",
172
+ )
173
+ del loader
174
+
175
+ def benchmark_IPC(self, num_iter, warmup=10):
176
+ """
177
+ Benchmark the dataloader where each worker outputs nothing. This
178
+ eliminates the IPC overhead compared to the regular dataloader.
179
+
180
+ PyTorch multiprocessing's IPC only optimizes for torch tensors.
181
+ Large numpy arrays or other data structures may incur large IPC overhead.
182
+ """
183
+ n = self.num_workers
184
+ dataset = _EmptyMapDataset(MapDataset(self.dataset, self.mapper))
185
+ loader = build_batch_data_loader(
186
+ dataset, self.sampler, self.total_batch_size, num_workers=n
187
+ )
188
+ self._benchmark(
189
+ iter(loader),
190
+ num_iter * max(n, 1),
191
+ warmup * max(n, 1),
192
+ f"DataLoader ({n} workers, bs={self.per_gpu_batch_size}) w/o comm",
193
+ )
194
+
195
+ def benchmark_distributed(self, num_iter, warmup=10):
196
+ """
197
+ Benchmark the dataloader in each distributed worker, and log results of
198
+ all workers. This helps understand the final performance as well as
199
+ the variances among workers.
200
+
201
+ It also prints startup time (first iter) of the dataloader.
202
+ """
203
+ gpu = comm.get_world_size()
204
+ dataset = MapDataset(self.dataset, self.mapper)
205
+ n = self.num_workers
206
+ loader = build_batch_data_loader(
207
+ dataset, self.sampler, self.total_batch_size, num_workers=n
208
+ )
209
+
210
+ timer = Timer()
211
+ loader = iter(loader)
212
+ next(loader)
213
+ startup_time = timer.seconds()
214
+ logger.info("Dataloader startup time: {:.2f} seconds".format(startup_time))
215
+
216
+ comm.synchronize()
217
+
218
+ avg, all_times = self._benchmark(loader, num_iter * max(n, 1), warmup * max(n, 1))
219
+ del loader
220
+ self._log_time(
221
+ f"DataLoader ({gpu} GPUs x {n} workers, total bs={self.total_batch_size})",
222
+ avg,
223
+ all_times,
224
+ True,
225
+ )
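A usage sketch for DataLoaderBenchmark (the dataset name "coco_2017_val" is an assumption; any registered dataset plus a standard DatasetMapper would do):

    from detectron2.config import get_cfg
    from detectron2.data import DatasetMapper, get_detection_dataset_dicts
    from detectron2.data.benchmark import DataLoaderBenchmark

    cfg = get_cfg()
    dicts = get_detection_dataset_dicts("coco_2017_val", filter_empty=False)
    bench = DataLoaderBenchmark(
        dicts,
        mapper=DatasetMapper(cfg, is_train=True),
        total_batch_size=2,
        num_workers=2,
    )
    bench.benchmark_dataset(100)   # raw sample loading speed
    bench.benchmark_mapper(100)    # dataset + mapper in a single process
    bench.benchmark_workers(100)   # full dataloader with 0, 1 and num_workers workers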
detectron2/build/lib.linux-x86_64-3.10/detectron2/data/build.py ADDED
@@ -0,0 +1,556 @@
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+ import itertools
3
+ import logging
4
+ import numpy as np
5
+ import operator
6
+ import pickle
7
+ from typing import Any, Callable, Dict, List, Optional, Union
8
+ import torch
9
+ import torch.utils.data as torchdata
10
+ from tabulate import tabulate
11
+ from termcolor import colored
12
+
13
+ from detectron2.config import configurable
14
+ from detectron2.structures import BoxMode
15
+ from detectron2.utils.comm import get_world_size
16
+ from detectron2.utils.env import seed_all_rng
17
+ from detectron2.utils.file_io import PathManager
18
+ from detectron2.utils.logger import _log_api_usage, log_first_n
19
+
20
+ from .catalog import DatasetCatalog, MetadataCatalog
21
+ from .common import AspectRatioGroupedDataset, DatasetFromList, MapDataset, ToIterableDataset
22
+ from .dataset_mapper import DatasetMapper
23
+ from .detection_utils import check_metadata_consistency
24
+ from .samplers import (
25
+ InferenceSampler,
26
+ RandomSubsetTrainingSampler,
27
+ RepeatFactorTrainingSampler,
28
+ TrainingSampler,
29
+ )
30
+
31
+ """
32
+ This file contains the default logic to build a dataloader for training or testing.
33
+ """
34
+
35
+ __all__ = [
36
+ "build_batch_data_loader",
37
+ "build_detection_train_loader",
38
+ "build_detection_test_loader",
39
+ "get_detection_dataset_dicts",
40
+ "load_proposals_into_dataset",
41
+ "print_instances_class_histogram",
42
+ ]
43
+
44
+
45
+ def filter_images_with_only_crowd_annotations(dataset_dicts):
46
+ """
47
+ Filter out images with no annotations or with only crowd annotations
48
+ (i.e., images without non-crowd annotations).
49
+ A common training-time preprocessing on COCO dataset.
50
+
51
+ Args:
52
+ dataset_dicts (list[dict]): annotations in Detectron2 Dataset format.
53
+
54
+ Returns:
55
+ list[dict]: the same format, but filtered.
56
+ """
57
+ num_before = len(dataset_dicts)
58
+
59
+ def valid(anns):
60
+ for ann in anns:
61
+ if ann.get("iscrowd", 0) == 0:
62
+ return True
63
+ return False
64
+
65
+ dataset_dicts = [x for x in dataset_dicts if valid(x["annotations"])]
66
+ num_after = len(dataset_dicts)
67
+ logger = logging.getLogger(__name__)
68
+ logger.info(
69
+ "Removed {} images with no usable annotations. {} images left.".format(
70
+ num_before - num_after, num_after
71
+ )
72
+ )
73
+ return dataset_dicts
74
+
75
+
76
+ def filter_images_with_few_keypoints(dataset_dicts, min_keypoints_per_image):
77
+ """
78
+ Filter out images with too few keypoints.
79
+
80
+ Args:
81
+ dataset_dicts (list[dict]): annotations in Detectron2 Dataset format.
82
+
83
+ Returns:
84
+ list[dict]: the same format as dataset_dicts, but filtered.
85
+ """
86
+ num_before = len(dataset_dicts)
87
+
88
+ def visible_keypoints_in_image(dic):
89
+ # Each keypoints field has the format [x1, y1, v1, ...], where v is visibility
90
+ annotations = dic["annotations"]
91
+ return sum(
92
+ (np.array(ann["keypoints"][2::3]) > 0).sum()
93
+ for ann in annotations
94
+ if "keypoints" in ann
95
+ )
96
+
97
+ dataset_dicts = [
98
+ x for x in dataset_dicts if visible_keypoints_in_image(x) >= min_keypoints_per_image
99
+ ]
100
+ num_after = len(dataset_dicts)
101
+ logger = logging.getLogger(__name__)
102
+ logger.info(
103
+ "Removed {} images with fewer than {} keypoints.".format(
104
+ num_before - num_after, min_keypoints_per_image
105
+ )
106
+ )
107
+ return dataset_dicts
108
+
109
+
110
+ def load_proposals_into_dataset(dataset_dicts, proposal_file):
111
+ """
112
+ Load precomputed object proposals into the dataset.
113
+
114
+ The proposal file should be a pickled dict with the following keys:
115
+
116
+ - "ids": list[int] or list[str], the image ids
117
+ - "boxes": list[np.ndarray], each is an Nx4 array of boxes corresponding to the image id
118
+ - "objectness_logits": list[np.ndarray], each is an N sized array of objectness scores
119
+ corresponding to the boxes.
120
+ - "bbox_mode": the BoxMode of the boxes array. Defaults to ``BoxMode.XYXY_ABS``.
121
+
122
+ Args:
123
+ dataset_dicts (list[dict]): annotations in Detectron2 Dataset format.
124
+ proposal_file (str): file path of pre-computed proposals, in pkl format.
125
+
126
+ Returns:
127
+ list[dict]: the same format as dataset_dicts, but added proposal field.
128
+ """
129
+ logger = logging.getLogger(__name__)
130
+ logger.info("Loading proposals from: {}".format(proposal_file))
131
+
132
+ with PathManager.open(proposal_file, "rb") as f:
133
+ proposals = pickle.load(f, encoding="latin1")
134
+
135
+ # Rename the key names in D1 proposal files
136
+ rename_keys = {"indexes": "ids", "scores": "objectness_logits"}
137
+ for key in rename_keys:
138
+ if key in proposals:
139
+ proposals[rename_keys[key]] = proposals.pop(key)
140
+
141
+ # Fetch the indexes of all proposals that are in the dataset
142
+ # Convert image_id to str since they could be int.
143
+ img_ids = set({str(record["image_id"]) for record in dataset_dicts})
144
+ id_to_index = {str(id): i for i, id in enumerate(proposals["ids"]) if str(id) in img_ids}
145
+
146
+ # Assuming default bbox_mode of precomputed proposals are 'XYXY_ABS'
147
+ bbox_mode = BoxMode(proposals["bbox_mode"]) if "bbox_mode" in proposals else BoxMode.XYXY_ABS
148
+
149
+ for record in dataset_dicts:
150
+ # Get the index of the proposal
151
+ i = id_to_index[str(record["image_id"])]
152
+
153
+ boxes = proposals["boxes"][i]
154
+ objectness_logits = proposals["objectness_logits"][i]
155
+ # Sort the proposals in descending order of the scores
156
+ inds = objectness_logits.argsort()[::-1]
157
+ record["proposal_boxes"] = boxes[inds]
158
+ record["proposal_objectness_logits"] = objectness_logits[inds]
159
+ record["proposal_bbox_mode"] = bbox_mode
160
+
161
+ return dataset_dicts
162
+
163
+
164
+ def print_instances_class_histogram(dataset_dicts, class_names):
165
+ """
166
+ Args:
167
+ dataset_dicts (list[dict]): list of dataset dicts.
168
+ class_names (list[str]): list of class names (zero-indexed).
169
+ """
170
+ num_classes = len(class_names)
171
+ hist_bins = np.arange(num_classes + 1)
172
+ histogram = np.zeros((num_classes,), dtype=int)  # np.int is removed in recent NumPy; use the builtin
173
+ for entry in dataset_dicts:
174
+ annos = entry["annotations"]
175
+ classes = np.asarray(
176
+ [x["category_id"] for x in annos if not x.get("iscrowd", 0)], dtype=np.int
177
+ )
178
+ if len(classes):
179
+ assert classes.min() >= 0, f"Got an invalid category_id={classes.min()}"
180
+ assert (
181
+ classes.max() < num_classes
182
+ ), f"Got an invalid category_id={classes.max()} for a dataset of {num_classes} classes"
183
+ histogram += np.histogram(classes, bins=hist_bins)[0]
184
+
185
+ N_COLS = min(6, len(class_names) * 2)
186
+
187
+ def short_name(x):
188
+ # make long class names shorter. useful for lvis
189
+ if len(x) > 13:
190
+ return x[:11] + ".."
191
+ return x
192
+
193
+ data = list(
194
+ itertools.chain(*[[short_name(class_names[i]), int(v)] for i, v in enumerate(histogram)])
195
+ )
196
+ total_num_instances = sum(data[1::2])
197
+ data.extend([None] * (N_COLS - (len(data) % N_COLS)))
198
+ if num_classes > 1:
199
+ data.extend(["total", total_num_instances])
200
+ data = itertools.zip_longest(*[data[i::N_COLS] for i in range(N_COLS)])
201
+ table = tabulate(
202
+ data,
203
+ headers=["category", "#instances"] * (N_COLS // 2),
204
+ tablefmt="pipe",
205
+ numalign="left",
206
+ stralign="center",
207
+ )
208
+ log_first_n(
209
+ logging.INFO,
210
+ "Distribution of instances among all {} categories:\n".format(num_classes)
211
+ + colored(table, "cyan"),
212
+ key="message",
213
+ )
214
+
215
+
216
+ def get_detection_dataset_dicts(
217
+ names,
218
+ filter_empty=True,
219
+ min_keypoints=0,
220
+ proposal_files=None,
221
+ check_consistency=True,
222
+ ):
223
+ """
224
+ Load and prepare dataset dicts for instance detection/segmentation and semantic segmentation.
225
+
226
+ Args:
227
+ names (str or list[str]): a dataset name or a list of dataset names
228
+ filter_empty (bool): whether to filter out images without instance annotations
229
+ min_keypoints (int): filter out images with fewer keypoints than
230
+ `min_keypoints`. Set to 0 to do nothing.
231
+ proposal_files (list[str]): if given, a list of object proposal files
232
+ that match each dataset in `names`.
233
+ check_consistency (bool): whether to check if datasets have consistent metadata.
234
+
235
+ Returns:
236
+ list[dict]: a list of dicts following the standard dataset dict format.
237
+ """
238
+ if isinstance(names, str):
239
+ names = [names]
240
+ assert len(names), names
241
+ dataset_dicts = [DatasetCatalog.get(dataset_name) for dataset_name in names]
242
+
243
+ if isinstance(dataset_dicts[0], torchdata.Dataset):
244
+ if len(dataset_dicts) > 1:
245
+ # ConcatDataset does not work for iterable style dataset.
246
+ # We could support concat for iterable as well, but it's often
247
+ # not a good idea to concat iterables anyway.
248
+ return torchdata.ConcatDataset(dataset_dicts)
249
+ return dataset_dicts[0]
250
+
251
+ for dataset_name, dicts in zip(names, dataset_dicts):
252
+ assert len(dicts), "Dataset '{}' is empty!".format(dataset_name)
253
+
254
+ if proposal_files is not None:
255
+ assert len(names) == len(proposal_files)
256
+ # load precomputed proposals from proposal files
257
+ dataset_dicts = [
258
+ load_proposals_into_dataset(dataset_i_dicts, proposal_file)
259
+ for dataset_i_dicts, proposal_file in zip(dataset_dicts, proposal_files)
260
+ ]
261
+
262
+ dataset_dicts = list(itertools.chain.from_iterable(dataset_dicts))
263
+
264
+ has_instances = "annotations" in dataset_dicts[0]
265
+ if filter_empty and has_instances:
266
+ dataset_dicts = filter_images_with_only_crowd_annotations(dataset_dicts)
267
+ if min_keypoints > 0 and has_instances:
268
+ dataset_dicts = filter_images_with_few_keypoints(dataset_dicts, min_keypoints)
269
+
270
+ if check_consistency and has_instances:
271
+ try:
272
+ class_names = MetadataCatalog.get(names[0]).thing_classes
273
+ check_metadata_consistency("thing_classes", names)
274
+ print_instances_class_histogram(dataset_dicts, class_names)
275
+ except AttributeError: # class names are not available for this dataset
276
+ pass
277
+
278
+ assert len(dataset_dicts), "No valid data found in {}.".format(",".join(names))
279
+ return dataset_dicts
280
+
281
+
282
+ def build_batch_data_loader(
283
+ dataset,
284
+ sampler,
285
+ total_batch_size,
286
+ *,
287
+ aspect_ratio_grouping=False,
288
+ num_workers=0,
289
+ collate_fn=None,
290
+ ):
291
+ """
292
+ Build a batched dataloader. The main differences from `torch.utils.data.DataLoader` are:
293
+ 1. support aspect ratio grouping options
294
+ 2. use no "batch collation", because this is common for detection training
295
+
296
+ Args:
297
+ dataset (torch.utils.data.Dataset): a pytorch map-style or iterable dataset.
298
+ sampler (torch.utils.data.sampler.Sampler or None): a sampler that produces indices.
299
+ Must be provided iff. ``dataset`` is a map-style dataset.
300
+ total_batch_size, aspect_ratio_grouping, num_workers, collate_fn: see
301
+ :func:`build_detection_train_loader`.
302
+
303
+ Returns:
304
+ iterable[list]. Length of each list is the batch size of the current
305
+ GPU. Each element in the list comes from the dataset.
306
+ """
307
+ world_size = get_world_size()
308
+ assert (
309
+ total_batch_size > 0 and total_batch_size % world_size == 0
310
+ ), "Total batch size ({}) must be divisible by the number of gpus ({}).".format(
311
+ total_batch_size, world_size
312
+ )
313
+ batch_size = total_batch_size // world_size
314
+
315
+ if isinstance(dataset, torchdata.IterableDataset):
316
+ assert sampler is None, "sampler must be None if dataset is IterableDataset"
317
+ else:
318
+ dataset = ToIterableDataset(dataset, sampler)
319
+
320
+ if aspect_ratio_grouping:
321
+ data_loader = torchdata.DataLoader(
322
+ dataset,
323
+ num_workers=num_workers,
324
+ collate_fn=operator.itemgetter(0), # don't batch, but yield individual elements
325
+ worker_init_fn=worker_init_reset_seed,
326
+ ) # yield individual mapped dict
327
+ data_loader = AspectRatioGroupedDataset(data_loader, batch_size)
328
+ if collate_fn is None:
329
+ return data_loader
330
+ return MapDataset(data_loader, collate_fn)
331
+ else:
332
+ return torchdata.DataLoader(
333
+ dataset,
334
+ batch_size=batch_size,
335
+ drop_last=True,
336
+ num_workers=num_workers,
337
+ collate_fn=trivial_batch_collator if collate_fn is None else collate_fn,
338
+ worker_init_fn=worker_init_reset_seed,
339
+ )
340
+
341
+
342
+ def _train_loader_from_config(cfg, mapper=None, *, dataset=None, sampler=None):
343
+ if dataset is None:
344
+ dataset = get_detection_dataset_dicts(
345
+ cfg.DATASETS.TRAIN,
346
+ filter_empty=cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS,
347
+ min_keypoints=cfg.MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE
348
+ if cfg.MODEL.KEYPOINT_ON
349
+ else 0,
350
+ proposal_files=cfg.DATASETS.PROPOSAL_FILES_TRAIN if cfg.MODEL.LOAD_PROPOSALS else None,
351
+ )
352
+ _log_api_usage("dataset." + cfg.DATASETS.TRAIN[0])
353
+
354
+ if mapper is None:
355
+ mapper = DatasetMapper(cfg, True)
356
+
357
+ if sampler is None:
358
+ sampler_name = cfg.DATALOADER.SAMPLER_TRAIN
359
+ logger = logging.getLogger(__name__)
360
+ if isinstance(dataset, torchdata.IterableDataset):
361
+ logger.info("Not using any sampler since the dataset is IterableDataset.")
362
+ sampler = None
363
+ else:
364
+ logger.info("Using training sampler {}".format(sampler_name))
365
+ if sampler_name == "TrainingSampler":
366
+ sampler = TrainingSampler(len(dataset))
367
+ elif sampler_name == "RepeatFactorTrainingSampler":
368
+ repeat_factors = RepeatFactorTrainingSampler.repeat_factors_from_category_frequency(
369
+ dataset, cfg.DATALOADER.REPEAT_THRESHOLD
370
+ )
371
+ sampler = RepeatFactorTrainingSampler(repeat_factors)
372
+ elif sampler_name == "RandomSubsetTrainingSampler":
373
+ sampler = RandomSubsetTrainingSampler(
374
+ len(dataset), cfg.DATALOADER.RANDOM_SUBSET_RATIO
375
+ )
376
+ else:
377
+ raise ValueError("Unknown training sampler: {}".format(sampler_name))
378
+
379
+ return {
380
+ "dataset": dataset,
381
+ "sampler": sampler,
382
+ "mapper": mapper,
383
+ "total_batch_size": cfg.SOLVER.IMS_PER_BATCH,
384
+ "aspect_ratio_grouping": cfg.DATALOADER.ASPECT_RATIO_GROUPING,
385
+ "num_workers": cfg.DATALOADER.NUM_WORKERS,
386
+ }
387
+
388
+
389
+ @configurable(from_config=_train_loader_from_config)
390
+ def build_detection_train_loader(
391
+ dataset,
392
+ *,
393
+ mapper,
394
+ sampler=None,
395
+ total_batch_size,
396
+ aspect_ratio_grouping=True,
397
+ num_workers=0,
398
+ collate_fn=None,
399
+ ):
400
+ """
401
+ Build a dataloader for object detection with some default features.
402
+
403
+ Args:
404
+ dataset (list or torch.utils.data.Dataset): a list of dataset dicts,
405
+ or a pytorch dataset (either map-style or iterable). It can be obtained
406
+ by using :func:`DatasetCatalog.get` or :func:`get_detection_dataset_dicts`.
407
+ mapper (callable): a callable which takes a sample (dict) from dataset and
408
+ returns the format to be consumed by the model.
409
+ When using cfg, the default choice is ``DatasetMapper(cfg, is_train=True)``.
410
+ sampler (torch.utils.data.sampler.Sampler or None): a sampler that produces
411
+ indices to be applied on ``dataset``.
412
+ If ``dataset`` is map-style, the default sampler is a :class:`TrainingSampler`,
413
+ which coordinates an infinite random shuffle sequence across all workers.
414
+ Sampler must be None if ``dataset`` is iterable.
415
+ total_batch_size (int): total batch size across all workers.
416
+ aspect_ratio_grouping (bool): whether to group images with similar
417
+ aspect ratio for efficiency. When enabled, it requires each
418
+ element in dataset be a dict with keys "width" and "height".
419
+ num_workers (int): number of parallel data loading workers
420
+ collate_fn: a function that determines how to do batching, same as the argument of
421
+ `torch.utils.data.DataLoader`. Defaults to do no collation and return a list of
422
+ data. No collation is OK for small batch size and simple data structures.
423
+ If your batch size is large and each sample contains too many small tensors,
424
+ it's more efficient to collate them in data loader.
425
+
426
+ Returns:
427
+ torch.utils.data.DataLoader:
428
+ a dataloader. Each output from it is a ``list[mapped_element]`` of length
429
+ ``total_batch_size / num_workers``, where ``mapped_element`` is produced
430
+ by the ``mapper``.
431
+ """
432
+ if isinstance(dataset, list):
433
+ dataset = DatasetFromList(dataset, copy=False)
434
+ if mapper is not None:
435
+ dataset = MapDataset(dataset, mapper)
436
+
437
+ if isinstance(dataset, torchdata.IterableDataset):
438
+ assert sampler is None, "sampler must be None if dataset is IterableDataset"
439
+ else:
440
+ if sampler is None:
441
+ sampler = TrainingSampler(len(dataset))
442
+ assert isinstance(sampler, torchdata.Sampler), f"Expect a Sampler but got {type(sampler)}"
443
+ return build_batch_data_loader(
444
+ dataset,
445
+ sampler,
446
+ total_batch_size,
447
+ aspect_ratio_grouping=aspect_ratio_grouping,
448
+ num_workers=num_workers,
449
+ collate_fn=collate_fn,
450
+ )
451
+
452
+
453
+ def _test_loader_from_config(cfg, dataset_name, mapper=None):
454
+ """
455
+ Uses the given `dataset_name` argument (instead of the names in cfg), because the
456
+ standard practice is to evaluate each test set individually (not combining them).
457
+ """
458
+ if isinstance(dataset_name, str):
459
+ dataset_name = [dataset_name]
460
+
461
+ dataset = get_detection_dataset_dicts(
462
+ dataset_name,
463
+ filter_empty=False,
464
+ proposal_files=[
465
+ cfg.DATASETS.PROPOSAL_FILES_TEST[list(cfg.DATASETS.TEST).index(x)] for x in dataset_name
466
+ ]
467
+ if cfg.MODEL.LOAD_PROPOSALS
468
+ else None,
469
+ )
470
+ if mapper is None:
471
+ mapper = DatasetMapper(cfg, False)
472
+ return {
473
+ "dataset": dataset,
474
+ "mapper": mapper,
475
+ "num_workers": cfg.DATALOADER.NUM_WORKERS,
476
+ "sampler": InferenceSampler(len(dataset))
477
+ if not isinstance(dataset, torchdata.IterableDataset)
478
+ else None,
479
+ }
480
+
481
+
482
+ @configurable(from_config=_test_loader_from_config)
483
+ def build_detection_test_loader(
484
+ dataset: Union[List[Any], torchdata.Dataset],
485
+ *,
486
+ mapper: Callable[[Dict[str, Any]], Any],
487
+ sampler: Optional[torchdata.Sampler] = None,
488
+ batch_size: int = 1,
489
+ num_workers: int = 0,
490
+ collate_fn: Optional[Callable[[List[Any]], Any]] = None,
491
+ ) -> torchdata.DataLoader:
492
+ """
493
+ Similar to `build_detection_train_loader`, with default batch size = 1,
494
+ and sampler = :class:`InferenceSampler`. This sampler coordinates all workers
495
+ to produce the exact set of all samples.
496
+
497
+ Args:
498
+ dataset: a list of dataset dicts,
499
+ or a pytorch dataset (either map-style or iterable). They can be obtained
500
+ by using :func:`DatasetCatalog.get` or :func:`get_detection_dataset_dicts`.
501
+ mapper: a callable which takes a sample (dict) from dataset
502
+ and returns the format to be consumed by the model.
503
+ When using cfg, the default choice is ``DatasetMapper(cfg, is_train=False)``.
504
+ sampler: a sampler that produces
505
+ indices to be applied on ``dataset``. Default to :class:`InferenceSampler`,
506
+ which splits the dataset across all workers. Sampler must be None
507
+ if `dataset` is iterable.
508
+ batch_size: the batch size of the data loader to be created.
509
+ Default to 1 image per worker since this is the standard when reporting
510
+ inference time in papers.
511
+ num_workers: number of parallel data loading workers
512
+ collate_fn: same as the argument of `torch.utils.data.DataLoader`.
513
+ Defaults to do no collation and return a list of data.
514
+
515
+ Returns:
516
+ DataLoader: a torch DataLoader, that loads the given detection
517
+ dataset, with test-time transformation and batching.
518
+
519
+ Examples:
520
+ ::
521
+ data_loader = build_detection_test_loader(
522
+ DatasetCatalog.get("my_test"),
523
+ mapper=DatasetMapper(...))
524
+
525
+ # or, instantiate with a CfgNode:
526
+ data_loader = build_detection_test_loader(cfg, "my_test")
527
+ """
528
+ if isinstance(dataset, list):
529
+ dataset = DatasetFromList(dataset, copy=False)
530
+ if mapper is not None:
531
+ dataset = MapDataset(dataset, mapper)
532
+ if isinstance(dataset, torchdata.IterableDataset):
533
+ assert sampler is None, "sampler must be None if dataset is IterableDataset"
534
+ else:
535
+ if sampler is None:
536
+ sampler = InferenceSampler(len(dataset))
537
+ return torchdata.DataLoader(
538
+ dataset,
539
+ batch_size=batch_size,
540
+ sampler=sampler,
541
+ drop_last=False,
542
+ num_workers=num_workers,
543
+ collate_fn=trivial_batch_collator if collate_fn is None else collate_fn,
544
+ )
545
+
546
+
547
+ def trivial_batch_collator(batch):
548
+ """
549
+ A batch collator that does nothing.
550
+ """
551
+ return batch
552
+
553
+
554
+ def worker_init_reset_seed(worker_id):
555
+ initial_seed = torch.initial_seed() % 2 ** 31
556
+ seed_all_rng(initial_seed + worker_id)
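A short sketch of building the train and test loaders from a config (the COCO dataset names are assumptions; the datasets must already be registered in DatasetCatalog):

    from detectron2.config import get_cfg
    from detectron2.data import build_detection_train_loader, build_detection_test_loader

    cfg = get_cfg()
    cfg.DATASETS.TRAIN = ("coco_2017_train",)
    cfg.DATASETS.TEST = ("coco_2017_val",)
    cfg.SOLVER.IMS_PER_BATCH = 4
    cfg.DATALOADER.NUM_WORKERS = 2

    train_loader = build_detection_train_loader(cfg)                # default mapper: DatasetMapper(cfg, True)
    test_loader = build_detection_test_loader(cfg, "coco_2017_val")
    batch = next(iter(train_loader))                                # list[dict], length = IMS_PER_BATCH / #GPUs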
detectron2/build/lib.linux-x86_64-3.10/detectron2/data/catalog.py ADDED
@@ -0,0 +1,236 @@
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+ import copy
3
+ import logging
4
+ import types
5
+ from collections import UserDict
6
+ from typing import List
7
+
8
+ from detectron2.utils.logger import log_first_n
9
+
10
+ __all__ = ["DatasetCatalog", "MetadataCatalog", "Metadata"]
11
+
12
+
13
+ class _DatasetCatalog(UserDict):
14
+ """
15
+ A global dictionary that stores information about the datasets and how to obtain them.
16
+
17
+ It contains a mapping from strings
18
+ (which are names that identify a dataset, e.g. "coco_2014_train")
19
+ to a function which parses the dataset and returns the samples in the
20
+ format of `list[dict]`.
21
+
22
+ The returned dicts should be in Detectron2 Dataset format (See DATASETS.md for details)
23
+ if used with the data loader functionalities in `data/build.py,data/detection_transform.py`.
24
+
25
+ The purpose of having this catalog is to make it easy to choose
26
+ different datasets, by just using the strings in the config.
27
+ """
28
+
29
+ def register(self, name, func):
30
+ """
31
+ Args:
32
+ name (str): the name that identifies a dataset, e.g. "coco_2014_train".
33
+ func (callable): a callable which takes no arguments and returns a list of dicts.
34
+ It must return the same results if called multiple times.
35
+ """
36
+ assert callable(func), "You must register a function with `DatasetCatalog.register`!"
37
+ assert name not in self, "Dataset '{}' is already registered!".format(name)
38
+ self[name] = func
39
+
40
+ def get(self, name):
41
+ """
42
+ Call the registered function and return its results.
43
+
44
+ Args:
45
+ name (str): the name that identifies a dataset, e.g. "coco_2014_train".
46
+
47
+ Returns:
48
+ list[dict]: dataset annotations.
49
+ """
50
+ try:
51
+ f = self[name]
52
+ except KeyError as e:
53
+ raise KeyError(
54
+ "Dataset '{}' is not registered! Available datasets are: {}".format(
55
+ name, ", ".join(list(self.keys()))
56
+ )
57
+ ) from e
58
+ return f()
59
+
60
+ def list(self) -> List[str]:
61
+ """
62
+ List all registered datasets.
63
+
64
+ Returns:
65
+ list[str]
66
+ """
67
+ return list(self.keys())
68
+
69
+ def remove(self, name):
70
+ """
71
+ Alias of ``pop``.
72
+ """
73
+ self.pop(name)
74
+
75
+ def __str__(self):
76
+ return "DatasetCatalog(registered datasets: {})".format(", ".join(self.keys()))
77
+
78
+ __repr__ = __str__
79
+
80
+
81
+ DatasetCatalog = _DatasetCatalog()
82
+ DatasetCatalog.__doc__ = (
83
+ _DatasetCatalog.__doc__
84
+ + """
85
+ .. automethod:: detectron2.data.catalog.DatasetCatalog.register
86
+ .. automethod:: detectron2.data.catalog.DatasetCatalog.get
87
+ """
88
+ )
89
+
90
+
91
+ class Metadata(types.SimpleNamespace):
92
+ """
93
+ A class that supports simple attribute setter/getter.
94
+ It is intended for storing the metadata of a dataset and making it accessible globally.
95
+
96
+ Examples:
97
+ ::
98
+ # somewhere when you load the data:
99
+ MetadataCatalog.get("mydataset").thing_classes = ["person", "dog"]
100
+
101
+ # somewhere when you print statistics or visualize:
102
+ classes = MetadataCatalog.get("mydataset").thing_classes
103
+ """
104
+
105
+ # the name of the dataset
106
+ # set default to N/A so that `self.name` in the errors will not trigger getattr again
107
+ name: str = "N/A"
108
+
109
+ _RENAMED = {
110
+ "class_names": "thing_classes",
111
+ "dataset_id_to_contiguous_id": "thing_dataset_id_to_contiguous_id",
112
+ "stuff_class_names": "stuff_classes",
113
+ }
114
+
115
+ def __getattr__(self, key):
116
+ if key in self._RENAMED:
117
+ log_first_n(
118
+ logging.WARNING,
119
+ "Metadata '{}' was renamed to '{}'!".format(key, self._RENAMED[key]),
120
+ n=10,
121
+ )
122
+ return getattr(self, self._RENAMED[key])
123
+
124
+ # "name" exists in every metadata
125
+ if len(self.__dict__) > 1:
126
+ raise AttributeError(
127
+ "Attribute '{}' does not exist in the metadata of dataset '{}'. Available "
128
+ "keys are {}.".format(key, self.name, str(self.__dict__.keys()))
129
+ )
130
+ else:
131
+ raise AttributeError(
132
+ f"Attribute '{key}' does not exist in the metadata of dataset '{self.name}': "
133
+ "metadata is empty."
134
+ )
135
+
136
+ def __setattr__(self, key, val):
137
+ if key in self._RENAMED:
138
+ log_first_n(
139
+ logging.WARNING,
140
+ "Metadata '{}' was renamed to '{}'!".format(key, self._RENAMED[key]),
141
+ n=10,
142
+ )
143
+ setattr(self, self._RENAMED[key], val)
144
+
145
+ # Ensure that metadata of the same name stays consistent
146
+ try:
147
+ oldval = getattr(self, key)
148
+ assert oldval == val, (
149
+ "Attribute '{}' in the metadata of '{}' cannot be set "
150
+ "to a different value!\n{} != {}".format(key, self.name, oldval, val)
151
+ )
152
+ except AttributeError:
153
+ super().__setattr__(key, val)
154
+
155
+ def as_dict(self):
156
+ """
157
+ Returns all the metadata as a dict.
158
+ Note that modifications to the returned dict will not reflect on the Metadata object.
159
+ """
160
+ return copy.copy(self.__dict__)
161
+
162
+ def set(self, **kwargs):
163
+ """
164
+ Set multiple metadata with kwargs.
165
+ """
166
+ for k, v in kwargs.items():
167
+ setattr(self, k, v)
168
+ return self
169
+
170
+ def get(self, key, default=None):
171
+ """
172
+ Access an attribute and return its value if exists.
173
+ Otherwise return default.
174
+ """
175
+ try:
176
+ return getattr(self, key)
177
+ except AttributeError:
178
+ return default
179
+
180
+
181
+ class _MetadataCatalog(UserDict):
182
+ """
183
+ MetadataCatalog is a global dictionary that provides access to
184
+ :class:`Metadata` of a given dataset.
185
+
186
+ The metadata associated with a certain name is a singleton: once created, the
187
+ metadata will stay alive and will be returned by future calls to ``get(name)``.
188
+
189
+ It's like global variables, so don't abuse it.
190
+ It's meant for storing knowledge that's constant and shared across the execution
191
+ of the program, e.g.: the class names in COCO.
192
+ """
193
+
194
+ def get(self, name):
195
+ """
196
+ Args:
197
+ name (str): name of a dataset (e.g. coco_2014_train).
198
+
199
+ Returns:
200
+ Metadata: The :class:`Metadata` instance associated with this name,
201
+ or create an empty one if none is available.
202
+ """
203
+ assert len(name)
204
+ r = super().get(name, None)
205
+ if r is None:
206
+ r = self[name] = Metadata(name=name)
207
+ return r
208
+
209
+ def list(self):
210
+ """
211
+ List all registered metadata.
212
+
213
+ Returns:
214
+ list[str]: keys (names of datasets) of all registered metadata
215
+ """
216
+ return list(self.keys())
217
+
218
+ def remove(self, name):
219
+ """
220
+ Alias of ``pop``.
221
+ """
222
+ self.pop(name)
223
+
224
+ def __str__(self):
225
+ return "MetadataCatalog(registered metadata: {})".format(", ".join(self.keys()))
226
+
227
+ __repr__ = __str__
228
+
229
+
230
+ MetadataCatalog = _MetadataCatalog()
231
+ MetadataCatalog.__doc__ = (
232
+ _MetadataCatalog.__doc__
233
+ + """
234
+ .. automethod:: detectron2.data.catalog.MetadataCatalog.get
235
+ """
236
+ )
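For orientation, the two catalogs above are typically used together when registering a custom dataset: DatasetCatalog stores the loader function, and MetadataCatalog stores shared metadata under the same name. A minimal sketch, assuming a hypothetical dataset name "my_dataset" and a hypothetical loader that are not part of this repository:

from detectron2.data import DatasetCatalog, MetadataCatalog

def load_my_dataset():
    # Hypothetical loader: returns a list of Detectron2 "dataset dicts".
    return [{"file_name": "img_0.jpg", "image_id": 0, "height": 480, "width": 640, "annotations": []}]

DatasetCatalog.register("my_dataset", load_my_dataset)      # stored, only called on get()
MetadataCatalog.get("my_dataset").thing_classes = ["person", "dog"]

dicts = DatasetCatalog.get("my_dataset")                    # invokes load_my_dataset()
classes = MetadataCatalog.get("my_dataset").thing_classes   # same singleton Metadata per name

Note that assigning a different value to an already-set metadata key triggers the assertion in Metadata.__setattr__, which is what keeps metadata for a given dataset name consistent across the program.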
detectron2/build/lib.linux-x86_64-3.10/detectron2/data/common.py ADDED
@@ -0,0 +1,244 @@
# Copyright (c) Facebook, Inc. and its affiliates.
import copy
import itertools
import logging
import numpy as np
import pickle
import random
import torch.utils.data as data
from torch.utils.data.sampler import Sampler

from detectron2.utils.serialize import PicklableWrapper

__all__ = ["MapDataset", "DatasetFromList", "AspectRatioGroupedDataset", "ToIterableDataset"]


def _shard_iterator_dataloader_worker(iterable):
    # Shard the iterable if we're currently inside pytorch dataloader worker.
    worker_info = data.get_worker_info()
    if worker_info is None or worker_info.num_workers == 1:
        # do nothing
        yield from iterable
    else:
        yield from itertools.islice(iterable, worker_info.id, None, worker_info.num_workers)


class _MapIterableDataset(data.IterableDataset):
    """
    Map a function over elements in an IterableDataset.

    Similar to pytorch's MapIterDataPipe, but support filtering when map_func
    returns None.

    This class is not public-facing. Will be called by `MapDataset`.
    """

    def __init__(self, dataset, map_func):
        self._dataset = dataset
        self._map_func = PicklableWrapper(map_func)  # wrap so that a lambda will work

    def __len__(self):
        return len(self._dataset)

    def __iter__(self):
        for x in map(self._map_func, self._dataset):
            if x is not None:
                yield x


class MapDataset(data.Dataset):
    """
    Map a function over the elements in a dataset.
    """

    def __init__(self, dataset, map_func):
        """
        Args:
            dataset: a dataset where map function is applied. Can be either
                map-style or iterable dataset. When given an iterable dataset,
                the returned object will also be an iterable dataset.
            map_func: a callable which maps the element in dataset. map_func can
                return None to skip the data (e.g. in case of errors).
                How None is handled depends on the style of `dataset`.
                If `dataset` is map-style, it randomly tries other elements.
                If `dataset` is iterable, it skips the data and tries the next.
        """
        self._dataset = dataset
        self._map_func = PicklableWrapper(map_func)  # wrap so that a lambda will work

        self._rng = random.Random(42)
        self._fallback_candidates = set(range(len(dataset)))

    def __new__(cls, dataset, map_func):
        is_iterable = isinstance(dataset, data.IterableDataset)
        if is_iterable:
            return _MapIterableDataset(dataset, map_func)
        else:
            return super().__new__(cls)

    def __getnewargs__(self):
        return self._dataset, self._map_func

    def __len__(self):
        return len(self._dataset)

    def __getitem__(self, idx):
        retry_count = 0
        cur_idx = int(idx)

        while True:
            data = self._map_func(self._dataset[cur_idx])
            if data is not None:
                self._fallback_candidates.add(cur_idx)
                return data

            # _map_func fails for this idx, use a random new index from the pool
            retry_count += 1
            self._fallback_candidates.discard(cur_idx)
            cur_idx = self._rng.sample(self._fallback_candidates, k=1)[0]

            if retry_count >= 3:
                logger = logging.getLogger(__name__)
                logger.warning(
                    "Failed to apply `_map_func` for idx: {}, retry count: {}".format(
                        idx, retry_count
                    )
                )


class DatasetFromList(data.Dataset):
    """
    Wrap a list to a torch Dataset. It produces elements of the list as data.
    """

    def __init__(self, lst: list, copy: bool = True, serialize: bool = True):
        """
        Args:
            lst (list): a list which contains elements to produce.
            copy (bool): whether to deepcopy the element when producing it,
                so that the result can be modified in place without affecting the
                source in the list.
            serialize (bool): whether to hold memory using serialized objects, when
                enabled, data loader workers can use shared RAM from master
                process instead of making a copy.
        """
        self._lst = lst
        self._copy = copy
        self._serialize = serialize

        def _serialize(data):
            buffer = pickle.dumps(data, protocol=-1)
            return np.frombuffer(buffer, dtype=np.uint8)

        if self._serialize:
            logger = logging.getLogger(__name__)
            logger.info(
                "Serializing {} elements to byte tensors and concatenating them all ...".format(
                    len(self._lst)
                )
            )
            self._lst = [_serialize(x) for x in self._lst]
            self._addr = np.asarray([len(x) for x in self._lst], dtype=np.int64)
            self._addr = np.cumsum(self._addr)
            self._lst = np.concatenate(self._lst)
            logger.info("Serialized dataset takes {:.2f} MiB".format(len(self._lst) / 1024 ** 2))

    def __len__(self):
        if self._serialize:
            return len(self._addr)
        else:
            return len(self._lst)

    def __getitem__(self, idx):
        if self._serialize:
            start_addr = 0 if idx == 0 else self._addr[idx - 1].item()
            end_addr = self._addr[idx].item()
            bytes = memoryview(self._lst[start_addr:end_addr])
            return pickle.loads(bytes)
        elif self._copy:
            return copy.deepcopy(self._lst[idx])
        else:
            return self._lst[idx]


class ToIterableDataset(data.IterableDataset):
    """
    Convert an old indices-based (also called map-style) dataset
    to an iterable-style dataset.
    """

    def __init__(self, dataset: data.Dataset, sampler: Sampler, shard_sampler: bool = True):
        """
        Args:
            dataset: an old-style dataset with ``__getitem__``
            sampler: a cheap iterable that produces indices to be applied on ``dataset``.
            shard_sampler: whether to shard the sampler based on the current pytorch data loader
                worker id. When an IterableDataset is forked by pytorch's DataLoader into multiple
                workers, it is responsible for sharding its data based on worker id so that workers
                don't produce identical data.

                Most samplers (like our TrainingSampler) do not shard based on dataloader worker id
                and this argument should be set to True. But certain samplers may be already
                sharded, in that case this argument should be set to False.
        """
        assert not isinstance(dataset, data.IterableDataset), dataset
        assert isinstance(sampler, Sampler), sampler
        self.dataset = dataset
        self.sampler = sampler
        self.shard_sampler = shard_sampler

    def __iter__(self):
        if not self.shard_sampler:
            sampler = self.sampler
        else:
            # With map-style dataset, `DataLoader(dataset, sampler)` runs the
            # sampler in main process only. But `DataLoader(ToIterableDataset(dataset, sampler))`
            # will run sampler in every of the N worker. So we should only keep 1/N of the ids on
            # each worker. The assumption is that sampler is cheap to iterate so it's fine to
            # discard ids in workers.
            sampler = _shard_iterator_dataloader_worker(self.sampler)
        for idx in sampler:
            yield self.dataset[idx]

    def __len__(self):
        return len(self.sampler)


class AspectRatioGroupedDataset(data.IterableDataset):
    """
    Batch data that have similar aspect ratio together.
    In this implementation, images whose aspect ratio < (or >) 1 will
    be batched together.
    This improves training speed because the images then need less padding
    to form a batch.

    It assumes the underlying dataset produces dicts with "width" and "height" keys.
    It will then produce a list of original dicts with length = batch_size,
    all with similar aspect ratios.
    """

    def __init__(self, dataset, batch_size):
        """
        Args:
            dataset: an iterable. Each element must be a dict with keys
                "width" and "height", which will be used to batch data.
            batch_size (int):
        """
        self.dataset = dataset
        self.batch_size = batch_size
        self._buckets = [[] for _ in range(2)]
        # Hard-coded two aspect ratio groups: w > h and w < h.
        # Can add support for more aspect ratio groups, but doesn't seem useful

    def __iter__(self):
        for d in self.dataset:
            w, h = d["width"], d["height"]
            bucket_id = 0 if w > h else 1
            bucket = self._buckets[bucket_id]
            bucket.append(d)
            if len(bucket) == self.batch_size:
                data = bucket[:]
                # Clear bucket first, because code after yield is not
                # guaranteed to execute
                del bucket[:]
                yield data
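These wrappers compose into the usual loading pipeline: a list of dataset dicts is wrapped by DatasetFromList, a mapper is applied lazily through MapDataset, and AspectRatioGroupedDataset groups the results into batches of similar aspect ratio. A rough sketch under hypothetical data (the dicts and the trivial mapper below are illustrative only, not how the training loader in this repository is built):

from detectron2.data.common import AspectRatioGroupedDataset, DatasetFromList, MapDataset

dataset_dicts = [
    {"file_name": "a.jpg", "width": 640, "height": 480},  # hypothetical entries
    {"file_name": "b.jpg", "width": 800, "height": 600},
]

def mapper(d):
    # Returning None here would make MapDataset retry a different index.
    return d

dataset = DatasetFromList(dataset_dicts, copy=False, serialize=False)
dataset = MapDataset(dataset, mapper)  # applies `mapper` lazily, per item

# AspectRatioGroupedDataset consumes any iterable of dicts with "width"/"height";
# a generator over the mapped samples stands in for a real iterable dataset here.
batched = AspectRatioGroupedDataset((dataset[i] for i in range(len(dataset))), batch_size=2)
for batch in batched:
    print(len(batch))  # 2: both samples are landscape, so they fall in the same bucket

Roughly speaking, in the real training loader ToIterableDataset (driven by a sampler) sits between the mapped dataset and the batching stage, so that each dataloader worker iterates over a disjoint shard of indices.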
detectron2/build/lib.linux-x86_64-3.10/detectron2/data/dataset_mapper.py ADDED
@@ -0,0 +1,191 @@
# Copyright (c) Facebook, Inc. and its affiliates.
import copy
import logging
import numpy as np
from typing import List, Optional, Union
import torch

from detectron2.config import configurable

from . import detection_utils as utils
from . import transforms as T

"""
This file contains the default mapping that's applied to "dataset dicts".
"""

__all__ = ["DatasetMapper"]


class DatasetMapper:
    """
    A callable which takes a dataset dict in Detectron2 Dataset format,
    and map it into a format used by the model.

    This is the default callable to be used to map your dataset dict into training data.
    You may need to follow it to implement your own one for customized logic,
    such as a different way to read or transform images.
    See :doc:`/tutorials/data_loading` for details.

    The callable currently does the following:

    1. Read the image from "file_name"
    2. Applies cropping/geometric transforms to the image and annotations
    3. Prepare data and annotations to Tensor and :class:`Instances`
    """

    @configurable
    def __init__(
        self,
        is_train: bool,
        *,
        augmentations: List[Union[T.Augmentation, T.Transform]],
        image_format: str,
        use_instance_mask: bool = False,
        use_keypoint: bool = False,
        instance_mask_format: str = "polygon",
        keypoint_hflip_indices: Optional[np.ndarray] = None,
        precomputed_proposal_topk: Optional[int] = None,
        recompute_boxes: bool = False,
    ):
        """
        NOTE: this interface is experimental.

        Args:
            is_train: whether it's used in training or inference
            augmentations: a list of augmentations or deterministic transforms to apply
            image_format: an image format supported by :func:`detection_utils.read_image`.
            use_instance_mask: whether to process instance segmentation annotations, if available
            use_keypoint: whether to process keypoint annotations if available
            instance_mask_format: one of "polygon" or "bitmask". Process instance segmentation
                masks into this format.
            keypoint_hflip_indices: see :func:`detection_utils.create_keypoint_hflip_indices`
            precomputed_proposal_topk: if given, will load pre-computed
                proposals from dataset_dict and keep the top k proposals for each image.
            recompute_boxes: whether to overwrite bounding box annotations
                by computing tight bounding boxes from instance mask annotations.
        """
        if recompute_boxes:
            assert use_instance_mask, "recompute_boxes requires instance masks"
        # fmt: off
        self.is_train = is_train
        self.augmentations = T.AugmentationList(augmentations)
        self.image_format = image_format
        self.use_instance_mask = use_instance_mask
        self.instance_mask_format = instance_mask_format
        self.use_keypoint = use_keypoint
        self.keypoint_hflip_indices = keypoint_hflip_indices
        self.proposal_topk = precomputed_proposal_topk
        self.recompute_boxes = recompute_boxes
        # fmt: on
        logger = logging.getLogger(__name__)
        mode = "training" if is_train else "inference"
        logger.info(f"[DatasetMapper] Augmentations used in {mode}: {augmentations}")

    @classmethod
    def from_config(cls, cfg, is_train: bool = True):
        augs = utils.build_augmentation(cfg, is_train)
        if cfg.INPUT.CROP.ENABLED and is_train:
            augs.insert(0, T.RandomCrop(cfg.INPUT.CROP.TYPE, cfg.INPUT.CROP.SIZE))
            recompute_boxes = cfg.MODEL.MASK_ON
        else:
            recompute_boxes = False

        ret = {
            "is_train": is_train,
            "augmentations": augs,
            "image_format": cfg.INPUT.FORMAT,
            "use_instance_mask": cfg.MODEL.MASK_ON,
            "instance_mask_format": cfg.INPUT.MASK_FORMAT,
            "use_keypoint": cfg.MODEL.KEYPOINT_ON,
            "recompute_boxes": recompute_boxes,
        }

        if cfg.MODEL.KEYPOINT_ON:
            ret["keypoint_hflip_indices"] = utils.create_keypoint_hflip_indices(cfg.DATASETS.TRAIN)

        if cfg.MODEL.LOAD_PROPOSALS:
            ret["precomputed_proposal_topk"] = (
                cfg.DATASETS.PRECOMPUTED_PROPOSAL_TOPK_TRAIN
                if is_train
                else cfg.DATASETS.PRECOMPUTED_PROPOSAL_TOPK_TEST
            )
        return ret

    def _transform_annotations(self, dataset_dict, transforms, image_shape):
        # USER: Modify this if you want to keep them for some reason.
        for anno in dataset_dict["annotations"]:
            if not self.use_instance_mask:
                anno.pop("segmentation", None)
            if not self.use_keypoint:
                anno.pop("keypoints", None)

        # USER: Implement additional transformations if you have other types of data
        annos = [
            utils.transform_instance_annotations(
                obj, transforms, image_shape, keypoint_hflip_indices=self.keypoint_hflip_indices
            )
            for obj in dataset_dict.pop("annotations")
            if obj.get("iscrowd", 0) == 0
        ]
        instances = utils.annotations_to_instances(
            annos, image_shape, mask_format=self.instance_mask_format
        )

        # After transforms such as cropping are applied, the bounding box may no longer
        # tightly bound the object. As an example, imagine a triangle object
        # [(0,0), (2,0), (0,2)] cropped by a box [(1,0),(2,2)] (XYXY format). The tight
        # bounding box of the cropped triangle should be [(1,0),(2,1)], which is not equal to
        # the intersection of original bounding box and the cropping box.
        if self.recompute_boxes:
            instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
        dataset_dict["instances"] = utils.filter_empty_instances(instances)

    def __call__(self, dataset_dict):
        """
        Args:
            dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

        Returns:
            dict: a format that builtin models in detectron2 accept
        """
        dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
        # USER: Write your own image loading if it's not from a file
        image = utils.read_image(dataset_dict["file_name"], format=self.image_format)
        utils.check_image_size(dataset_dict, image)

        # USER: Remove if you don't do semantic/panoptic segmentation.
        if "sem_seg_file_name" in dataset_dict:
            sem_seg_gt = utils.read_image(dataset_dict.pop("sem_seg_file_name"), "L").squeeze(2)
        else:
            sem_seg_gt = None

        aug_input = T.AugInput(image, sem_seg=sem_seg_gt)
        transforms = self.augmentations(aug_input)
        image, sem_seg_gt = aug_input.image, aug_input.sem_seg

        image_shape = image.shape[:2]  # h, w
        # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
        # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
        # Therefore it's important to use torch.Tensor.
        dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1)))
        if sem_seg_gt is not None:
            dataset_dict["sem_seg"] = torch.as_tensor(sem_seg_gt.astype("long"))

        # USER: Remove if you don't use pre-computed proposals.
        # Most users would not need this feature.
        if self.proposal_topk is not None:
            utils.transform_proposals(
                dataset_dict, image_shape, transforms, proposal_topk=self.proposal_topk
            )

        if not self.is_train:
            # USER: Modify this if you want to keep them for some reason.
            dataset_dict.pop("annotations", None)
            dataset_dict.pop("sem_seg_file_name", None)
            return dataset_dict

        if "annotations" in dataset_dict:
            self._transform_annotations(dataset_dict, transforms, image_shape)

        return dataset_dict
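As a rough illustration of the interface above, a DatasetMapper can also be built directly with an explicit augmentation list instead of going through from_config(cfg); the augmentation choices and image format below are just plausible defaults, not prescribed by this repository:

import detectron2.data.transforms as T
from detectron2.data import DatasetMapper

mapper = DatasetMapper(
    is_train=True,
    augmentations=[
        T.ResizeShortestEdge(short_edge_length=800, max_size=1333),
        T.RandomFlip(horizontal=True),
    ],
    image_format="BGR",
)

# `mapper(dataset_dict)` reads the image from dataset_dict["file_name"], applies the
# augmentations, and returns a dict with an "image" tensor (plus "instances" when
# training annotations are present).

When a full config object is available, the same object can be built with DatasetMapper(cfg, is_train=True), which routes through from_config via the @configurable decorator.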
detectron2/build/lib.linux-x86_64-3.10/detectron2/data/datasets/__init__.py ADDED
@@ -0,0 +1,9 @@
# Copyright (c) Facebook, Inc. and its affiliates.
from .coco import load_coco_json, load_sem_seg, register_coco_instances, convert_to_coco_json
from .coco_panoptic import register_coco_panoptic, register_coco_panoptic_separated
from .lvis import load_lvis_json, register_lvis_instances, get_lvis_instances_meta
from .pascal_voc import load_voc_instances, register_pascal_voc
from . import builtin as _builtin  # ensure the builtin datasets are registered


__all__ = [k for k in globals().keys() if not k.startswith("_")]
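The helpers re-exported here are the usual entry point for registering custom COCO-format data with both catalogs at once; a minimal sketch with placeholder values (the dataset name and paths are hypothetical):

from detectron2.data.datasets import register_coco_instances

# Registers a DatasetCatalog loader and a MetadataCatalog entry for a COCO-format dataset.
register_coco_instances(
    "my_coco_train",                      # hypothetical dataset name
    {},                                   # extra metadata, may be empty
    "datasets/my_coco/annotations.json",  # hypothetical COCO annotation file
    "datasets/my_coco/images",            # hypothetical image root
)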