Charles Kabui committed on
Commit 2d81b98 · 2 Parent(s): 399308e a989db7

Add 'model/layout-model-training/' from commit 'b9fad076596272e427612d5e848da1ba8ea06b97'


git-subtree-dir: model/layout-model-training
git-subtree-mainline: b404f5c2f60d251e639f628f3e66efcdd1357b99
git-subtree-split: b9fad076596272e427612d5e848da1ba8ea06b97

model/layout-model-training/.gitignore ADDED
@@ -0,0 +1,133 @@
+ # folders
+ data
+ data/
+ credential
+ credential/
+ model
+ model/
+ result
+ result*/
+ outputs/
+
+ # Mac Finder Configurations
+ .DS_Store
+
+ # IDEA configurations
+ .idea/
+
+ # IPython checkpoints
+ .ipynb_checkpoints/
+ log
+
+ # Visual Studio Code
+ .vscode/
+
+ # Byte-compiled / optimized / DLL files
+ __pycache__/
+ *.py[cod]
+ *$py.class
+
+ # C extensions
+ *.so
+
+ # Distribution / packaging
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+ MANIFEST
+
+ # PyInstaller
+ # Usually these files are written by a python script from a template
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
+ *.manifest
+ *.spec
+
+ # Installer logs
+ pip-log.txt
+ pip-delete-this-directory.txt
+
+ # Unit test / coverage reports
+ htmlcov/
+ .tox/
+ .coverage
+ .coverage.*
+ .cache
+ nosetests.xml
+ coverage.xml
+ *.cover
+ .hypothesis/
+ .pytest_cache/
+
+ # Translations
+ *.mo
+ *.pot
+
+ # Django stuff:
+ *.log
+ local_settings.py
+ db.sqlite3
+
+ # Flask stuff:
+ instance/
+ .webassets-cache
+
+ # Scrapy stuff:
+ .scrapy
+
+ # Sphinx documentation
+ docs/_build/
+
+ # PyBuilder
+ target/
+
+ # Jupyter Notebook
+ .ipynb_checkpoints
+
+ # IPython
+ profile_default/
+ ipython_config.py
+
+ # pyenv
+ .python-version
+
+ # celery beat schedule file
+ celerybeat-schedule
+
+ # SageMath parsed files
+ *.sage.py
+
+ # Environments
+ .env
+ .venv
+ env/
+ venv/
+ ENV/
+ env.bak/
+ venv.bak/
+
+ # Spyder project settings
+ .spyderproject
+ .spyproject
+
+ # Rope project settings
+ .ropeproject
+
+ # mkdocs documentation
+ /site
+
+ # mypy
+ .mypy_cache/
+ .dmypy.json
+ dmypy.json
model/layout-model-training/README.md ADDED
@@ -0,0 +1,38 @@
+ # Scripts for training Layout Detection Models using Detectron2
+
+ ## Usage
+
+ ### Directory Structure
+
+ - In `tools/`, we provide a series of handy scripts for converting data formats and training the models.
+ - In `scripts/`, we list the specific commands for running the code on each supported dataset.
+ - `configs/` contains the configurations for the different deep learning models, organized by dataset.
+
+ ### How to train the models?
+
+ - Get the dataset and annotations -- if you are not sure how, feel free to check [this tutorial](https://github.com/Layout-Parser/layout-parser/tree/main/examples/Customizing%20Layout%20Models%20with%20Label%20Studio%20Annotation).
+ - Duplicate and modify the config files and training scripts (see the example sketch after this file):
+   - For example, you might want to copy [`configs/prima/fast_rcnn_R_50_FPN_3x`](configs/prima/fast_rcnn_R_50_FPN_3x.yaml) to `configs/<your-dataset-name>/fast_rcnn_R_50_FPN_3x`, and you can create your own `scripts/train_<your-dataset-name>.sh` based on [`scripts/train_prima.sh`](scripts/train_prima.sh).
+   - You'll modify the `--dataset_name`, `--json_annotation_train`, `--image_path_train`, `--json_annotation_val`, `--image_path_val`, and `--config-file` args appropriately.
+ - If you have a dataset with segmentation masks, you can try to train with the [`mask_rcnn` model](configs/prima/mask_rcnn_R_50_FPN_3x.yaml); otherwise you might want to start with the [`fast_rcnn` model](configs/prima/fast_rcnn_R_50_FPN_3x.yaml).
+ - If you see the error `AttributeError: Cannot find field 'gt_masks' in the given Instances!` during training, your annotations contain no segmentation masks, so you should not use the `mask_rcnn` config -- switch to the `fast_rcnn` config instead.
+
+ ## Supported Datasets
+
+ - Prima Layout Analysis Dataset: [`scripts/train_prima.sh`](https://github.com/Layout-Parser/layout-model-training/blob/master/scripts/train_prima.sh)
+   - You will need to download the dataset from the [official website](https://www.primaresearch.org/dataset/) and put it in the `data/prima` folder.
+   - As the original dataset is stored in the [PAGE format](https://www.primaresearch.org/tools/PAGEViewer), the script will use [`tools/convert_prima_to_coco.py`](https://github.com/Layout-Parser/layout-model-training/blob/master/tools/convert_prima_to_coco.py) to convert it to COCO format.
+   - The final dataset folder structure should look like:
+     ```bash
+     data/
+     └── prima/
+         ├── Images/
+         ├── XML/
+         ├── License.txt
+         └── annotations*.json
+     ```
+
+ ## Reference
+
+ - **[cocosplit](https://github.com/akarazniewicz/cocosplit)**: a script that splits COCO annotations into training and test sets.
+ - **[Detectron2](https://github.com/facebookresearch/detectron2)**: Facebook AI Research's next-generation software system that implements state-of-the-art object detection algorithms.
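
To make the duplication step above concrete, here is a minimal sketch of such a custom training script, modeled on `scripts/train_prima.sh`. The dataset name `your-dataset` and all of its paths are placeholders, not files that exist in this commit; the flags come from `tools/train_net.py` and the trailing key-value pairs are standard Detectron2 config overrides.

```bash
#!/bin/bash
# Hypothetical scripts/train_your-dataset.sh -- substitute your own names and paths.

cd ../tools

python train_net.py \
    --dataset_name your-dataset \
    --json_annotation_train ../data/your-dataset/annotations-train.json \
    --image_path_train ../data/your-dataset/Images \
    --json_annotation_val ../data/your-dataset/annotations-val.json \
    --image_path_val ../data/your-dataset/Images \
    --config-file ../configs/your-dataset/fast_rcnn_R_50_FPN_3x.yaml \
    OUTPUT_DIR ../outputs/your-dataset/fast_rcnn_R_50_FPN_3x/ \
    SOLVER.IMS_PER_BATCH 2
```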
model/layout-model-training/configs/prima/fast_rcnn_R_50_FPN_3x.yaml ADDED
@@ -0,0 +1,307 @@
+ CUDNN_BENCHMARK: false
+ DATALOADER:
+   ASPECT_RATIO_GROUPING: true
+   FILTER_EMPTY_ANNOTATIONS: true
+   NUM_WORKERS: 4
+   REPEAT_THRESHOLD: 0.0
+   SAMPLER_TRAIN: TrainingSampler
+ DATASETS:
+   PRECOMPUTED_PROPOSAL_TOPK_TEST: 1000
+   PRECOMPUTED_PROPOSAL_TOPK_TRAIN: 2000
+   PROPOSAL_FILES_TEST: []
+   PROPOSAL_FILES_TRAIN: []
+   TEST: []
+   TRAIN: []
+ GLOBAL:
+   HACK: 1.0
+ INPUT:
+   CROP:
+     ENABLED: false
+     SIZE:
+     - 0.9
+     - 0.9
+     TYPE: relative_range
+   FORMAT: BGR
+   MASK_FORMAT: polygon
+   MAX_SIZE_TEST: 1333
+   MAX_SIZE_TRAIN: 1333
+   MIN_SIZE_TEST: 800
+   MIN_SIZE_TRAIN:
+   - 640
+   - 672
+   - 704
+   - 736
+   - 768
+   - 800
+   MIN_SIZE_TRAIN_SAMPLING: choice
+ MODEL:
+   ANCHOR_GENERATOR:
+     ANGLES:
+     - - -90
+       - 0
+       - 90
+     ASPECT_RATIOS:
+     - - 0.5
+       - 1.0
+       - 2.0
+     NAME: DefaultAnchorGenerator
+     OFFSET: 0.0
+     SIZES:
+     - - 32
+     - - 64
+     - - 128
+     - - 256
+     - - 512
+   BACKBONE:
+     FREEZE_AT: 2
+     NAME: build_resnet_fpn_backbone
+   DEVICE: cuda
+   FPN:
+     FUSE_TYPE: sum
+     IN_FEATURES:
+     - res2
+     - res3
+     - res4
+     - res5
+     NORM: ''
+     OUT_CHANNELS: 256
+   KEYPOINT_ON: false
+   LOAD_PROPOSALS: false
+   MASK_ON: false
+   META_ARCHITECTURE: GeneralizedRCNN
+   PANOPTIC_FPN:
+     COMBINE:
+       ENABLED: true
+       INSTANCES_CONFIDENCE_THRESH: 0.5
+       OVERLAP_THRESH: 0.5
+       STUFF_AREA_LIMIT: 4096
+     INSTANCE_LOSS_WEIGHT: 1.0
+   PIXEL_MEAN:
+   - 103.53
+   - 116.28
+   - 123.675
+   PIXEL_STD:
+   - 1.0
+   - 1.0
+   - 1.0
+   PROPOSAL_GENERATOR:
+     MIN_SIZE: 0
+     NAME: RPN
+   RESNETS:
+     DEFORM_MODULATED: false
+     DEFORM_NUM_GROUPS: 1
+     DEFORM_ON_PER_STAGE:
+     - false
+     - false
+     - false
+     - false
+     DEPTH: 50
+     NORM: FrozenBN
+     NUM_GROUPS: 1
+     OUT_FEATURES:
+     - res2
+     - res3
+     - res4
+     - res5
+     RES2_OUT_CHANNELS: 256
+     RES5_DILATION: 1
+     STEM_OUT_CHANNELS: 64
+     STRIDE_IN_1X1: true
+     WIDTH_PER_GROUP: 64
+   RETINANET:
+     BBOX_REG_WEIGHTS:
+     - 1.0
+     - 1.0
+     - 1.0
+     - 1.0
+     FOCAL_LOSS_ALPHA: 0.25
+     FOCAL_LOSS_GAMMA: 2.0
+     IN_FEATURES:
+     - p3
+     - p4
+     - p5
+     - p6
+     - p7
+     IOU_LABELS:
+     - 0
+     - -1
+     - 1
+     IOU_THRESHOLDS:
+     - 0.4
+     - 0.5
+     NMS_THRESH_TEST: 0.5
+     NUM_CLASSES: 80
+     NUM_CONVS: 4
+     PRIOR_PROB: 0.01
+     SCORE_THRESH_TEST: 0.05
+     SMOOTH_L1_LOSS_BETA: 0.1
+     TOPK_CANDIDATES_TEST: 1000
+   ROI_BOX_CASCADE_HEAD:
+     BBOX_REG_WEIGHTS:
+     - - 10.0
+       - 10.0
+       - 5.0
+       - 5.0
+     - - 20.0
+       - 20.0
+       - 10.0
+       - 10.0
+     - - 30.0
+       - 30.0
+       - 15.0
+       - 15.0
+     IOUS:
+     - 0.5
+     - 0.6
+     - 0.7
+   ROI_BOX_HEAD:
+     BBOX_REG_WEIGHTS:
+     - 10.0
+     - 10.0
+     - 5.0
+     - 5.0
+     CLS_AGNOSTIC_BBOX_REG: false
+     CONV_DIM: 256
+     FC_DIM: 1024
+     NAME: FastRCNNConvFCHead
+     NORM: ''
+     NUM_CONV: 0
+     NUM_FC: 2
+     POOLER_RESOLUTION: 7
+     POOLER_SAMPLING_RATIO: 0
+     POOLER_TYPE: ROIAlignV2
+     SMOOTH_L1_BETA: 0.0
+     TRAIN_ON_PRED_BOXES: false
+   ROI_HEADS:
+     BATCH_SIZE_PER_IMAGE: 512
+     IN_FEATURES:
+     - p2
+     - p3
+     - p4
+     - p5
+     IOU_LABELS:
+     - 0
+     - 1
+     IOU_THRESHOLDS:
+     - 0.5
+     NAME: StandardROIHeads
+     NMS_THRESH_TEST: 0.5
+     NUM_CLASSES: 80
+     POSITIVE_FRACTION: 0.25
+     PROPOSAL_APPEND_GT: true
+     SCORE_THRESH_TEST: 0.05
+   ROI_KEYPOINT_HEAD:
+     CONV_DIMS:
+     - 512
+     - 512
+     - 512
+     - 512
+     - 512
+     - 512
+     - 512
+     - 512
+     LOSS_WEIGHT: 1.0
+     MIN_KEYPOINTS_PER_IMAGE: 1
+     NAME: KRCNNConvDeconvUpsampleHead
+     NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS: true
+     NUM_KEYPOINTS: 17
+     POOLER_RESOLUTION: 14
+     POOLER_SAMPLING_RATIO: 0
+     POOLER_TYPE: ROIAlignV2
+   ROI_MASK_HEAD:
+     CLS_AGNOSTIC_MASK: false
+     CONV_DIM: 256
+     NAME: MaskRCNNConvUpsampleHead
+     NORM: ''
+     NUM_CONV: 4
+     POOLER_RESOLUTION: 14
+     POOLER_SAMPLING_RATIO: 0
+     POOLER_TYPE: ROIAlignV2
+   RPN:
+     BATCH_SIZE_PER_IMAGE: 256
+     BBOX_REG_WEIGHTS:
+     - 1.0
+     - 1.0
+     - 1.0
+     - 1.0
+     BOUNDARY_THRESH: -1
+     HEAD_NAME: StandardRPNHead
+     IN_FEATURES:
+     - p2
+     - p3
+     - p4
+     - p5
+     - p6
+     IOU_LABELS:
+     - 0
+     - -1
+     - 1
+     IOU_THRESHOLDS:
+     - 0.3
+     - 0.7
+     LOSS_WEIGHT: 1.0
+     NMS_THRESH: 0.7
+     POSITIVE_FRACTION: 0.5
+     POST_NMS_TOPK_TEST: 1000
+     POST_NMS_TOPK_TRAIN: 1000
+     PRE_NMS_TOPK_TEST: 1000
+     PRE_NMS_TOPK_TRAIN: 2000
+     SMOOTH_L1_BETA: 0.0
+   SEM_SEG_HEAD:
+     COMMON_STRIDE: 4
+     CONVS_DIM: 128
+     IGNORE_VALUE: 255
+     IN_FEATURES:
+     - p2
+     - p3
+     - p4
+     - p5
+     LOSS_WEIGHT: 1.0
+     NAME: SemSegFPNHead
+     NORM: GN
+     NUM_CLASSES: 54
+   WEIGHTS: detectron2://ImageNetPretrained/MSRA/R-50.pkl
+ OUTPUT_DIR: ./output
+ SEED: -1
+ SOLVER:
+   BASE_LR: 0.02
+   BIAS_LR_FACTOR: 1.0
+   CHECKPOINT_PERIOD: 20000
+   GAMMA: 0.1
+   IMS_PER_BATCH: 16
+   LR_SCHEDULER_NAME: WarmupMultiStepLR
+   MAX_ITER: 60000
+   MOMENTUM: 0.9
+   STEPS:
+   - 210000
+   - 250000
+   WARMUP_FACTOR: 0.001
+   WARMUP_ITERS: 1000
+   WARMUP_METHOD: linear
+   WEIGHT_DECAY: 0.0001
+   WEIGHT_DECAY_BIAS: 0.0001
+   WEIGHT_DECAY_NORM: 0.0
+ TEST:
+   AUG:
+     ENABLED: false
+     FLIP: true
+     MAX_SIZE: 4000
+     MIN_SIZES:
+     - 400
+     - 500
+     - 600
+     - 700
+     - 800
+     - 900
+     - 1000
+     - 1100
+     - 1200
+   DETECTIONS_PER_IMAGE: 100
+   EVAL_PERIOD: 0
+   EXPECTED_RESULTS: []
+   KEYPOINT_OKS_SIGMAS: []
+   PRECISE_BN:
+     ENABLED: false
+     NUM_ITER: 200
+ VERSION: 2
+ VIS_PERIOD: 0
model/layout-model-training/configs/prima/mask_rcnn_R_50_FPN_3x.yaml ADDED
@@ -0,0 +1,307 @@
+ CUDNN_BENCHMARK: false
+ DATALOADER:
+   ASPECT_RATIO_GROUPING: true
+   FILTER_EMPTY_ANNOTATIONS: true
+   NUM_WORKERS: 4
+   REPEAT_THRESHOLD: 0.0
+   SAMPLER_TRAIN: TrainingSampler
+ DATASETS:
+   PRECOMPUTED_PROPOSAL_TOPK_TEST: 1000
+   PRECOMPUTED_PROPOSAL_TOPK_TRAIN: 2000
+   PROPOSAL_FILES_TEST: []
+   PROPOSAL_FILES_TRAIN: []
+   TEST: []
+   TRAIN: []
+ GLOBAL:
+   HACK: 1.0
+ INPUT:
+   CROP:
+     ENABLED: false
+     SIZE:
+     - 0.9
+     - 0.9
+     TYPE: relative_range
+   FORMAT: BGR
+   MASK_FORMAT: polygon
+   MAX_SIZE_TEST: 1333
+   MAX_SIZE_TRAIN: 1333
+   MIN_SIZE_TEST: 800
+   MIN_SIZE_TRAIN:
+   - 640
+   - 672
+   - 704
+   - 736
+   - 768
+   - 800
+   MIN_SIZE_TRAIN_SAMPLING: choice
+ MODEL:
+   ANCHOR_GENERATOR:
+     ANGLES:
+     - - -90
+       - 0
+       - 90
+     ASPECT_RATIOS:
+     - - 0.5
+       - 1.0
+       - 2.0
+     NAME: DefaultAnchorGenerator
+     OFFSET: 0.0
+     SIZES:
+     - - 32
+     - - 64
+     - - 128
+     - - 256
+     - - 512
+   BACKBONE:
+     FREEZE_AT: 2
+     NAME: build_resnet_fpn_backbone
+   DEVICE: cuda
+   FPN:
+     FUSE_TYPE: sum
+     IN_FEATURES:
+     - res2
+     - res3
+     - res4
+     - res5
+     NORM: ''
+     OUT_CHANNELS: 256
+   KEYPOINT_ON: false
+   LOAD_PROPOSALS: false
+   MASK_ON: true
+   META_ARCHITECTURE: GeneralizedRCNN
+   PANOPTIC_FPN:
+     COMBINE:
+       ENABLED: true
+       INSTANCES_CONFIDENCE_THRESH: 0.5
+       OVERLAP_THRESH: 0.5
+       STUFF_AREA_LIMIT: 4096
+     INSTANCE_LOSS_WEIGHT: 1.0
+   PIXEL_MEAN:
+   - 103.53
+   - 116.28
+   - 123.675
+   PIXEL_STD:
+   - 1.0
+   - 1.0
+   - 1.0
+   PROPOSAL_GENERATOR:
+     MIN_SIZE: 0
+     NAME: RPN
+   RESNETS:
+     DEFORM_MODULATED: false
+     DEFORM_NUM_GROUPS: 1
+     DEFORM_ON_PER_STAGE:
+     - false
+     - false
+     - false
+     - false
+     DEPTH: 50
+     NORM: FrozenBN
+     NUM_GROUPS: 1
+     OUT_FEATURES:
+     - res2
+     - res3
+     - res4
+     - res5
+     RES2_OUT_CHANNELS: 256
+     RES5_DILATION: 1
+     STEM_OUT_CHANNELS: 64
+     STRIDE_IN_1X1: true
+     WIDTH_PER_GROUP: 64
+   RETINANET:
+     BBOX_REG_WEIGHTS:
+     - 1.0
+     - 1.0
+     - 1.0
+     - 1.0
+     FOCAL_LOSS_ALPHA: 0.25
+     FOCAL_LOSS_GAMMA: 2.0
+     IN_FEATURES:
+     - p3
+     - p4
+     - p5
+     - p6
+     - p7
+     IOU_LABELS:
+     - 0
+     - -1
+     - 1
+     IOU_THRESHOLDS:
+     - 0.4
+     - 0.5
+     NMS_THRESH_TEST: 0.5
+     NUM_CLASSES: 80
+     NUM_CONVS: 4
+     PRIOR_PROB: 0.01
+     SCORE_THRESH_TEST: 0.05
+     SMOOTH_L1_LOSS_BETA: 0.1
+     TOPK_CANDIDATES_TEST: 1000
+   ROI_BOX_CASCADE_HEAD:
+     BBOX_REG_WEIGHTS:
+     - - 10.0
+       - 10.0
+       - 5.0
+       - 5.0
+     - - 20.0
+       - 20.0
+       - 10.0
+       - 10.0
+     - - 30.0
+       - 30.0
+       - 15.0
+       - 15.0
+     IOUS:
+     - 0.5
+     - 0.6
+     - 0.7
+   ROI_BOX_HEAD:
+     BBOX_REG_WEIGHTS:
+     - 10.0
+     - 10.0
+     - 5.0
+     - 5.0
+     CLS_AGNOSTIC_BBOX_REG: false
+     CONV_DIM: 256
+     FC_DIM: 1024
+     NAME: FastRCNNConvFCHead
+     NORM: ''
+     NUM_CONV: 0
+     NUM_FC: 2
+     POOLER_RESOLUTION: 7
+     POOLER_SAMPLING_RATIO: 0
+     POOLER_TYPE: ROIAlignV2
+     SMOOTH_L1_BETA: 0.0
+     TRAIN_ON_PRED_BOXES: false
+   ROI_HEADS:
+     BATCH_SIZE_PER_IMAGE: 512
+     IN_FEATURES:
+     - p2
+     - p3
+     - p4
+     - p5
+     IOU_LABELS:
+     - 0
+     - 1
+     IOU_THRESHOLDS:
+     - 0.5
+     NAME: StandardROIHeads
+     NMS_THRESH_TEST: 0.5
+     NUM_CLASSES: 80
+     POSITIVE_FRACTION: 0.25
+     PROPOSAL_APPEND_GT: true
+     SCORE_THRESH_TEST: 0.05
+   ROI_KEYPOINT_HEAD:
+     CONV_DIMS:
+     - 512
+     - 512
+     - 512
+     - 512
+     - 512
+     - 512
+     - 512
+     - 512
+     LOSS_WEIGHT: 1.0
+     MIN_KEYPOINTS_PER_IMAGE: 1
+     NAME: KRCNNConvDeconvUpsampleHead
+     NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS: true
+     NUM_KEYPOINTS: 17
+     POOLER_RESOLUTION: 14
+     POOLER_SAMPLING_RATIO: 0
+     POOLER_TYPE: ROIAlignV2
+   ROI_MASK_HEAD:
+     CLS_AGNOSTIC_MASK: false
+     CONV_DIM: 256
+     NAME: MaskRCNNConvUpsampleHead
+     NORM: ''
+     NUM_CONV: 4
+     POOLER_RESOLUTION: 14
+     POOLER_SAMPLING_RATIO: 0
+     POOLER_TYPE: ROIAlignV2
+   RPN:
+     BATCH_SIZE_PER_IMAGE: 256
+     BBOX_REG_WEIGHTS:
+     - 1.0
+     - 1.0
+     - 1.0
+     - 1.0
+     BOUNDARY_THRESH: -1
+     HEAD_NAME: StandardRPNHead
+     IN_FEATURES:
+     - p2
+     - p3
+     - p4
+     - p5
+     - p6
+     IOU_LABELS:
+     - 0
+     - -1
+     - 1
+     IOU_THRESHOLDS:
+     - 0.3
+     - 0.7
+     LOSS_WEIGHT: 1.0
+     NMS_THRESH: 0.7
+     POSITIVE_FRACTION: 0.5
+     POST_NMS_TOPK_TEST: 1000
+     POST_NMS_TOPK_TRAIN: 1000
+     PRE_NMS_TOPK_TEST: 1000
+     PRE_NMS_TOPK_TRAIN: 2000
+     SMOOTH_L1_BETA: 0.0
+   SEM_SEG_HEAD:
+     COMMON_STRIDE: 4
+     CONVS_DIM: 128
+     IGNORE_VALUE: 255
+     IN_FEATURES:
+     - p2
+     - p3
+     - p4
+     - p5
+     LOSS_WEIGHT: 1.0
+     NAME: SemSegFPNHead
+     NORM: GN
+     NUM_CLASSES: 54
+   WEIGHTS: detectron2://ImageNetPretrained/MSRA/R-50.pkl
+ OUTPUT_DIR: ./output
+ SEED: -1
+ SOLVER:
+   BASE_LR: 0.02
+   BIAS_LR_FACTOR: 1.0
+   CHECKPOINT_PERIOD: 20000
+   GAMMA: 0.1
+   IMS_PER_BATCH: 16
+   LR_SCHEDULER_NAME: WarmupMultiStepLR
+   MAX_ITER: 60000
+   MOMENTUM: 0.9
+   STEPS:
+   - 210000
+   - 250000
+   WARMUP_FACTOR: 0.001
+   WARMUP_ITERS: 1000
+   WARMUP_METHOD: linear
+   WEIGHT_DECAY: 0.0001
+   WEIGHT_DECAY_BIAS: 0.0001
+   WEIGHT_DECAY_NORM: 0.0
+ TEST:
+   AUG:
+     ENABLED: false
+     FLIP: true
+     MAX_SIZE: 4000
+     MIN_SIZES:
+     - 400
+     - 500
+     - 600
+     - 700
+     - 800
+     - 900
+     - 1000
+     - 1100
+     - 1200
+   DETECTIONS_PER_IMAGE: 100
+   EVAL_PERIOD: 0
+   EXPECTED_RESULTS: []
+   KEYPOINT_OKS_SIGMAS: []
+   PRECISE_BN:
+     ENABLED: false
+     NUM_ITER: 200
+ VERSION: 2
+ VIS_PERIOD: 0
model/layout-model-training/requirements.txt ADDED
@@ -0,0 +1,6 @@
+ layoutparser
+ funcy
+ bs4
+ scikit-learn
+ imagesize
+ tqdm
model/layout-model-training/scripts/train_prima.sh ADDED
@@ -0,0 +1,17 @@
+ #!/bin/bash
+
+ cd ../tools
+
+ python convert_prima_to_coco.py \
+     --prima_datapath ../data/prima \
+     --anno_savepath ../data/prima/annotations.json
+
+ python train_net.py \
+     --dataset_name prima-layout \
+     --json_annotation_train ../data/prima/annotations-train.json \
+     --image_path_train ../data/prima/Images \
+     --json_annotation_val ../data/prima/annotations-val.json \
+     --image_path_val ../data/prima/Images \
+     --config-file ../configs/prima/mask_rcnn_R_50_FPN_3x.yaml \
+     OUTPUT_DIR ../outputs/prima/mask_rcnn_R_50_FPN_3x/ \
+     SOLVER.IMS_PER_BATCH 2
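
Note that the script begins with `cd ../tools`, so it expects to be launched from inside the `scripts/` directory; a typical invocation would be:

```bash
cd model/layout-model-training/scripts
bash train_prima.sh
```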
model/layout-model-training/tools/convert_prima_to_coco.py ADDED
@@ -0,0 +1,225 @@
+ import os, re, json
+ import imagesize
+ from glob import glob
+ from bs4 import BeautifulSoup
+ import numpy as np
+ from PIL import Image
+ import argparse
+ from tqdm import tqdm
+ import sys
+ sys.path.append('..')
+ from utils import cocosplit
+
+ class NpEncoder(json.JSONEncoder):
+     def default(self, obj):
+         if isinstance(obj, np.integer):
+             return int(obj)
+         elif isinstance(obj, np.floating):
+             return float(obj)
+         elif isinstance(obj, np.ndarray):
+             return obj.tolist()
+         else:
+             return super(NpEncoder, self).default(obj)
+
+ def cvt_coords_to_array(obj):
+     return np.array(
+         [(float(pt['x']), float(pt['y']))
+          for pt in obj.find_all("Point")]
+     )
+
+ def cal_polyarea(points):
+     # Shoelace formula for the area of a simple polygon
+     x = points[:, 0]
+     y = points[:, 1]
+     return 0.5 * np.abs(np.dot(x, np.roll(y, 1)) - np.dot(y, np.roll(x, 1)))
+
+ def _create_category(schema=0):
+
+     if schema == 0:
+
+         categories = \
+             [{"supercategory": "layout", "id": 0, "name": "Background"},
+              {"supercategory": "layout", "id": 1, "name": "TextRegion"},
+              {"supercategory": "layout", "id": 2, "name": "ImageRegion"},
+              {"supercategory": "layout", "id": 3, "name": "TableRegion"},
+              {"supercategory": "layout", "id": 4, "name": "MathsRegion"},
+              {"supercategory": "layout", "id": 5, "name": "SeparatorRegion"},
+              {"supercategory": "layout", "id": 6, "name": "OtherRegion"}]
+
+         find_categories = lambda name: \
+             [val["id"] for val in categories if val['name'] == name][0]
+
+         conversion = \
+             {
+                 'TextRegion':        find_categories("TextRegion"),
+                 'TableRegion':       find_categories("TableRegion"),
+                 'MathsRegion':       find_categories("MathsRegion"),
+                 'ChartRegion':       find_categories("ImageRegion"),
+                 'GraphicRegion':     find_categories("ImageRegion"),
+                 'ImageRegion':       find_categories("ImageRegion"),
+                 'LineDrawingRegion': find_categories("OtherRegion"),
+                 'SeparatorRegion':   find_categories("SeparatorRegion"),
+                 'NoiseRegion':       find_categories("OtherRegion"),
+                 'FrameRegion':       find_categories("OtherRegion"),
+             }
+
+         return categories, conversion
+
+ _categories, _categories_conversion = _create_category(schema=0)
+
+ _info = {
+     "description": "PRIMA Layout Analysis Dataset",
+     "url": "https://www.primaresearch.org/datasets/Layout_Analysis",
+     "version": "1.0",
+     "year": 2010,
+     "contributor": "PRIMA Research",
+     "date_created": "2020/09/01",
+ }
+
+ def _load_soup(filename):
+     with open(filename, "r") as fp:
+         soup = BeautifulSoup(fp.read(), 'xml')
+     return soup
+
+ def _image_template(image_id, image_path):
+     width, height = imagesize.get(image_path)
+     return {
+         "file_name": os.path.basename(image_path),
+         "height": height,
+         "width": width,
+         "id": int(image_id)
+     }
+
+ def _anno_template(anno_id, image_id, pts, obj_tag):
+     x_1, x_2 = pts[:, 0].min(), pts[:, 0].max()
+     y_1, y_2 = pts[:, 1].min(), pts[:, 1].max()
+     height = y_2 - y_1
+     width = x_2 - x_1
+     return {
+         "segmentation": [pts.flatten().tolist()],
+         "area": cal_polyarea(pts),
+         "iscrowd": 0,
+         "image_id": image_id,
+         "bbox": [x_1, y_1, width, height],
+         "category_id": _categories_conversion[obj_tag],
+         "id": anno_id
+     }
+
+ class PRIMADataset():
+
+     def __init__(self, base_path, anno_path='XML',
+                  image_path='Images'):
+         self.base_path = base_path
+         self.anno_path = os.path.join(base_path, anno_path)
+         self.image_path = os.path.join(base_path, image_path)
+         self._ids = self.find_all_image_ids()
+
+     def __len__(self):
+         return len(self._ids)
+
+     def __getitem__(self, idx):
+         return self.load_image_and_annotation(idx)
+
+     def find_all_annotation_files(self):
+         return glob(os.path.join(self.anno_path, '*.xml'))
+
+     def find_all_image_ids(self):
+         replacer = lambda s: os.path.basename(s).replace('pc-', '').replace('.xml', '')
+         return [replacer(s) for s in self.find_all_annotation_files()]
+
+     def load_image_and_annotation(self, idx):
+         image_id = self._ids[idx]
+         image_path = os.path.join(self.image_path, f'{image_id}.tif')
+         image = Image.open(image_path)
+         anno = self.load_annotation(idx)
+         return image, anno
+
+     def load_annotation(self, idx):
+         image_id = self._ids[idx]
+         anno_path = os.path.join(self.anno_path, f'pc-{image_id}.xml')
+         # A dirty hack to load files with or without the pc- prefix simultaneously
+         if not os.path.exists(anno_path):
+             anno_path = os.path.join(self.anno_path, f'{image_id}.xml')
+         assert os.path.exists(anno_path), "Invalid path"
+         anno = _load_soup(anno_path)
+         return anno
+
+     def convert_to_COCO(self, save_path):
+         all_image_infos = []
+         all_anno_infos = []
+         anno_id = 0
+
+         for idx, image_id in enumerate(tqdm(self._ids)):
+             # We use the idx as the image id
+             image_path = os.path.join(self.image_path, f'{image_id}.tif')
+             image_info = _image_template(idx, image_path)
+             all_image_infos.append(image_info)
+
+             anno = self.load_annotation(idx)
+
+             for item in anno.find_all(re.compile(".*Region")):
+                 pts = cvt_coords_to_array(item.Coords)
+                 if 0 not in pts.shape:
+                     # Sometimes there will be polygons with fewer
+                     # than 4 points, and they could not be appropriately
+                     # handled by the COCO format. So we just drop them.
+                     if pts.shape[0] >= 4:
+                         anno_info = _anno_template(anno_id, idx, pts, item.name)
+                         all_anno_infos.append(anno_info)
+                         anno_id += 1
+
+         final_annotation = {
+             "info": _info,
+             "licenses": [],
+             "images": all_image_infos,
+             "annotations": all_anno_infos,
+             "categories": _categories}
+
+         with open(save_path, 'w') as fp:
+             json.dump(final_annotation, fp, cls=NpEncoder)
+
+         return final_annotation
+
+
+ parser = argparse.ArgumentParser()
+
+ parser.add_argument('--prima_datapath', type=str, default='./data/prima', help='the path to the prima data folders')
+ parser.add_argument('--anno_savepath', type=str, default='./annotations.json', help='the path to save the new annotations')
+
+
+ if __name__ == "__main__":
+     args = parser.parse_args()
+
+     print("Start running the conversion script")
+
+     print(f"Loading the information from the path {args.prima_datapath}")
+     dataset = PRIMADataset(args.prima_datapath)
+
+     print(f"Saving the annotation to {args.anno_savepath}")
+     res = dataset.convert_to_COCO(args.anno_savepath)
+
+     cocosplit.main(
+         args.anno_savepath,
+         split_ratio=0.8,
+         having_annotations=True,
+         train_save_path=args.anno_savepath.replace('.json', '-train.json'),
+         test_save_path=args.anno_savepath.replace('.json', '-val.json'),
+         random_state=24)
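
For reference, the converter can also be run on its own; this is the same invocation that `scripts/train_prima.sh` uses, and because the script ends by calling `cocosplit.main` with `split_ratio=0.8`, it emits the train/val splits alongside the full annotation file:

```bash
cd model/layout-model-training/tools
python convert_prima_to_coco.py \
    --prima_datapath ../data/prima \
    --anno_savepath ../data/prima/annotations.json
# Writes annotations.json, annotations-train.json, and annotations-val.json
# into ../data/prima/.
```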
model/layout-model-training/tools/train_net.py ADDED
@@ -0,0 +1,229 @@
+ """
+ The script is based on https://github.com/facebookresearch/detectron2/blob/master/tools/train_net.py.
+ """
+
+ import logging
+ import os
+ import json
+ from collections import OrderedDict
+ import detectron2.utils.comm as comm
+ import detectron2.data.transforms as T
+ from detectron2.checkpoint import DetectionCheckpointer
+ from detectron2.config import get_cfg
+ from detectron2.data import DatasetMapper, build_detection_train_loader
+
+ from detectron2.data.datasets import register_coco_instances
+
+ from detectron2.engine import (
+     DefaultTrainer,
+     default_argument_parser,
+     default_setup,
+     hooks,
+     launch,
+ )
+ from detectron2.evaluation import (
+     COCOEvaluator,
+     verify_results,
+ )
+ from detectron2.modeling import GeneralizedRCNNWithTTA
+ import pandas as pd
+
+
+ def get_augs(cfg):
+     """Add all the desired augmentations here. A list of available augmentations
+     can be found here:
+     https://detectron2.readthedocs.io/en/latest/modules/data_transforms.html
+     """
+     augs = [
+         T.ResizeShortestEdge(
+             cfg.INPUT.MIN_SIZE_TRAIN,
+             cfg.INPUT.MAX_SIZE_TRAIN,
+             cfg.INPUT.MIN_SIZE_TRAIN_SAMPLING,
+         )
+     ]
+     if cfg.INPUT.CROP.ENABLED:
+         augs.append(
+             T.RandomCrop_CategoryAreaConstraint(
+                 cfg.INPUT.CROP.TYPE,
+                 cfg.INPUT.CROP.SIZE,
+                 cfg.INPUT.CROP.SINGLE_CATEGORY_MAX_AREA,
+                 cfg.MODEL.SEM_SEG_HEAD.IGNORE_VALUE,
+             )
+         )
+     horizontal_flip: bool = cfg.INPUT.RANDOM_FLIP == "horizontal"
+     augs.append(T.RandomFlip(horizontal=horizontal_flip, vertical=not horizontal_flip))
+     # Rotate the image between -90 and 0 degrees clockwise around the centre
+     augs.append(T.RandomRotation(angle=[-90.0, 0.0]))
+     return augs
+
+
+ class Trainer(DefaultTrainer):
+     """
+     We use the "DefaultTrainer" which contains pre-defined default logic for
+     standard training workflow. It may not work for you, especially if you
+     are working on a new research project. In that case you can use the cleaner
+     "SimpleTrainer", or write your own training loop. You can use
+     "tools/plain_train_net.py" as an example.
+
+     Adapted from:
+     https://github.com/facebookresearch/detectron2/blob/master/projects/DeepLab/train_net.py
+     """
+
+     @classmethod
+     def build_train_loader(cls, cfg):
+         mapper = DatasetMapper(cfg, is_train=True, augmentations=get_augs(cfg))
+         return build_detection_train_loader(cfg, mapper=mapper)
+
+     @classmethod
+     def build_evaluator(cls, cfg, dataset_name, output_folder=None):
+         """
+         Returns:
+             DatasetEvaluator or None
+
+         It is not implemented by default.
+         """
+         return COCOEvaluator(dataset_name, cfg, True, output_folder)
+
+     @classmethod
+     def test_with_TTA(cls, cfg, model):
+         logger = logging.getLogger("detectron2.trainer")
+         # At the end of training, run an evaluation with TTA.
+         # Only supports some R-CNN models.
+         logger.info("Running inference with test-time augmentation ...")
+         model = GeneralizedRCNNWithTTA(cfg, model)
+         evaluators = [
+             cls.build_evaluator(
+                 cfg, name, output_folder=os.path.join(cfg.OUTPUT_DIR, "inference_TTA")
+             )
+             for name in cfg.DATASETS.TEST
+         ]
+         res = cls.test(cfg, model, evaluators)
+         res = OrderedDict({k + "_TTA": v for k, v in res.items()})
+         return res
+
+     @classmethod
+     def eval_and_save(cls, cfg, model):
+         evaluators = [
+             cls.build_evaluator(
+                 cfg, name, output_folder=os.path.join(cfg.OUTPUT_DIR, "inference")
+             )
+             for name in cfg.DATASETS.TEST
+         ]
+         res = cls.test(cfg, model, evaluators)
+         pd.DataFrame(res).to_csv(os.path.join(cfg.OUTPUT_DIR, "eval.csv"))
+         return res
+
+
+ def setup(args):
+     """
+     Create configs and perform basic setups.
+     """
+     cfg = get_cfg()
+
+     if args.config_file != "":
+         cfg.merge_from_file(args.config_file)
+     cfg.merge_from_list(args.opts)
+
+     # Set the number of classes from the training annotations
+     with open(args.json_annotation_train, "r") as fp:
+         anno_file = json.load(fp)
+
+     cfg.MODEL.ROI_HEADS.NUM_CLASSES = len(anno_file["categories"])
+     del anno_file
+
+     cfg.DATASETS.TRAIN = (f"{args.dataset_name}-train",)
+     cfg.DATASETS.TEST = (f"{args.dataset_name}-val",)
+     cfg.freeze()
+     default_setup(cfg, args)
+     return cfg
+
+
+ def main(args):
+     # Register Datasets
+     register_coco_instances(
+         f"{args.dataset_name}-train",
+         {},
+         args.json_annotation_train,
+         args.image_path_train,
+     )
+
+     register_coco_instances(
+         f"{args.dataset_name}-val",
+         {},
+         args.json_annotation_val,
+         args.image_path_val
+     )
+     cfg = setup(args)
+
+     if args.eval_only:
+         model = Trainer.build_model(cfg)
+         DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load(
+             cfg.MODEL.WEIGHTS, resume=args.resume
+         )
+         res = Trainer.test(cfg, model)
+
+         if cfg.TEST.AUG.ENABLED:
+             res.update(Trainer.test_with_TTA(cfg, model))
+         if comm.is_main_process():
+             verify_results(cfg, res)
+
+         # Save the evaluation results
+         pd.DataFrame(res).to_csv(f"{cfg.OUTPUT_DIR}/eval.csv")
+         return res
+
+     # Ensure that the output directory exists
+     os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
+
+     """
+     If you'd like to do anything fancier than the standard training logic,
+     consider writing your own training loop (see plain_train_net.py) or
+     subclassing the trainer.
+     """
+     trainer = Trainer(cfg)
+     trainer.resume_or_load(resume=args.resume)
+     trainer.register_hooks(
+         [hooks.EvalHook(0, lambda: trainer.eval_and_save(cfg, trainer.model))]
+     )
+     if cfg.TEST.AUG.ENABLED:
+         trainer.register_hooks(
+             [hooks.EvalHook(0, lambda: trainer.test_with_TTA(cfg, trainer.model))]
+         )
+     return trainer.train()
+
+
+ if __name__ == "__main__":
+     parser = default_argument_parser()
+
+     # Extra configurations for dataset names and paths
+     parser.add_argument(
+         "--dataset_name",
+         help="The Dataset Name")
+     parser.add_argument(
+         "--json_annotation_train",
+         help="The path to the training set JSON annotation",
+     )
+     parser.add_argument(
+         "--image_path_train",
+         help="The path to the training set image folder",
+     )
+     parser.add_argument(
+         "--json_annotation_val",
+         help="The path to the validation set JSON annotation",
+     )
+     parser.add_argument(
+         "--image_path_val",
+         help="The path to the validation set image folder",
+     )
+     args = parser.parse_args()
+     print("Command Line Args:", args)
+
+     # Dataset registration is moved into the main function to support multi-GPU
+     # training; see https://github.com/facebookresearch/detectron2/issues/253#issuecomment-554216517
+
+     launch(
+         main,
+         args.num_gpus,
+         num_machines=args.num_machines,
+         machine_rank=args.machine_rank,
+         dist_url=args.dist_url,
+         args=(args,),
+     )
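
Since the script builds its parser with Detectron2's `default_argument_parser`, it also accepts the standard flags such as `--eval-only`, `--resume`, and `--num-gpus`. Here is a sketch of evaluating an already-trained model; the checkpoint path is a placeholder, and `--json_annotation_train` is still required because `setup()` reads it to count the categories:

```bash
python train_net.py \
    --dataset_name prima-layout \
    --json_annotation_train ../data/prima/annotations-train.json \
    --image_path_train ../data/prima/Images \
    --json_annotation_val ../data/prima/annotations-val.json \
    --image_path_val ../data/prima/Images \
    --config-file ../configs/prima/mask_rcnn_R_50_FPN_3x.yaml \
    --eval-only \
    MODEL.WEIGHTS ../outputs/prima/mask_rcnn_R_50_FPN_3x/model_final.pth
```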
model/layout-model-training/utils/__init__.py ADDED
File without changes
model/layout-model-training/utils/cocosplit.py ADDED
@@ -0,0 +1,112 @@
+ # Modified based on https://github.com/akarazniewicz/cocosplit/blob/master/cocosplit.py
+
+ import json
+ import argparse
+ import funcy
+ from sklearn.model_selection import train_test_split
+
+ parser = argparse.ArgumentParser(
+     description="Splits COCO annotations file into training and test sets."
+ )
+ parser.add_argument(
+     "--annotation-path",
+     metavar="coco_annotations",
+     type=str,
+     help="Path to COCO annotations file.",
+ )
+ parser.add_argument(
+     "--train", type=str, help="Where to store COCO training annotations"
+ )
+ parser.add_argument("--test", type=str, help="Where to store COCO test annotations")
+ parser.add_argument(
+     "--split-ratio",
+     dest="split_ratio",
+     type=float,
+     required=True,
+     help="A percentage of a split; a number in (0, 1)",
+ )
+ parser.add_argument(
+     "--having-annotations",
+     dest="having_annotations",
+     action="store_true",
+     help="Ignore all images without annotations. Keep only those with at least one annotation",
+ )
+
+
+ def save_coco(file, tagged_data):
+     with open(file, "wt", encoding="UTF-8") as coco:
+         json.dump(tagged_data, coco, indent=2, sort_keys=True)
+
+
+ def filter_annotations(annotations, images):
+     image_ids = funcy.lmap(lambda i: int(i["id"]), images)
+     return funcy.lfilter(lambda a: int(a["image_id"]) in image_ids, annotations)
+
+
+ def main(
+     annotation_path,
+     split_ratio,
+     having_annotations,
+     train_save_path,
+     test_save_path,
+     random_state=None,
+ ):
+
+     with open(annotation_path, "rt", encoding="UTF-8") as annotations:
+         coco = json.load(annotations)
+
+     images = coco["images"]
+     annotations = coco["annotations"]
+
+     ids_with_annotations = funcy.lmap(lambda a: int(a["image_id"]), annotations)
+
+     # Images with annotations
+     img_ann = funcy.lremove(lambda i: i["id"] not in ids_with_annotations, images)
+     tr_ann, ts_ann = train_test_split(
+         img_ann, train_size=split_ratio, random_state=random_state
+     )
+
+     img_wo_ann = funcy.lremove(lambda i: i["id"] in ids_with_annotations, images)
+     if len(img_wo_ann) > 0:
+         tr_wo_ann, ts_wo_ann = train_test_split(
+             img_wo_ann, train_size=split_ratio, random_state=random_state
+         )
+     else:
+         tr_wo_ann, ts_wo_ann = [], []  # Images without annotations
+
+     if having_annotations:
+         tr, ts = tr_ann, ts_ann
+
+     else:
+         # Merging the 2 image lists (i.e. with and without annotation)
+         tr_ann.extend(tr_wo_ann)
+         ts_ann.extend(ts_wo_ann)
+
+         tr, ts = tr_ann, ts_ann
+
+     # Train Data
+     coco.update({"images": tr, "annotations": filter_annotations(annotations, tr)})
+     save_coco(train_save_path, coco)
+
+     # Test Data
+     coco.update({"images": ts, "annotations": filter_annotations(annotations, ts)})
+     save_coco(test_save_path, coco)
+
+     print(
+         "Saved {} entries in {} and {} in {}".format(
+             len(tr), train_save_path, len(ts), test_save_path
+         )
+     )
+
+
+ if __name__ == "__main__":
+     args = parser.parse_args()
+
+     main(
+         args.annotation_path,
+         args.split_ratio,
+         args.having_annotations,
+         args.train,
+         args.test,
+         random_state=24,
+     )
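
Besides being imported by `convert_prima_to_coco.py`, the splitter has its own CLI; the flags below mirror its argparse definitions (the paths are placeholders):

```bash
python cocosplit.py \
    --annotation-path ../data/prima/annotations.json \
    --train ../data/prima/annotations-train.json \
    --test ../data/prima/annotations-val.json \
    --split-ratio 0.8 \
    --having-annotations
```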