Eterna2 committed on
Commit
f46e640
1 Parent(s): 243db35

Upload model weights

Browse files
Files changed (24) hide show
  1. README.md +2 -0
  2. models/HJDataset/HJDataset-faster_rcnn_R_50_FPN_3x-config.yml +309 -0
  3. models/HJDataset/HJDataset-faster_rcnn_R_50_FPN_3x-model.pth +3 -0
  4. models/HJDataset/HJDataset-mask_rcnn_R_50_FPN_3x-config.yml +309 -0
  5. models/HJDataset/HJDataset-mask_rcnn_R_50_FPN_3x-model.pth +3 -0
  6. models/HJDataset/HJDataset-retinanet_R_50_FPN_3x-config.yml +310 -0
  7. models/HJDataset/HJDataset-retinanet_R_50_FPN_3x-model.pth +3 -0
  8. MFD-faster_rcnn_R_50_FPN_3x-config.yml → models/MFD/MFD-faster_rcnn_R_50_FPN_3x-config.yml +0 -0
  9. MFD-faster_rcnn_R_50_FPN_3x-model.pth → models/MFD/MFD-faster_rcnn_R_50_FPN_3x-model.pth +0 -0
  10. models/NewspaperNavigator/NewspaperNavigator-faster_rcnn_R_50_FPN_3x-config.yml +311 -0
  11. models/NewspaperNavigator/NewspaperNavigator-faster_rcnn_R_50_FPN_3x-model.pth +3 -0
  12. models/PrimaLayout/PrimaLayout-mask_rcnn_R_50_FPN_3x-config.yml +315 -0
  13. models/PrimaLayout/PrimaLayout-mask_rcnn_R_50_FPN_3x-model.pth +3 -0
  14. models/PubLayNet/PubLayNet-faster_rcnn_R_50_FPN_3x-config.yml +309 -0
  15. models/PubLayNet/PubLayNet-faster_rcnn_R_50_FPN_3x-model.pth +3 -0
  16. models/PubLayNet/PubLayNet-mask_rcnn_R_50_FPN_3x-config.yml +309 -0
  17. models/PubLayNet/PubLayNet-mask_rcnn_R_50_FPN_3x-model.pth +3 -0
  18. models/PubLayNet/PubLayNet-mask_rcnn_X_101_32x8d_FPN_3x-config.yml +309 -0
  19. models/PubLayNet/PubLayNet-mask_rcnn_X_101_32x8d_FPN_3x-model.pth +3 -0
  20. models/TableBank/TableBank-faster_rcnn_R_101_FPN_3x-config.yml +317 -0
  21. models/TableBank/TableBank-faster_rcnn_R_101_FPN_3x-model.pth +3 -0
  22. models/TableBank/TableBank-faster_rcnn_R_50_FPN_3x-config.yml +317 -0
  23. models/TableBank/TableBank-faster_rcnn_R_50_FPN_3x-model.pth +3 -0
  24. requirements.txt +0 -10
README.md CHANGED
@@ -1,3 +1,5 @@
1
  ---
2
  license: apache-2.0
3
  ---
 
 
 
1
  ---
2
  license: apache-2.0
3
  ---
4
+
5
+ Model binaries downloaded from https://github.com/Layout-Parser/layout-parser/blob/c0044a08da7a630e2241348e597a08ba6aa87ba1/src/layoutparser/models/detectron2/catalog.py
models/HJDataset/HJDataset-faster_rcnn_R_50_FPN_3x-config.yml ADDED
@@ -0,0 +1,309 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ CUDNN_BENCHMARK: false
2
+ DATALOADER:
3
+ ASPECT_RATIO_GROUPING: true
4
+ FILTER_EMPTY_ANNOTATIONS: true
5
+ NUM_WORKERS: 2
6
+ REPEAT_THRESHOLD: 0.0
7
+ SAMPLER_TRAIN: TrainingSampler
8
+ DATASETS:
9
+ PRECOMPUTED_PROPOSAL_TOPK_TEST: 1000
10
+ PRECOMPUTED_PROPOSAL_TOPK_TRAIN: 2000
11
+ PROPOSAL_FILES_TEST: []
12
+ PROPOSAL_FILES_TRAIN: []
13
+ TEST:
14
+ - HJDataset_test
15
+ TRAIN:
16
+ - HJDataset_train
17
+ GLOBAL:
18
+ HACK: 1.0
19
+ INPUT:
20
+ CROP:
21
+ ENABLED: false
22
+ SIZE:
23
+ - 0.9
24
+ - 0.9
25
+ TYPE: relative_range
26
+ FORMAT: BGR
27
+ MASK_FORMAT: polygon
28
+ MAX_SIZE_TEST: 1333
29
+ MAX_SIZE_TRAIN: 1333
30
+ MIN_SIZE_TEST: 800
31
+ MIN_SIZE_TRAIN:
32
+ - 640
33
+ - 672
34
+ - 704
35
+ - 736
36
+ - 768
37
+ - 800
38
+ MIN_SIZE_TRAIN_SAMPLING: choice
39
+ MODEL:
40
+ ANCHOR_GENERATOR:
41
+ ANGLES:
42
+ - - -90
43
+ - 0
44
+ - 90
45
+ ASPECT_RATIOS:
46
+ - - 0.5
47
+ - 1.0
48
+ - 2.0
49
+ NAME: DefaultAnchorGenerator
50
+ OFFSET: 0.0
51
+ SIZES:
52
+ - - 32
53
+ - - 64
54
+ - - 128
55
+ - - 256
56
+ - - 512
57
+ BACKBONE:
58
+ FREEZE_AT: 2
59
+ NAME: build_resnet_fpn_backbone
60
+ DEVICE: cuda
61
+ FPN:
62
+ FUSE_TYPE: sum
63
+ IN_FEATURES:
64
+ - res2
65
+ - res3
66
+ - res4
67
+ - res5
68
+ NORM: ''
69
+ OUT_CHANNELS: 256
70
+ KEYPOINT_ON: false
71
+ LOAD_PROPOSALS: false
72
+ MASK_ON: false
73
+ META_ARCHITECTURE: GeneralizedRCNN
74
+ PANOPTIC_FPN:
75
+ COMBINE:
76
+ ENABLED: true
77
+ INSTANCES_CONFIDENCE_THRESH: 0.5
78
+ OVERLAP_THRESH: 0.5
79
+ STUFF_AREA_LIMIT: 4096
80
+ INSTANCE_LOSS_WEIGHT: 1.0
81
+ PIXEL_MEAN:
82
+ - 103.53
83
+ - 116.28
84
+ - 123.675
85
+ PIXEL_STD:
86
+ - 1.0
87
+ - 1.0
88
+ - 1.0
89
+ PROPOSAL_GENERATOR:
90
+ MIN_SIZE: 0
91
+ NAME: RPN
92
+ RESNETS:
93
+ DEFORM_MODULATED: false
94
+ DEFORM_NUM_GROUPS: 1
95
+ DEFORM_ON_PER_STAGE:
96
+ - false
97
+ - false
98
+ - false
99
+ - false
100
+ DEPTH: 50
101
+ NORM: FrozenBN
102
+ NUM_GROUPS: 1
103
+ OUT_FEATURES:
104
+ - res2
105
+ - res3
106
+ - res4
107
+ - res5
108
+ RES2_OUT_CHANNELS: 256
109
+ RES5_DILATION: 1
110
+ STEM_OUT_CHANNELS: 64
111
+ STRIDE_IN_1X1: true
112
+ WIDTH_PER_GROUP: 64
113
+ RETINANET:
114
+ BBOX_REG_WEIGHTS:
115
+ - 1.0
116
+ - 1.0
117
+ - 1.0
118
+ - 1.0
119
+ FOCAL_LOSS_ALPHA: 0.25
120
+ FOCAL_LOSS_GAMMA: 2.0
121
+ IN_FEATURES:
122
+ - p3
123
+ - p4
124
+ - p5
125
+ - p6
126
+ - p7
127
+ IOU_LABELS:
128
+ - 0
129
+ - -1
130
+ - 1
131
+ IOU_THRESHOLDS:
132
+ - 0.4
133
+ - 0.5
134
+ NMS_THRESH_TEST: 0.5
135
+ NUM_CLASSES: 80
136
+ NUM_CONVS: 4
137
+ PRIOR_PROB: 0.01
138
+ SCORE_THRESH_TEST: 0.05
139
+ SMOOTH_L1_LOSS_BETA: 0.1
140
+ TOPK_CANDIDATES_TEST: 1000
141
+ ROI_BOX_CASCADE_HEAD:
142
+ BBOX_REG_WEIGHTS:
143
+ - - 10.0
144
+ - 10.0
145
+ - 5.0
146
+ - 5.0
147
+ - - 20.0
148
+ - 20.0
149
+ - 10.0
150
+ - 10.0
151
+ - - 30.0
152
+ - 30.0
153
+ - 15.0
154
+ - 15.0
155
+ IOUS:
156
+ - 0.5
157
+ - 0.6
158
+ - 0.7
159
+ ROI_BOX_HEAD:
160
+ BBOX_REG_WEIGHTS:
161
+ - 10.0
162
+ - 10.0
163
+ - 5.0
164
+ - 5.0
165
+ CLS_AGNOSTIC_BBOX_REG: false
166
+ CONV_DIM: 256
167
+ FC_DIM: 1024
168
+ NAME: FastRCNNConvFCHead
169
+ NORM: ''
170
+ NUM_CONV: 0
171
+ NUM_FC: 2
172
+ POOLER_RESOLUTION: 7
173
+ POOLER_SAMPLING_RATIO: 0
174
+ POOLER_TYPE: ROIAlignV2
175
+ SMOOTH_L1_BETA: 0.0
176
+ TRAIN_ON_PRED_BOXES: false
177
+ ROI_HEADS:
178
+ BATCH_SIZE_PER_IMAGE: 256
179
+ IN_FEATURES:
180
+ - p2
181
+ - p3
182
+ - p4
183
+ - p5
184
+ IOU_LABELS:
185
+ - 0
186
+ - 1
187
+ IOU_THRESHOLDS:
188
+ - 0.5
189
+ NAME: StandardROIHeads
190
+ NMS_THRESH_TEST: 0.5
191
+ NUM_CLASSES: 8
192
+ POSITIVE_FRACTION: 0.25
193
+ PROPOSAL_APPEND_GT: true
194
+ SCORE_THRESH_TEST: 0.05
195
+ ROI_KEYPOINT_HEAD:
196
+ CONV_DIMS:
197
+ - 512
198
+ - 512
199
+ - 512
200
+ - 512
201
+ - 512
202
+ - 512
203
+ - 512
204
+ - 512
205
+ LOSS_WEIGHT: 1.0
206
+ MIN_KEYPOINTS_PER_IMAGE: 1
207
+ NAME: KRCNNConvDeconvUpsampleHead
208
+ NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS: true
209
+ NUM_KEYPOINTS: 17
210
+ POOLER_RESOLUTION: 14
211
+ POOLER_SAMPLING_RATIO: 0
212
+ POOLER_TYPE: ROIAlignV2
213
+ ROI_MASK_HEAD:
214
+ CLS_AGNOSTIC_MASK: false
215
+ CONV_DIM: 256
216
+ NAME: MaskRCNNConvUpsampleHead
217
+ NORM: ''
218
+ NUM_CONV: 4
219
+ POOLER_RESOLUTION: 14
220
+ POOLER_SAMPLING_RATIO: 0
221
+ POOLER_TYPE: ROIAlignV2
222
+ RPN:
223
+ BATCH_SIZE_PER_IMAGE: 256
224
+ BBOX_REG_WEIGHTS:
225
+ - 1.0
226
+ - 1.0
227
+ - 1.0
228
+ - 1.0
229
+ BOUNDARY_THRESH: -1
230
+ HEAD_NAME: StandardRPNHead
231
+ IN_FEATURES:
232
+ - p2
233
+ - p3
234
+ - p4
235
+ - p5
236
+ - p6
237
+ IOU_LABELS:
238
+ - 0
239
+ - -1
240
+ - 1
241
+ IOU_THRESHOLDS:
242
+ - 0.3
243
+ - 0.7
244
+ LOSS_WEIGHT: 1.0
245
+ NMS_THRESH: 0.7
246
+ POSITIVE_FRACTION: 0.5
247
+ POST_NMS_TOPK_TEST: 1000
248
+ POST_NMS_TOPK_TRAIN: 1000
249
+ PRE_NMS_TOPK_TEST: 1000
250
+ PRE_NMS_TOPK_TRAIN: 2000
251
+ SMOOTH_L1_BETA: 0.0
252
+ SEM_SEG_HEAD:
253
+ COMMON_STRIDE: 4
254
+ CONVS_DIM: 128
255
+ IGNORE_VALUE: 255
256
+ IN_FEATURES:
257
+ - p2
258
+ - p3
259
+ - p4
260
+ - p5
261
+ LOSS_WEIGHT: 1.0
262
+ NAME: SemSegFPNHead
263
+ NORM: GN
264
+ NUM_CLASSES: 54
265
+ WEIGHTS: https://www.dropbox.com/s/6icw6at8m28a2ho/model_final.pth?dl=1
266
+ OUTPUT_DIR: ./train_log/faster_rcnn_R_50_FPN_3x
267
+ SEED: -1
268
+ SOLVER:
269
+ BASE_LR: 0.00025
270
+ BIAS_LR_FACTOR: 1.0
271
+ CHECKPOINT_PERIOD: 30000
272
+ GAMMA: 0.1
273
+ IMS_PER_BATCH: 2
274
+ LR_SCHEDULER_NAME: WarmupMultiStepLR
275
+ MAX_ITER: 60000
276
+ MOMENTUM: 0.9
277
+ STEPS:
278
+ - 210000
279
+ - 250000
280
+ WARMUP_FACTOR: 0.001
281
+ WARMUP_ITERS: 1000
282
+ WARMUP_METHOD: linear
283
+ WEIGHT_DECAY: 0.0001
284
+ WEIGHT_DECAY_BIAS: 0.0001
285
+ WEIGHT_DECAY_NORM: 0.0
286
+ TEST:
287
+ AUG:
288
+ ENABLED: false
289
+ FLIP: true
290
+ MAX_SIZE: 4000
291
+ MIN_SIZES:
292
+ - 400
293
+ - 500
294
+ - 600
295
+ - 700
296
+ - 800
297
+ - 900
298
+ - 1000
299
+ - 1100
300
+ - 1200
301
+ DETECTIONS_PER_IMAGE: 100
302
+ EVAL_PERIOD: 0
303
+ EXPECTED_RESULTS: []
304
+ KEYPOINT_OKS_SIGMAS: []
305
+ PRECISE_BN:
306
+ ENABLED: false
307
+ NUM_ITER: 200
308
+ VERSION: 2
309
+ VIS_PERIOD: 0
models/HJDataset/HJDataset-faster_rcnn_R_50_FPN_3x-model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d00c5fe253e0ad1b22dc6cfe495193eb9c1ddf57f09df20c6f2badb5e48ea8af
3
+ size 330267761
models/HJDataset/HJDataset-mask_rcnn_R_50_FPN_3x-config.yml ADDED
@@ -0,0 +1,309 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ CUDNN_BENCHMARK: false
2
+ DATALOADER:
3
+ ASPECT_RATIO_GROUPING: true
4
+ FILTER_EMPTY_ANNOTATIONS: true
5
+ NUM_WORKERS: 2
6
+ REPEAT_THRESHOLD: 0.0
7
+ SAMPLER_TRAIN: TrainingSampler
8
+ DATASETS:
9
+ PRECOMPUTED_PROPOSAL_TOPK_TEST: 1000
10
+ PRECOMPUTED_PROPOSAL_TOPK_TRAIN: 2000
11
+ PROPOSAL_FILES_TEST: []
12
+ PROPOSAL_FILES_TRAIN: []
13
+ TEST:
14
+ - HJDataset_test
15
+ TRAIN:
16
+ - HJDataset_train
17
+ GLOBAL:
18
+ HACK: 1.0
19
+ INPUT:
20
+ CROP:
21
+ ENABLED: false
22
+ SIZE:
23
+ - 0.9
24
+ - 0.9
25
+ TYPE: relative_range
26
+ FORMAT: BGR
27
+ MASK_FORMAT: polygon
28
+ MAX_SIZE_TEST: 1333
29
+ MAX_SIZE_TRAIN: 1333
30
+ MIN_SIZE_TEST: 800
31
+ MIN_SIZE_TRAIN:
32
+ - 640
33
+ - 672
34
+ - 704
35
+ - 736
36
+ - 768
37
+ - 800
38
+ MIN_SIZE_TRAIN_SAMPLING: choice
39
+ MODEL:
40
+ ANCHOR_GENERATOR:
41
+ ANGLES:
42
+ - - -90
43
+ - 0
44
+ - 90
45
+ ASPECT_RATIOS:
46
+ - - 0.5
47
+ - 1.0
48
+ - 2.0
49
+ NAME: DefaultAnchorGenerator
50
+ OFFSET: 0.0
51
+ SIZES:
52
+ - - 32
53
+ - - 64
54
+ - - 128
55
+ - - 256
56
+ - - 512
57
+ BACKBONE:
58
+ FREEZE_AT: 2
59
+ NAME: build_resnet_fpn_backbone
60
+ DEVICE: cuda
61
+ FPN:
62
+ FUSE_TYPE: sum
63
+ IN_FEATURES:
64
+ - res2
65
+ - res3
66
+ - res4
67
+ - res5
68
+ NORM: ''
69
+ OUT_CHANNELS: 256
70
+ KEYPOINT_ON: false
71
+ LOAD_PROPOSALS: false
72
+ MASK_ON: true
73
+ META_ARCHITECTURE: GeneralizedRCNN
74
+ PANOPTIC_FPN:
75
+ COMBINE:
76
+ ENABLED: true
77
+ INSTANCES_CONFIDENCE_THRESH: 0.5
78
+ OVERLAP_THRESH: 0.5
79
+ STUFF_AREA_LIMIT: 4096
80
+ INSTANCE_LOSS_WEIGHT: 1.0
81
+ PIXEL_MEAN:
82
+ - 103.53
83
+ - 116.28
84
+ - 123.675
85
+ PIXEL_STD:
86
+ - 1.0
87
+ - 1.0
88
+ - 1.0
89
+ PROPOSAL_GENERATOR:
90
+ MIN_SIZE: 0
91
+ NAME: RPN
92
+ RESNETS:
93
+ DEFORM_MODULATED: false
94
+ DEFORM_NUM_GROUPS: 1
95
+ DEFORM_ON_PER_STAGE:
96
+ - false
97
+ - false
98
+ - false
99
+ - false
100
+ DEPTH: 50
101
+ NORM: FrozenBN
102
+ NUM_GROUPS: 1
103
+ OUT_FEATURES:
104
+ - res2
105
+ - res3
106
+ - res4
107
+ - res5
108
+ RES2_OUT_CHANNELS: 256
109
+ RES5_DILATION: 1
110
+ STEM_OUT_CHANNELS: 64
111
+ STRIDE_IN_1X1: true
112
+ WIDTH_PER_GROUP: 64
113
+ RETINANET:
114
+ BBOX_REG_WEIGHTS:
115
+ - 1.0
116
+ - 1.0
117
+ - 1.0
118
+ - 1.0
119
+ FOCAL_LOSS_ALPHA: 0.25
120
+ FOCAL_LOSS_GAMMA: 2.0
121
+ IN_FEATURES:
122
+ - p3
123
+ - p4
124
+ - p5
125
+ - p6
126
+ - p7
127
+ IOU_LABELS:
128
+ - 0
129
+ - -1
130
+ - 1
131
+ IOU_THRESHOLDS:
132
+ - 0.4
133
+ - 0.5
134
+ NMS_THRESH_TEST: 0.5
135
+ NUM_CLASSES: 80
136
+ NUM_CONVS: 4
137
+ PRIOR_PROB: 0.01
138
+ SCORE_THRESH_TEST: 0.05
139
+ SMOOTH_L1_LOSS_BETA: 0.1
140
+ TOPK_CANDIDATES_TEST: 1000
141
+ ROI_BOX_CASCADE_HEAD:
142
+ BBOX_REG_WEIGHTS:
143
+ - - 10.0
144
+ - 10.0
145
+ - 5.0
146
+ - 5.0
147
+ - - 20.0
148
+ - 20.0
149
+ - 10.0
150
+ - 10.0
151
+ - - 30.0
152
+ - 30.0
153
+ - 15.0
154
+ - 15.0
155
+ IOUS:
156
+ - 0.5
157
+ - 0.6
158
+ - 0.7
159
+ ROI_BOX_HEAD:
160
+ BBOX_REG_WEIGHTS:
161
+ - 10.0
162
+ - 10.0
163
+ - 5.0
164
+ - 5.0
165
+ CLS_AGNOSTIC_BBOX_REG: false
166
+ CONV_DIM: 256
167
+ FC_DIM: 1024
168
+ NAME: FastRCNNConvFCHead
169
+ NORM: ''
170
+ NUM_CONV: 0
171
+ NUM_FC: 2
172
+ POOLER_RESOLUTION: 7
173
+ POOLER_SAMPLING_RATIO: 0
174
+ POOLER_TYPE: ROIAlignV2
175
+ SMOOTH_L1_BETA: 0.0
176
+ TRAIN_ON_PRED_BOXES: false
177
+ ROI_HEADS:
178
+ BATCH_SIZE_PER_IMAGE: 256
179
+ IN_FEATURES:
180
+ - p2
181
+ - p3
182
+ - p4
183
+ - p5
184
+ IOU_LABELS:
185
+ - 0
186
+ - 1
187
+ IOU_THRESHOLDS:
188
+ - 0.5
189
+ NAME: StandardROIHeads
190
+ NMS_THRESH_TEST: 0.5
191
+ NUM_CLASSES: 8
192
+ POSITIVE_FRACTION: 0.25
193
+ PROPOSAL_APPEND_GT: true
194
+ SCORE_THRESH_TEST: 0.05
195
+ ROI_KEYPOINT_HEAD:
196
+ CONV_DIMS:
197
+ - 512
198
+ - 512
199
+ - 512
200
+ - 512
201
+ - 512
202
+ - 512
203
+ - 512
204
+ - 512
205
+ LOSS_WEIGHT: 1.0
206
+ MIN_KEYPOINTS_PER_IMAGE: 1
207
+ NAME: KRCNNConvDeconvUpsampleHead
208
+ NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS: true
209
+ NUM_KEYPOINTS: 17
210
+ POOLER_RESOLUTION: 14
211
+ POOLER_SAMPLING_RATIO: 0
212
+ POOLER_TYPE: ROIAlignV2
213
+ ROI_MASK_HEAD:
214
+ CLS_AGNOSTIC_MASK: false
215
+ CONV_DIM: 256
216
+ NAME: MaskRCNNConvUpsampleHead
217
+ NORM: ''
218
+ NUM_CONV: 4
219
+ POOLER_RESOLUTION: 14
220
+ POOLER_SAMPLING_RATIO: 0
221
+ POOLER_TYPE: ROIAlignV2
222
+ RPN:
223
+ BATCH_SIZE_PER_IMAGE: 256
224
+ BBOX_REG_WEIGHTS:
225
+ - 1.0
226
+ - 1.0
227
+ - 1.0
228
+ - 1.0
229
+ BOUNDARY_THRESH: -1
230
+ HEAD_NAME: StandardRPNHead
231
+ IN_FEATURES:
232
+ - p2
233
+ - p3
234
+ - p4
235
+ - p5
236
+ - p6
237
+ IOU_LABELS:
238
+ - 0
239
+ - -1
240
+ - 1
241
+ IOU_THRESHOLDS:
242
+ - 0.3
243
+ - 0.7
244
+ LOSS_WEIGHT: 1.0
245
+ NMS_THRESH: 0.7
246
+ POSITIVE_FRACTION: 0.5
247
+ POST_NMS_TOPK_TEST: 1000
248
+ POST_NMS_TOPK_TRAIN: 1000
249
+ PRE_NMS_TOPK_TEST: 1000
250
+ PRE_NMS_TOPK_TRAIN: 2000
251
+ SMOOTH_L1_BETA: 0.0
252
+ SEM_SEG_HEAD:
253
+ COMMON_STRIDE: 4
254
+ CONVS_DIM: 128
255
+ IGNORE_VALUE: 255
256
+ IN_FEATURES:
257
+ - p2
258
+ - p3
259
+ - p4
260
+ - p5
261
+ LOSS_WEIGHT: 1.0
262
+ NAME: SemSegFPNHead
263
+ NORM: GN
264
+ NUM_CLASSES: 54
265
+ WEIGHTS: https://www.dropbox.com/s/893paxpy5suvlx9/model_final.pth?dl=1
266
+ OUTPUT_DIR: ./train_log/mask_rcnn_R_50_FPN_3x
267
+ SEED: -1
268
+ SOLVER:
269
+ BASE_LR: 0.00025
270
+ BIAS_LR_FACTOR: 1.0
271
+ CHECKPOINT_PERIOD: 30000
272
+ GAMMA: 0.1
273
+ IMS_PER_BATCH: 2
274
+ LR_SCHEDULER_NAME: WarmupMultiStepLR
275
+ MAX_ITER: 60000
276
+ MOMENTUM: 0.9
277
+ STEPS:
278
+ - 210000
279
+ - 250000
280
+ WARMUP_FACTOR: 0.001
281
+ WARMUP_ITERS: 1000
282
+ WARMUP_METHOD: linear
283
+ WEIGHT_DECAY: 0.0001
284
+ WEIGHT_DECAY_BIAS: 0.0001
285
+ WEIGHT_DECAY_NORM: 0.0
286
+ TEST:
287
+ AUG:
288
+ ENABLED: false
289
+ FLIP: true
290
+ MAX_SIZE: 4000
291
+ MIN_SIZES:
292
+ - 400
293
+ - 500
294
+ - 600
295
+ - 700
296
+ - 800
297
+ - 900
298
+ - 1000
299
+ - 1100
300
+ - 1200
301
+ DETECTIONS_PER_IMAGE: 100
302
+ EVAL_PERIOD: 0
303
+ EXPECTED_RESULTS: []
304
+ KEYPOINT_OKS_SIGMAS: []
305
+ PRECISE_BN:
306
+ ENABLED: false
307
+ NUM_ITER: 200
308
+ VERSION: 2
309
+ VIS_PERIOD: 0
models/HJDataset/HJDataset-mask_rcnn_R_50_FPN_3x-model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d7f608a5d9b0c86e020d7c4b39fa469de64210a75c1257f15c9c2d3a6ac740fc
3
+ size 351272203
models/HJDataset/HJDataset-retinanet_R_50_FPN_3x-config.yml ADDED
@@ -0,0 +1,310 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ CUDNN_BENCHMARK: false
2
+ DATALOADER:
3
+ ASPECT_RATIO_GROUPING: true
4
+ FILTER_EMPTY_ANNOTATIONS: true
5
+ NUM_WORKERS: 2
6
+ REPEAT_THRESHOLD: 0.0
7
+ SAMPLER_TRAIN: TrainingSampler
8
+ DATASETS:
9
+ PRECOMPUTED_PROPOSAL_TOPK_TEST: 1000
10
+ PRECOMPUTED_PROPOSAL_TOPK_TRAIN: 2000
11
+ PROPOSAL_FILES_TEST: []
12
+ PROPOSAL_FILES_TRAIN: []
13
+ TEST:
14
+ - HJDataset_test
15
+ TRAIN:
16
+ - HJDataset_train
17
+ GLOBAL:
18
+ HACK: 1.0
19
+ INPUT:
20
+ CROP:
21
+ ENABLED: false
22
+ SIZE:
23
+ - 0.9
24
+ - 0.9
25
+ TYPE: relative_range
26
+ FORMAT: BGR
27
+ MASK_FORMAT: polygon
28
+ MAX_SIZE_TEST: 1333
29
+ MAX_SIZE_TRAIN: 1333
30
+ MIN_SIZE_TEST: 800
31
+ MIN_SIZE_TRAIN:
32
+ - 640
33
+ - 672
34
+ - 704
35
+ - 736
36
+ - 768
37
+ - 800
38
+ MIN_SIZE_TRAIN_SAMPLING: choice
39
+ MODEL:
40
+ ANCHOR_GENERATOR:
41
+ ANGLES:
42
+ - - -90
43
+ - 0
44
+ - 90
45
+ ASPECT_RATIOS:
46
+ - - 0.5
47
+ - 1.0
48
+ - 2.0
49
+ NAME: DefaultAnchorGenerator
50
+ OFFSET: 0.0
51
+ SIZES:
52
+ - - 32
53
+ - 40.31747359663594
54
+ - 50.79683366298238
55
+ - - 64
56
+ - 80.63494719327188
57
+ - 101.59366732596476
58
+ - - 128
59
+ - 161.26989438654377
60
+ - 203.18733465192952
61
+ - - 256
62
+ - 322.53978877308754
63
+ - 406.37466930385904
64
+ - - 512
65
+ - 645.0795775461751
66
+ - 812.7493386077181
67
+ BACKBONE:
68
+ FREEZE_AT: 2
69
+ NAME: build_retinanet_resnet_fpn_backbone
70
+ DEVICE: cuda
71
+ FPN:
72
+ FUSE_TYPE: sum
73
+ IN_FEATURES:
74
+ - res3
75
+ - res4
76
+ - res5
77
+ NORM: ''
78
+ OUT_CHANNELS: 256
79
+ KEYPOINT_ON: false
80
+ LOAD_PROPOSALS: false
81
+ MASK_ON: false
82
+ META_ARCHITECTURE: RetinaNet
83
+ PANOPTIC_FPN:
84
+ COMBINE:
85
+ ENABLED: true
86
+ INSTANCES_CONFIDENCE_THRESH: 0.5
87
+ OVERLAP_THRESH: 0.5
88
+ STUFF_AREA_LIMIT: 4096
89
+ INSTANCE_LOSS_WEIGHT: 1.0
90
+ PIXEL_MEAN:
91
+ - 103.53
92
+ - 116.28
93
+ - 123.675
94
+ PIXEL_STD:
95
+ - 1.0
96
+ - 1.0
97
+ - 1.0
98
+ PROPOSAL_GENERATOR:
99
+ MIN_SIZE: 0
100
+ NAME: RPN
101
+ RESNETS:
102
+ DEFORM_MODULATED: false
103
+ DEFORM_NUM_GROUPS: 1
104
+ DEFORM_ON_PER_STAGE:
105
+ - false
106
+ - false
107
+ - false
108
+ - false
109
+ DEPTH: 50
110
+ NORM: FrozenBN
111
+ NUM_GROUPS: 1
112
+ OUT_FEATURES:
113
+ - res3
114
+ - res4
115
+ - res5
116
+ RES2_OUT_CHANNELS: 256
117
+ RES5_DILATION: 1
118
+ STEM_OUT_CHANNELS: 64
119
+ STRIDE_IN_1X1: true
120
+ WIDTH_PER_GROUP: 64
121
+ RETINANET:
122
+ BBOX_REG_WEIGHTS:
123
+ - 1.0
124
+ - 1.0
125
+ - 1.0
126
+ - 1.0
127
+ FOCAL_LOSS_ALPHA: 0.25
128
+ FOCAL_LOSS_GAMMA: 2.0
129
+ IN_FEATURES:
130
+ - p3
131
+ - p4
132
+ - p5
133
+ - p6
134
+ - p7
135
+ IOU_LABELS:
136
+ - 0
137
+ - -1
138
+ - 1
139
+ IOU_THRESHOLDS:
140
+ - 0.4
141
+ - 0.5
142
+ NMS_THRESH_TEST: 0.5
143
+ NUM_CLASSES: 80
144
+ NUM_CONVS: 4
145
+ PRIOR_PROB: 0.01
146
+ SCORE_THRESH_TEST: 0.05
147
+ SMOOTH_L1_LOSS_BETA: 0.1
148
+ TOPK_CANDIDATES_TEST: 1000
149
+ ROI_BOX_CASCADE_HEAD:
150
+ BBOX_REG_WEIGHTS:
151
+ - - 10.0
152
+ - 10.0
153
+ - 5.0
154
+ - 5.0
155
+ - - 20.0
156
+ - 20.0
157
+ - 10.0
158
+ - 10.0
159
+ - - 30.0
160
+ - 30.0
161
+ - 15.0
162
+ - 15.0
163
+ IOUS:
164
+ - 0.5
165
+ - 0.6
166
+ - 0.7
167
+ ROI_BOX_HEAD:
168
+ BBOX_REG_WEIGHTS:
169
+ - 10.0
170
+ - 10.0
171
+ - 5.0
172
+ - 5.0
173
+ CLS_AGNOSTIC_BBOX_REG: false
174
+ CONV_DIM: 256
175
+ FC_DIM: 1024
176
+ NAME: ''
177
+ NORM: ''
178
+ NUM_CONV: 0
179
+ NUM_FC: 0
180
+ POOLER_RESOLUTION: 14
181
+ POOLER_SAMPLING_RATIO: 0
182
+ POOLER_TYPE: ROIAlignV2
183
+ SMOOTH_L1_BETA: 0.0
184
+ TRAIN_ON_PRED_BOXES: false
185
+ ROI_HEADS:
186
+ BATCH_SIZE_PER_IMAGE: 256
187
+ IN_FEATURES:
188
+ - res4
189
+ IOU_LABELS:
190
+ - 0
191
+ - 1
192
+ IOU_THRESHOLDS:
193
+ - 0.5
194
+ NAME: Res5ROIHeads
195
+ NMS_THRESH_TEST: 0.5
196
+ NUM_CLASSES: 8
197
+ POSITIVE_FRACTION: 0.25
198
+ PROPOSAL_APPEND_GT: true
199
+ SCORE_THRESH_TEST: 0.05
200
+ ROI_KEYPOINT_HEAD:
201
+ CONV_DIMS:
202
+ - 512
203
+ - 512
204
+ - 512
205
+ - 512
206
+ - 512
207
+ - 512
208
+ - 512
209
+ - 512
210
+ LOSS_WEIGHT: 1.0
211
+ MIN_KEYPOINTS_PER_IMAGE: 1
212
+ NAME: KRCNNConvDeconvUpsampleHead
213
+ NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS: true
214
+ NUM_KEYPOINTS: 17
215
+ POOLER_RESOLUTION: 14
216
+ POOLER_SAMPLING_RATIO: 0
217
+ POOLER_TYPE: ROIAlignV2
218
+ ROI_MASK_HEAD:
219
+ CLS_AGNOSTIC_MASK: false
220
+ CONV_DIM: 256
221
+ NAME: MaskRCNNConvUpsampleHead
222
+ NORM: ''
223
+ NUM_CONV: 0
224
+ POOLER_RESOLUTION: 14
225
+ POOLER_SAMPLING_RATIO: 0
226
+ POOLER_TYPE: ROIAlignV2
227
+ RPN:
228
+ BATCH_SIZE_PER_IMAGE: 256
229
+ BBOX_REG_WEIGHTS:
230
+ - 1.0
231
+ - 1.0
232
+ - 1.0
233
+ - 1.0
234
+ BOUNDARY_THRESH: -1
235
+ HEAD_NAME: StandardRPNHead
236
+ IN_FEATURES:
237
+ - res4
238
+ IOU_LABELS:
239
+ - 0
240
+ - -1
241
+ - 1
242
+ IOU_THRESHOLDS:
243
+ - 0.3
244
+ - 0.7
245
+ LOSS_WEIGHT: 1.0
246
+ NMS_THRESH: 0.7
247
+ POSITIVE_FRACTION: 0.5
248
+ POST_NMS_TOPK_TEST: 1000
249
+ POST_NMS_TOPK_TRAIN: 2000
250
+ PRE_NMS_TOPK_TEST: 6000
251
+ PRE_NMS_TOPK_TRAIN: 12000
252
+ SMOOTH_L1_BETA: 0.0
253
+ SEM_SEG_HEAD:
254
+ COMMON_STRIDE: 4
255
+ CONVS_DIM: 128
256
+ IGNORE_VALUE: 255
257
+ IN_FEATURES:
258
+ - p2
259
+ - p3
260
+ - p4
261
+ - p5
262
+ LOSS_WEIGHT: 1.0
263
+ NAME: SemSegFPNHead
264
+ NORM: GN
265
+ NUM_CLASSES: 54
266
+ WEIGHTS: https://www.dropbox.com/s/yxsloxu3djt456i/model_final.pth?dl=1
267
+ OUTPUT_DIR: ./train_log/retinanet_R_50_FPN_3x
268
+ SEED: -1
269
+ SOLVER:
270
+ BASE_LR: 0.00025
271
+ BIAS_LR_FACTOR: 1.0
272
+ CHECKPOINT_PERIOD: 30000
273
+ GAMMA: 0.1
274
+ IMS_PER_BATCH: 2
275
+ LR_SCHEDULER_NAME: WarmupMultiStepLR
276
+ MAX_ITER: 60000
277
+ MOMENTUM: 0.9
278
+ STEPS:
279
+ - 210000
280
+ - 250000
281
+ WARMUP_FACTOR: 0.001
282
+ WARMUP_ITERS: 1000
283
+ WARMUP_METHOD: linear
284
+ WEIGHT_DECAY: 0.0001
285
+ WEIGHT_DECAY_BIAS: 0.0001
286
+ WEIGHT_DECAY_NORM: 0.0
287
+ TEST:
288
+ AUG:
289
+ ENABLED: false
290
+ FLIP: true
291
+ MAX_SIZE: 4000
292
+ MIN_SIZES:
293
+ - 400
294
+ - 500
295
+ - 600
296
+ - 700
297
+ - 800
298
+ - 900
299
+ - 1000
300
+ - 1100
301
+ - 1200
302
+ DETECTIONS_PER_IMAGE: 100
303
+ EVAL_PERIOD: 0
304
+ EXPECTED_RESULTS: []
305
+ KEYPOINT_OKS_SIGMAS: []
306
+ PRECISE_BN:
307
+ ENABLED: false
308
+ NUM_ITER: 200
309
+ VERSION: 2
310
+ VIS_PERIOD: 0
models/HJDataset/HJDataset-retinanet_R_50_FPN_3x-model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:71d0802f7e0805d36c76f533f04a1d20d682dc2d329f4443222a18faf34fd8b2
3
+ size 302947685
MFD-faster_rcnn_R_50_FPN_3x-config.yml → models/MFD/MFD-faster_rcnn_R_50_FPN_3x-config.yml RENAMED
File without changes
MFD-faster_rcnn_R_50_FPN_3x-model.pth → models/MFD/MFD-faster_rcnn_R_50_FPN_3x-model.pth RENAMED
File without changes
models/NewspaperNavigator/NewspaperNavigator-faster_rcnn_R_50_FPN_3x-config.yml ADDED
@@ -0,0 +1,311 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ CUDNN_BENCHMARK: false
2
+ DATALOADER:
3
+ ASPECT_RATIO_GROUPING: true
4
+ FILTER_EMPTY_ANNOTATIONS: true
5
+ NUM_WORKERS: 4
6
+ REPEAT_THRESHOLD: 0.0
7
+ SAMPLER_TRAIN: TrainingSampler
8
+ DATASETS:
9
+ PRECOMPUTED_PROPOSAL_TOPK_TEST: 1000
10
+ PRECOMPUTED_PROPOSAL_TOPK_TRAIN: 2000
11
+ PROPOSAL_FILES_TEST: []
12
+ PROPOSAL_FILES_TRAIN: []
13
+ TEST:
14
+ - coco_2017_val
15
+ TRAIN:
16
+ - coco_2017_train
17
+ GLOBAL:
18
+ HACK: 1.0
19
+ INPUT:
20
+ CROP:
21
+ ENABLED: false
22
+ SIZE:
23
+ - 0.9
24
+ - 0.9
25
+ TYPE: relative_range
26
+ FORMAT: BGR
27
+ MASK_FORMAT: polygon
28
+ MAX_SIZE_TEST: 1333
29
+ MAX_SIZE_TRAIN: 1333
30
+ MIN_SIZE_TEST: 800
31
+ MIN_SIZE_TRAIN:
32
+ - 640
33
+ - 672
34
+ - 704
35
+ - 736
36
+ - 768
37
+ - 800
38
+ MIN_SIZE_TRAIN_SAMPLING: choice
39
+ MODEL:
40
+ ANCHOR_GENERATOR:
41
+ ANGLES:
42
+ - - -90
43
+ - 0
44
+ - 90
45
+ ASPECT_RATIOS:
46
+ - - 0.5
47
+ - 1.0
48
+ - 2.0
49
+ NAME: DefaultAnchorGenerator
50
+ OFFSET: 0.0
51
+ SIZES:
52
+ - - 32
53
+ - - 64
54
+ - - 128
55
+ - - 256
56
+ - - 512
57
+ BACKBONE:
58
+ FREEZE_AT: 2
59
+ NAME: build_resnet_fpn_backbone
60
+ DEVICE: cuda
61
+ FPN:
62
+ FUSE_TYPE: sum
63
+ IN_FEATURES:
64
+ - res2
65
+ - res3
66
+ - res4
67
+ - res5
68
+ NORM: ''
69
+ OUT_CHANNELS: 256
70
+ KEYPOINT_ON: false
71
+ LOAD_PROPOSALS: false
72
+ MASK_ON: false
73
+ META_ARCHITECTURE: GeneralizedRCNN
74
+ PANOPTIC_FPN:
75
+ COMBINE:
76
+ ENABLED: true
77
+ INSTANCES_CONFIDENCE_THRESH: 0.5
78
+ OVERLAP_THRESH: 0.5
79
+ STUFF_AREA_LIMIT: 4096
80
+ INSTANCE_LOSS_WEIGHT: 1.0
81
+ PIXEL_MEAN:
82
+ - 103.53
83
+ - 116.28
84
+ - 123.675
85
+ PIXEL_STD:
86
+ - 1.0
87
+ - 1.0
88
+ - 1.0
89
+ PROPOSAL_GENERATOR:
90
+ MIN_SIZE: 0
91
+ NAME: RPN
92
+ RESNETS:
93
+ DEFORM_MODULATED: false
94
+ DEFORM_NUM_GROUPS: 1
95
+ DEFORM_ON_PER_STAGE:
96
+ - false
97
+ - false
98
+ - false
99
+ - false
100
+ DEPTH: 50
101
+ NORM: FrozenBN
102
+ NUM_GROUPS: 1
103
+ OUT_FEATURES:
104
+ - res2
105
+ - res3
106
+ - res4
107
+ - res5
108
+ RES2_OUT_CHANNELS: 256
109
+ RES5_DILATION: 1
110
+ STEM_OUT_CHANNELS: 64
111
+ STRIDE_IN_1X1: true
112
+ WIDTH_PER_GROUP: 64
113
+ RETINANET:
114
+ BBOX_REG_WEIGHTS: &id001
115
+ - 1.0
116
+ - 1.0
117
+ - 1.0
118
+ - 1.0
119
+ FOCAL_LOSS_ALPHA: 0.25
120
+ FOCAL_LOSS_GAMMA: 2.0
121
+ IN_FEATURES:
122
+ - p3
123
+ - p4
124
+ - p5
125
+ - p6
126
+ - p7
127
+ IOU_LABELS:
128
+ - 0
129
+ - -1
130
+ - 1
131
+ IOU_THRESHOLDS:
132
+ - 0.4
133
+ - 0.5
134
+ NMS_THRESH_TEST: 0.5
135
+ NUM_CLASSES: 80
136
+ NUM_CONVS: 4
137
+ PRIOR_PROB: 0.01
138
+ SCORE_THRESH_TEST: 0.05
139
+ SMOOTH_L1_LOSS_BETA: 0.1
140
+ TOPK_CANDIDATES_TEST: 1000
141
+ ROI_BOX_CASCADE_HEAD:
142
+ BBOX_REG_WEIGHTS:
143
+ - - 10.0
144
+ - 10.0
145
+ - 5.0
146
+ - 5.0
147
+ - - 20.0
148
+ - 20.0
149
+ - 10.0
150
+ - 10.0
151
+ - - 30.0
152
+ - 30.0
153
+ - 15.0
154
+ - 15.0
155
+ IOUS:
156
+ - 0.5
157
+ - 0.6
158
+ - 0.7
159
+ ROI_BOX_HEAD:
160
+ BBOX_REG_WEIGHTS:
161
+ - 10.0
162
+ - 10.0
163
+ - 5.0
164
+ - 5.0
165
+ CLS_AGNOSTIC_BBOX_REG: false
166
+ CONV_DIM: 256
167
+ FC_DIM: 1024
168
+ NAME: FastRCNNConvFCHead
169
+ NORM: ''
170
+ NUM_CONV: 0
171
+ NUM_FC: 2
172
+ POOLER_RESOLUTION: 7
173
+ POOLER_SAMPLING_RATIO: 0
174
+ POOLER_TYPE: ROIAlignV2
175
+ SMOOTH_L1_BETA: 0.0
176
+ TRAIN_ON_PRED_BOXES: false
177
+ ROI_HEADS:
178
+ BATCH_SIZE_PER_IMAGE: 512
179
+ IN_FEATURES:
180
+ - p2
181
+ - p3
182
+ - p4
183
+ - p5
184
+ IOU_LABELS:
185
+ - 0
186
+ - 1
187
+ IOU_THRESHOLDS:
188
+ - 0.5
189
+ NAME: StandardROIHeads
190
+ NMS_THRESH_TEST: 0.1
191
+ NUM_CLASSES: 7
192
+ POSITIVE_FRACTION: 0.25
193
+ PROPOSAL_APPEND_GT: true
194
+ SCORE_THRESH_TEST: 0.5
195
+ ROI_KEYPOINT_HEAD:
196
+ CONV_DIMS:
197
+ - 512
198
+ - 512
199
+ - 512
200
+ - 512
201
+ - 512
202
+ - 512
203
+ - 512
204
+ - 512
205
+ LOSS_WEIGHT: 1.0
206
+ MIN_KEYPOINTS_PER_IMAGE: 1
207
+ NAME: KRCNNConvDeconvUpsampleHead
208
+ NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS: true
209
+ NUM_KEYPOINTS: 17
210
+ POOLER_RESOLUTION: 14
211
+ POOLER_SAMPLING_RATIO: 0
212
+ POOLER_TYPE: ROIAlignV2
213
+ ROI_MASK_HEAD:
214
+ CLS_AGNOSTIC_MASK: false
215
+ CONV_DIM: 256
216
+ NAME: MaskRCNNConvUpsampleHead
217
+ NORM: ''
218
+ NUM_CONV: 4
219
+ POOLER_RESOLUTION: 14
220
+ POOLER_SAMPLING_RATIO: 0
221
+ POOLER_TYPE: ROIAlignV2
222
+ RPN:
223
+ BATCH_SIZE_PER_IMAGE: 256
224
+ BBOX_REG_WEIGHTS: *id001
225
+ BOUNDARY_THRESH: -1
226
+ HEAD_NAME: StandardRPNHead
227
+ IN_FEATURES:
228
+ - p2
229
+ - p3
230
+ - p4
231
+ - p5
232
+ - p6
233
+ IOU_LABELS:
234
+ - 0
235
+ - -1
236
+ - 1
237
+ IOU_THRESHOLDS:
238
+ - 0.3
239
+ - 0.7
240
+ LOSS_WEIGHT: 1.0
241
+ NMS_THRESH: 0.7
242
+ POSITIVE_FRACTION: 0.5
243
+ POST_NMS_TOPK_TEST: 1000
244
+ POST_NMS_TOPK_TRAIN: 1000
245
+ PRE_NMS_TOPK_TEST: 1000
246
+ PRE_NMS_TOPK_TRAIN: 2000
247
+ SMOOTH_L1_BETA: 0.0
248
+ SEM_SEG_HEAD:
249
+ COMMON_STRIDE: 4
250
+ CONVS_DIM: 128
251
+ IGNORE_VALUE: 255
252
+ IN_FEATURES:
253
+ - p2
254
+ - p3
255
+ - p4
256
+ - p5
257
+ LOSS_WEIGHT: 1.0
258
+ NAME: SemSegFPNHead
259
+ NORM: GN
260
+ NUM_CLASSES: 54
261
+ WEIGHTS: https://www.dropbox.com/s/6ewh6g8rqt2ev3a/model_final.pth?dl=1
262
+ OUTPUT_DIR: ./output
263
+ SEED: -1
264
+ SOLVER:
265
+ BASE_LR: 0.02
266
+ BIAS_LR_FACTOR: 1.0
267
+ CHECKPOINT_PERIOD: 5000
268
+ CLIP_GRADIENTS:
269
+ CLIP_TYPE: value
270
+ CLIP_VALUE: 1.0
271
+ ENABLED: false
272
+ NORM_TYPE: 2.0
273
+ GAMMA: 0.1
274
+ IMS_PER_BATCH: 16
275
+ LR_SCHEDULER_NAME: WarmupMultiStepLR
276
+ MAX_ITER: 270000
277
+ MOMENTUM: 0.9
278
+ NESTEROV: false
279
+ STEPS:
280
+ - 210000
281
+ - 250000
282
+ WARMUP_FACTOR: 0.001
283
+ WARMUP_ITERS: 1000
284
+ WARMUP_METHOD: linear
285
+ WEIGHT_DECAY: 0.0001
286
+ WEIGHT_DECAY_BIAS: 0.0001
287
+ WEIGHT_DECAY_NORM: 0.0
288
+ TEST:
289
+ AUG:
290
+ ENABLED: false
291
+ FLIP: true
292
+ MAX_SIZE: 4000
293
+ MIN_SIZES:
294
+ - 400
295
+ - 500
296
+ - 600
297
+ - 700
298
+ - 800
299
+ - 900
300
+ - 1000
301
+ - 1100
302
+ - 1200
303
+ DETECTIONS_PER_IMAGE: 100
304
+ EVAL_PERIOD: 0
305
+ EXPECTED_RESULTS: []
306
+ KEYPOINT_OKS_SIGMAS: []
307
+ PRECISE_BN:
308
+ ENABLED: false
309
+ NUM_ITER: 200
310
+ VERSION: 2
311
+ VIS_PERIOD: 0
models/NewspaperNavigator/NewspaperNavigator-faster_rcnn_R_50_FPN_3x-model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:05400a04636365ff687e5fce135cf4ebbf863ed5179bb80eee28eb774c37ddbb
3
+ size 330226761
models/PrimaLayout/PrimaLayout-mask_rcnn_R_50_FPN_3x-config.yml ADDED
@@ -0,0 +1,315 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ CUDNN_BENCHMARK: false
2
+ DATALOADER:
3
+ ASPECT_RATIO_GROUPING: true
4
+ FILTER_EMPTY_ANNOTATIONS: true
5
+ NUM_WORKERS: 4
6
+ REPEAT_THRESHOLD: 0.0
7
+ SAMPLER_TRAIN: TrainingSampler
8
+ DATASETS:
9
+ PRECOMPUTED_PROPOSAL_TOPK_TEST: 1000
10
+ PRECOMPUTED_PROPOSAL_TOPK_TRAIN: 2000
11
+ PROPOSAL_FILES_TEST: []
12
+ PROPOSAL_FILES_TRAIN: []
13
+ TEST:
14
+ - prima-layout-val
15
+ TRAIN:
16
+ - prima-layout-train
17
+ GLOBAL:
18
+ HACK: 1.0
19
+ INPUT:
20
+ CROP:
21
+ ENABLED: false
22
+ SIZE:
23
+ - 0.9
24
+ - 0.9
25
+ TYPE: relative_range
26
+ FORMAT: BGR
27
+ MASK_FORMAT: polygon
28
+ MAX_SIZE_TEST: 1333
29
+ MAX_SIZE_TRAIN: 1333
30
+ MIN_SIZE_TEST: 800
31
+ MIN_SIZE_TRAIN:
32
+ - 640
33
+ - 672
34
+ - 704
35
+ - 736
36
+ - 768
37
+ - 800
38
+ MIN_SIZE_TRAIN_SAMPLING: choice
39
+ MODEL:
40
+ ANCHOR_GENERATOR:
41
+ ANGLES:
42
+ - - -90
43
+ - 0
44
+ - 90
45
+ ASPECT_RATIOS:
46
+ - - 0.5
47
+ - 1.0
48
+ - 2.0
49
+ NAME: DefaultAnchorGenerator
50
+ OFFSET: 0.0
51
+ SIZES:
52
+ - - 32
53
+ - - 64
54
+ - - 128
55
+ - - 256
56
+ - - 512
57
+ BACKBONE:
58
+ FREEZE_AT: 2
59
+ NAME: build_resnet_fpn_backbone
60
+ DEVICE: cuda
61
+ FPN:
62
+ FUSE_TYPE: sum
63
+ IN_FEATURES:
64
+ - res2
65
+ - res3
66
+ - res4
67
+ - res5
68
+ NORM: ''
69
+ OUT_CHANNELS: 256
70
+ KEYPOINT_ON: false
71
+ LOAD_PROPOSALS: false
72
+ MASK_ON: true
73
+ META_ARCHITECTURE: GeneralizedRCNN
74
+ PANOPTIC_FPN:
75
+ COMBINE:
76
+ ENABLED: true
77
+ INSTANCES_CONFIDENCE_THRESH: 0.5
78
+ OVERLAP_THRESH: 0.5
79
+ STUFF_AREA_LIMIT: 4096
80
+ INSTANCE_LOSS_WEIGHT: 1.0
81
+ PIXEL_MEAN:
82
+ - 103.53
83
+ - 116.28
84
+ - 123.675
85
+ PIXEL_STD:
86
+ - 1.0
87
+ - 1.0
88
+ - 1.0
89
+ PROPOSAL_GENERATOR:
90
+ MIN_SIZE: 0
91
+ NAME: RPN
92
+ RESNETS:
93
+ DEFORM_MODULATED: false
94
+ DEFORM_NUM_GROUPS: 1
95
+ DEFORM_ON_PER_STAGE:
96
+ - false
97
+ - false
98
+ - false
99
+ - false
100
+ DEPTH: 50
101
+ NORM: FrozenBN
102
+ NUM_GROUPS: 1
103
+ OUT_FEATURES:
104
+ - res2
105
+ - res3
106
+ - res4
107
+ - res5
108
+ RES2_OUT_CHANNELS: 256
109
+ RES5_DILATION: 1
110
+ STEM_OUT_CHANNELS: 64
111
+ STRIDE_IN_1X1: true
112
+ WIDTH_PER_GROUP: 64
113
+ RETINANET:
114
+ BBOX_REG_WEIGHTS:
115
+ - 1.0
116
+ - 1.0
117
+ - 1.0
118
+ - 1.0
119
+ FOCAL_LOSS_ALPHA: 0.25
120
+ FOCAL_LOSS_GAMMA: 2.0
121
+ IN_FEATURES:
122
+ - p3
123
+ - p4
124
+ - p5
125
+ - p6
126
+ - p7
127
+ IOU_LABELS:
128
+ - 0
129
+ - -1
130
+ - 1
131
+ IOU_THRESHOLDS:
132
+ - 0.4
133
+ - 0.5
134
+ NMS_THRESH_TEST: 0.5
135
+ NUM_CLASSES: 80
136
+ NUM_CONVS: 4
137
+ PRIOR_PROB: 0.01
138
+ SCORE_THRESH_TEST: 0.05
139
+ SMOOTH_L1_LOSS_BETA: 0.1
140
+ TOPK_CANDIDATES_TEST: 1000
141
+ ROI_BOX_CASCADE_HEAD:
142
+ BBOX_REG_WEIGHTS:
143
+ - - 10.0
144
+ - 10.0
145
+ - 5.0
146
+ - 5.0
147
+ - - 20.0
148
+ - 20.0
149
+ - 10.0
150
+ - 10.0
151
+ - - 30.0
152
+ - 30.0
153
+ - 15.0
154
+ - 15.0
155
+ IOUS:
156
+ - 0.5
157
+ - 0.6
158
+ - 0.7
159
+ ROI_BOX_HEAD:
160
+ BBOX_REG_WEIGHTS:
161
+ - 10.0
162
+ - 10.0
163
+ - 5.0
164
+ - 5.0
165
+ CLS_AGNOSTIC_BBOX_REG: false
166
+ CONV_DIM: 256
167
+ FC_DIM: 1024
168
+ NAME: FastRCNNConvFCHead
169
+ NORM: ''
170
+ NUM_CONV: 0
171
+ NUM_FC: 2
172
+ POOLER_RESOLUTION: 7
173
+ POOLER_SAMPLING_RATIO: 0
174
+ POOLER_TYPE: ROIAlignV2
175
+ SMOOTH_L1_BETA: 0.0
176
+ TRAIN_ON_PRED_BOXES: false
177
+ ROI_HEADS:
178
+ BATCH_SIZE_PER_IMAGE: 512
179
+ IN_FEATURES:
180
+ - p2
181
+ - p3
182
+ - p4
183
+ - p5
184
+ IOU_LABELS:
185
+ - 0
186
+ - 1
187
+ IOU_THRESHOLDS:
188
+ - 0.5
189
+ NAME: StandardROIHeads
190
+ NMS_THRESH_TEST: 0.5
191
+ NUM_CLASSES: 7
192
+ POSITIVE_FRACTION: 0.25
193
+ PROPOSAL_APPEND_GT: true
194
+ SCORE_THRESH_TEST: 0.05
195
+ ROI_KEYPOINT_HEAD:
196
+ CONV_DIMS:
197
+ - 512
198
+ - 512
199
+ - 512
200
+ - 512
201
+ - 512
202
+ - 512
203
+ - 512
204
+ - 512
205
+ LOSS_WEIGHT: 1.0
206
+ MIN_KEYPOINTS_PER_IMAGE: 1
207
+ NAME: KRCNNConvDeconvUpsampleHead
208
+ NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS: true
209
+ NUM_KEYPOINTS: 17
210
+ POOLER_RESOLUTION: 14
211
+ POOLER_SAMPLING_RATIO: 0
212
+ POOLER_TYPE: ROIAlignV2
213
+ ROI_MASK_HEAD:
214
+ CLS_AGNOSTIC_MASK: false
215
+ CONV_DIM: 256
216
+ NAME: MaskRCNNConvUpsampleHead
217
+ NORM: ''
218
+ NUM_CONV: 4
219
+ POOLER_RESOLUTION: 14
220
+ POOLER_SAMPLING_RATIO: 0
221
+ POOLER_TYPE: ROIAlignV2
222
+ RPN:
223
+ BATCH_SIZE_PER_IMAGE: 256
224
+ BBOX_REG_WEIGHTS:
225
+ - 1.0
226
+ - 1.0
227
+ - 1.0
228
+ - 1.0
229
+ BOUNDARY_THRESH: -1
230
+ HEAD_NAME: StandardRPNHead
231
+ IN_FEATURES:
232
+ - p2
233
+ - p3
234
+ - p4
235
+ - p5
236
+ - p6
237
+ IOU_LABELS:
238
+ - 0
239
+ - -1
240
+ - 1
241
+ IOU_THRESHOLDS:
242
+ - 0.3
243
+ - 0.7
244
+ LOSS_WEIGHT: 1.0
245
+ NMS_THRESH: 0.7
246
+ POSITIVE_FRACTION: 0.5
247
+ POST_NMS_TOPK_TEST: 1000
248
+ POST_NMS_TOPK_TRAIN: 1000
249
+ PRE_NMS_TOPK_TEST: 1000
250
+ PRE_NMS_TOPK_TRAIN: 2000
251
+ SMOOTH_L1_BETA: 0.0
252
+ SEM_SEG_HEAD:
253
+ COMMON_STRIDE: 4
254
+ CONVS_DIM: 128
255
+ IGNORE_VALUE: 255
256
+ IN_FEATURES:
257
+ - p2
258
+ - p3
259
+ - p4
260
+ - p5
261
+ LOSS_WEIGHT: 1.0
262
+ NAME: SemSegFPNHead
263
+ NORM: GN
264
+ NUM_CLASSES: 54
265
+ WEIGHTS: https://www.dropbox.com/s/h7th27jfv19rxiy/model_final.pth?dl=1
266
+ OUTPUT_DIR: ../outputs/prima/mask_rcnn_R_50_FPN_3x/
267
+ SEED: -1
268
+ SOLVER:
269
+ BASE_LR: 0.00025
270
+ BIAS_LR_FACTOR: 1.0
271
+ CHECKPOINT_PERIOD: 5000
272
+ CLIP_GRADIENTS:
273
+ CLIP_TYPE: value
274
+ CLIP_VALUE: 1.0
275
+ ENABLED: false
276
+ NORM_TYPE: 2.0
277
+ GAMMA: 0.1
278
+ IMS_PER_BATCH: 2
279
+ LR_SCHEDULER_NAME: WarmupMultiStepLR
280
+ MAX_ITER: 60000
281
+ MOMENTUM: 0.9
282
+ NESTEROV: false
283
+ STEPS:
284
+ - 210000
285
+ - 250000
286
+ WARMUP_FACTOR: 0.001
287
+ WARMUP_ITERS: 1000
288
+ WARMUP_METHOD: linear
289
+ WEIGHT_DECAY: 0.0001
290
+ WEIGHT_DECAY_BIAS: 0.0001
291
+ WEIGHT_DECAY_NORM: 0.0
292
+ TEST:
293
+ AUG:
294
+ ENABLED: false
295
+ FLIP: true
296
+ MAX_SIZE: 4000
297
+ MIN_SIZES:
298
+ - 400
299
+ - 500
300
+ - 600
301
+ - 700
302
+ - 800
303
+ - 900
304
+ - 1000
305
+ - 1100
306
+ - 1200
307
+ DETECTIONS_PER_IMAGE: 100
308
+ EVAL_PERIOD: 0
309
+ EXPECTED_RESULTS: []
310
+ KEYPOINT_OKS_SIGMAS: []
311
+ PRECISE_BN:
312
+ ENABLED: false
313
+ NUM_ITER: 200
314
+ VERSION: 2
315
+ VIS_PERIOD: 0
models/PrimaLayout/PrimaLayout-mask_rcnn_R_50_FPN_3x-model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:618d196349da576d276e7620b29ca4701fb1c186648ad4c631c5e6c479db4628
3
+ size 351229486
models/PubLayNet/PubLayNet-faster_rcnn_R_50_FPN_3x-config.yml ADDED
@@ -0,0 +1,309 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ CUDNN_BENCHMARK: false
2
+ DATALOADER:
3
+ ASPECT_RATIO_GROUPING: true
4
+ FILTER_EMPTY_ANNOTATIONS: true
5
+ NUM_WORKERS: 4
6
+ REPEAT_THRESHOLD: 0.0
7
+ SAMPLER_TRAIN: TrainingSampler
8
+ DATASETS:
9
+ PRECOMPUTED_PROPOSAL_TOPK_TEST: 1000
10
+ PRECOMPUTED_PROPOSAL_TOPK_TRAIN: 2000
11
+ PROPOSAL_FILES_TEST: []
12
+ PROPOSAL_FILES_TRAIN: []
13
+ TEST:
14
+ - PubLayNet-val
15
+ TRAIN:
16
+ - PubLayNet
17
+ GLOBAL:
18
+ HACK: 1.0
19
+ INPUT:
20
+ CROP:
21
+ ENABLED: false
22
+ SIZE:
23
+ - 0.9
24
+ - 0.9
25
+ TYPE: relative_range
26
+ FORMAT: BGR
27
+ MASK_FORMAT: polygon
28
+ MAX_SIZE_TEST: 1333
29
+ MAX_SIZE_TRAIN: 1333
30
+ MIN_SIZE_TEST: 800
31
+ MIN_SIZE_TRAIN:
32
+ - 640
33
+ - 672
34
+ - 704
35
+ - 736
36
+ - 768
37
+ - 800
38
+ MIN_SIZE_TRAIN_SAMPLING: choice
39
+ MODEL:
40
+ ANCHOR_GENERATOR:
41
+ ANGLES:
42
+ - - -90
43
+ - 0
44
+ - 90
45
+ ASPECT_RATIOS:
46
+ - - 0.5
47
+ - 1.0
48
+ - 2.0
49
+ NAME: DefaultAnchorGenerator
50
+ OFFSET: 0.0
51
+ SIZES:
52
+ - - 32
53
+ - - 64
54
+ - - 128
55
+ - - 256
56
+ - - 512
57
+ BACKBONE:
58
+ FREEZE_AT: 2
59
+ NAME: build_resnet_fpn_backbone
60
+ DEVICE: cuda
61
+ FPN:
62
+ FUSE_TYPE: sum
63
+ IN_FEATURES:
64
+ - res2
65
+ - res3
66
+ - res4
67
+ - res5
68
+ NORM: ''
69
+ OUT_CHANNELS: 256
70
+ KEYPOINT_ON: false
71
+ LOAD_PROPOSALS: false
72
+ MASK_ON: false
73
+ META_ARCHITECTURE: GeneralizedRCNN
74
+ PANOPTIC_FPN:
75
+ COMBINE:
76
+ ENABLED: true
77
+ INSTANCES_CONFIDENCE_THRESH: 0.5
78
+ OVERLAP_THRESH: 0.5
79
+ STUFF_AREA_LIMIT: 4096
80
+ INSTANCE_LOSS_WEIGHT: 1.0
81
+ PIXEL_MEAN:
82
+ - 103.53
83
+ - 116.28
84
+ - 123.675
85
+ PIXEL_STD:
86
+ - 1.0
87
+ - 1.0
88
+ - 1.0
89
+ PROPOSAL_GENERATOR:
90
+ MIN_SIZE: 0
91
+ NAME: RPN
92
+ RESNETS:
93
+ DEFORM_MODULATED: false
94
+ DEFORM_NUM_GROUPS: 1
95
+ DEFORM_ON_PER_STAGE:
96
+ - false
97
+ - false
98
+ - false
99
+ - false
100
+ DEPTH: 50
101
+ NORM: FrozenBN
102
+ NUM_GROUPS: 1
103
+ OUT_FEATURES:
104
+ - res2
105
+ - res3
106
+ - res4
107
+ - res5
108
+ RES2_OUT_CHANNELS: 256
109
+ RES5_DILATION: 1
110
+ STEM_OUT_CHANNELS: 64
111
+ STRIDE_IN_1X1: true
112
+ WIDTH_PER_GROUP: 64
113
+ RETINANET:
114
+ BBOX_REG_WEIGHTS:
115
+ - 1.0
116
+ - 1.0
117
+ - 1.0
118
+ - 1.0
119
+ FOCAL_LOSS_ALPHA: 0.25
120
+ FOCAL_LOSS_GAMMA: 2.0
121
+ IN_FEATURES:
122
+ - p3
123
+ - p4
124
+ - p5
125
+ - p6
126
+ - p7
127
+ IOU_LABELS:
128
+ - 0
129
+ - -1
130
+ - 1
131
+ IOU_THRESHOLDS:
132
+ - 0.4
133
+ - 0.5
134
+ NMS_THRESH_TEST: 0.5
135
+ NUM_CLASSES: 80
136
+ NUM_CONVS: 4
137
+ PRIOR_PROB: 0.01
138
+ SCORE_THRESH_TEST: 0.05
139
+ SMOOTH_L1_LOSS_BETA: 0.1
140
+ TOPK_CANDIDATES_TEST: 1000
141
+ ROI_BOX_CASCADE_HEAD:
142
+ BBOX_REG_WEIGHTS:
143
+ - - 10.0
144
+ - 10.0
145
+ - 5.0
146
+ - 5.0
147
+ - - 20.0
148
+ - 20.0
149
+ - 10.0
150
+ - 10.0
151
+ - - 30.0
152
+ - 30.0
153
+ - 15.0
154
+ - 15.0
155
+ IOUS:
156
+ - 0.5
157
+ - 0.6
158
+ - 0.7
159
+ ROI_BOX_HEAD:
160
+ BBOX_REG_WEIGHTS:
161
+ - 10.0
162
+ - 10.0
163
+ - 5.0
164
+ - 5.0
165
+ CLS_AGNOSTIC_BBOX_REG: false
166
+ CONV_DIM: 256
167
+ FC_DIM: 1024
168
+ NAME: FastRCNNConvFCHead
169
+ NORM: ''
170
+ NUM_CONV: 0
171
+ NUM_FC: 2
172
+ POOLER_RESOLUTION: 7
173
+ POOLER_SAMPLING_RATIO: 0
174
+ POOLER_TYPE: ROIAlignV2
175
+ SMOOTH_L1_BETA: 0.0
176
+ TRAIN_ON_PRED_BOXES: false
177
+ ROI_HEADS:
178
+ BATCH_SIZE_PER_IMAGE: 256
179
+ IN_FEATURES:
180
+ - p2
181
+ - p3
182
+ - p4
183
+ - p5
184
+ IOU_LABELS:
185
+ - 0
186
+ - 1
187
+ IOU_THRESHOLDS:
188
+ - 0.5
189
+ NAME: StandardROIHeads
190
+ NMS_THRESH_TEST: 0.5
191
+ NUM_CLASSES: 6
192
+ POSITIVE_FRACTION: 0.25
193
+ PROPOSAL_APPEND_GT: true
194
+ SCORE_THRESH_TEST: 0.05
195
+ ROI_KEYPOINT_HEAD:
196
+ CONV_DIMS:
197
+ - 512
198
+ - 512
199
+ - 512
200
+ - 512
201
+ - 512
202
+ - 512
203
+ - 512
204
+ - 512
205
+ LOSS_WEIGHT: 1.0
206
+ MIN_KEYPOINTS_PER_IMAGE: 1
207
+ NAME: KRCNNConvDeconvUpsampleHead
208
+ NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS: true
209
+ NUM_KEYPOINTS: 17
210
+ POOLER_RESOLUTION: 14
211
+ POOLER_SAMPLING_RATIO: 0
212
+ POOLER_TYPE: ROIAlignV2
213
+ ROI_MASK_HEAD:
214
+ CLS_AGNOSTIC_MASK: false
215
+ CONV_DIM: 256
216
+ NAME: MaskRCNNConvUpsampleHead
217
+ NORM: ''
218
+ NUM_CONV: 4
219
+ POOLER_RESOLUTION: 14
220
+ POOLER_SAMPLING_RATIO: 0
221
+ POOLER_TYPE: ROIAlignV2
222
+ RPN:
223
+ BATCH_SIZE_PER_IMAGE: 256
224
+ BBOX_REG_WEIGHTS:
225
+ - 1.0
226
+ - 1.0
227
+ - 1.0
228
+ - 1.0
229
+ BOUNDARY_THRESH: -1
230
+ HEAD_NAME: StandardRPNHead
231
+ IN_FEATURES:
232
+ - p2
233
+ - p3
234
+ - p4
235
+ - p5
236
+ - p6
237
+ IOU_LABELS:
238
+ - 0
239
+ - -1
240
+ - 1
241
+ IOU_THRESHOLDS:
242
+ - 0.3
243
+ - 0.7
244
+ LOSS_WEIGHT: 1.0
245
+ NMS_THRESH: 0.7
246
+ POSITIVE_FRACTION: 0.5
247
+ POST_NMS_TOPK_TEST: 1000
248
+ POST_NMS_TOPK_TRAIN: 1000
249
+ PRE_NMS_TOPK_TEST: 1000
250
+ PRE_NMS_TOPK_TRAIN: 2000
251
+ SMOOTH_L1_BETA: 0.0
252
+ SEM_SEG_HEAD:
253
+ COMMON_STRIDE: 4
254
+ CONVS_DIM: 128
255
+ IGNORE_VALUE: 255
256
+ IN_FEATURES:
257
+ - p2
258
+ - p3
259
+ - p4
260
+ - p5
261
+ LOSS_WEIGHT: 1.0
262
+ NAME: SemSegFPNHead
263
+ NORM: GN
264
+ NUM_CLASSES: 54
265
+ WEIGHTS: https://www.dropbox.com/s/dgy9c10wykk4lq4/model_final.pth?dl=1
266
+ OUTPUT_DIR: output/publaynet/faster_rcnn_R_50_FPN_3x/
267
+ SEED: -1
268
+ SOLVER:
269
+ BASE_LR: 0.00025
270
+ BIAS_LR_FACTOR: 1.0
271
+ CHECKPOINT_PERIOD: 5000
272
+ GAMMA: 0.1
273
+ IMS_PER_BATCH: 2
274
+ LR_SCHEDULER_NAME: WarmupMultiStepLR
275
+ MAX_ITER: 15000
276
+ MOMENTUM: 0.9
277
+ STEPS:
278
+ - 210000
279
+ - 250000
280
+ WARMUP_FACTOR: 0.001
281
+ WARMUP_ITERS: 1000
282
+ WARMUP_METHOD: linear
283
+ WEIGHT_DECAY: 0.0001
284
+ WEIGHT_DECAY_BIAS: 0.0001
285
+ WEIGHT_DECAY_NORM: 0.0
286
+ TEST:
287
+ AUG:
288
+ ENABLED: false
289
+ FLIP: true
290
+ MAX_SIZE: 4000
291
+ MIN_SIZES:
292
+ - 400
293
+ - 500
294
+ - 600
295
+ - 700
296
+ - 800
297
+ - 900
298
+ - 1000
299
+ - 1100
300
+ - 1200
301
+ DETECTIONS_PER_IMAGE: 100
302
+ EVAL_PERIOD: 0
303
+ EXPECTED_RESULTS: []
304
+ KEYPOINT_OKS_SIGMAS: []
305
+ PRECISE_BN:
306
+ ENABLED: false
307
+ NUM_ITER: 200
308
+ VERSION: 2
309
+ VIS_PERIOD: 0
models/PubLayNet/PubLayNet-faster_rcnn_R_50_FPN_3x-model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:309b946892b37230ee07195acee6764b4dbc49a9436cf117862a4eae2cba069d
3
+ size 330185761
models/PubLayNet/PubLayNet-mask_rcnn_R_50_FPN_3x-config.yml ADDED
@@ -0,0 +1,309 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ CUDNN_BENCHMARK: false
2
+ DATALOADER:
3
+ ASPECT_RATIO_GROUPING: true
4
+ FILTER_EMPTY_ANNOTATIONS: true
5
+ NUM_WORKERS: 4
6
+ REPEAT_THRESHOLD: 0.0
7
+ SAMPLER_TRAIN: TrainingSampler
8
+ DATASETS:
9
+ PRECOMPUTED_PROPOSAL_TOPK_TEST: 1000
10
+ PRECOMPUTED_PROPOSAL_TOPK_TRAIN: 2000
11
+ PROPOSAL_FILES_TEST: []
12
+ PROPOSAL_FILES_TRAIN: []
13
+ TEST:
14
+ - PubLayNet-val
15
+ TRAIN:
16
+ - PubLayNet
17
+ GLOBAL:
18
+ HACK: 1.0
19
+ INPUT:
20
+ CROP:
21
+ ENABLED: false
22
+ SIZE:
23
+ - 0.9
24
+ - 0.9
25
+ TYPE: relative_range
26
+ FORMAT: BGR
27
+ MASK_FORMAT: polygon
28
+ MAX_SIZE_TEST: 1333
29
+ MAX_SIZE_TRAIN: 1333
30
+ MIN_SIZE_TEST: 800
31
+ MIN_SIZE_TRAIN:
32
+ - 640
33
+ - 672
34
+ - 704
35
+ - 736
36
+ - 768
37
+ - 800
38
+ MIN_SIZE_TRAIN_SAMPLING: choice
39
+ MODEL:
40
+ ANCHOR_GENERATOR:
41
+ ANGLES:
42
+ - - -90
43
+ - 0
44
+ - 90
45
+ ASPECT_RATIOS:
46
+ - - 0.5
47
+ - 1.0
48
+ - 2.0
49
+ NAME: DefaultAnchorGenerator
50
+ OFFSET: 0.0
51
+ SIZES:
52
+ - - 32
53
+ - - 64
54
+ - - 128
55
+ - - 256
56
+ - - 512
57
+ BACKBONE:
58
+ FREEZE_AT: 2
59
+ NAME: build_resnet_fpn_backbone
60
+ DEVICE: cuda
61
+ FPN:
62
+ FUSE_TYPE: sum
63
+ IN_FEATURES:
64
+ - res2
65
+ - res3
66
+ - res4
67
+ - res5
68
+ NORM: ''
69
+ OUT_CHANNELS: 256
70
+ KEYPOINT_ON: false
71
+ LOAD_PROPOSALS: false
72
+ MASK_ON: true
73
+ META_ARCHITECTURE: GeneralizedRCNN
74
+ PANOPTIC_FPN:
75
+ COMBINE:
76
+ ENABLED: true
77
+ INSTANCES_CONFIDENCE_THRESH: 0.5
78
+ OVERLAP_THRESH: 0.5
79
+ STUFF_AREA_LIMIT: 4096
80
+ INSTANCE_LOSS_WEIGHT: 1.0
81
+ PIXEL_MEAN:
82
+ - 103.53
83
+ - 116.28
84
+ - 123.675
85
+ PIXEL_STD:
86
+ - 1.0
87
+ - 1.0
88
+ - 1.0
89
+ PROPOSAL_GENERATOR:
90
+ MIN_SIZE: 0
91
+ NAME: RPN
92
+ RESNETS:
93
+ DEFORM_MODULATED: false
94
+ DEFORM_NUM_GROUPS: 1
95
+ DEFORM_ON_PER_STAGE:
96
+ - false
97
+ - false
98
+ - false
99
+ - false
100
+ DEPTH: 50
101
+ NORM: FrozenBN
102
+ NUM_GROUPS: 1
103
+ OUT_FEATURES:
104
+ - res2
105
+ - res3
106
+ - res4
107
+ - res5
108
+ RES2_OUT_CHANNELS: 256
109
+ RES5_DILATION: 1
110
+ STEM_OUT_CHANNELS: 64
111
+ STRIDE_IN_1X1: true
112
+ WIDTH_PER_GROUP: 64
113
+ RETINANET:
114
+ BBOX_REG_WEIGHTS:
115
+ - 1.0
116
+ - 1.0
117
+ - 1.0
118
+ - 1.0
119
+ FOCAL_LOSS_ALPHA: 0.25
120
+ FOCAL_LOSS_GAMMA: 2.0
121
+ IN_FEATURES:
122
+ - p3
123
+ - p4
124
+ - p5
125
+ - p6
126
+ - p7
127
+ IOU_LABELS:
128
+ - 0
129
+ - -1
130
+ - 1
131
+ IOU_THRESHOLDS:
132
+ - 0.4
133
+ - 0.5
134
+ NMS_THRESH_TEST: 0.5
135
+ NUM_CLASSES: 80
136
+ NUM_CONVS: 4
137
+ PRIOR_PROB: 0.01
138
+ SCORE_THRESH_TEST: 0.05
139
+ SMOOTH_L1_LOSS_BETA: 0.1
140
+ TOPK_CANDIDATES_TEST: 1000
141
+ ROI_BOX_CASCADE_HEAD:
142
+ BBOX_REG_WEIGHTS:
143
+ - - 10.0
144
+ - 10.0
145
+ - 5.0
146
+ - 5.0
147
+ - - 20.0
148
+ - 20.0
149
+ - 10.0
150
+ - 10.0
151
+ - - 30.0
152
+ - 30.0
153
+ - 15.0
154
+ - 15.0
155
+ IOUS:
156
+ - 0.5
157
+ - 0.6
158
+ - 0.7
159
+ ROI_BOX_HEAD:
160
+ BBOX_REG_WEIGHTS:
161
+ - 10.0
162
+ - 10.0
163
+ - 5.0
164
+ - 5.0
165
+ CLS_AGNOSTIC_BBOX_REG: false
166
+ CONV_DIM: 256
167
+ FC_DIM: 1024
168
+ NAME: FastRCNNConvFCHead
169
+ NORM: ''
170
+ NUM_CONV: 0
171
+ NUM_FC: 2
172
+ POOLER_RESOLUTION: 7
173
+ POOLER_SAMPLING_RATIO: 0
174
+ POOLER_TYPE: ROIAlignV2
175
+ SMOOTH_L1_BETA: 0.0
176
+ TRAIN_ON_PRED_BOXES: false
177
+ ROI_HEADS:
178
+ BATCH_SIZE_PER_IMAGE: 256
179
+ IN_FEATURES:
180
+ - p2
181
+ - p3
182
+ - p4
183
+ - p5
184
+ IOU_LABELS:
185
+ - 0
186
+ - 1
187
+ IOU_THRESHOLDS:
188
+ - 0.5
189
+ NAME: StandardROIHeads
190
+ NMS_THRESH_TEST: 0.5
191
+ NUM_CLASSES: 6
192
+ POSITIVE_FRACTION: 0.25
193
+ PROPOSAL_APPEND_GT: true
194
+ SCORE_THRESH_TEST: 0.05
195
+ ROI_KEYPOINT_HEAD:
196
+ CONV_DIMS:
197
+ - 512
198
+ - 512
199
+ - 512
200
+ - 512
201
+ - 512
202
+ - 512
203
+ - 512
204
+ - 512
205
+ LOSS_WEIGHT: 1.0
206
+ MIN_KEYPOINTS_PER_IMAGE: 1
207
+ NAME: KRCNNConvDeconvUpsampleHead
208
+ NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS: true
209
+ NUM_KEYPOINTS: 17
210
+ POOLER_RESOLUTION: 14
211
+ POOLER_SAMPLING_RATIO: 0
212
+ POOLER_TYPE: ROIAlignV2
213
+ ROI_MASK_HEAD:
214
+ CLS_AGNOSTIC_MASK: false
215
+ CONV_DIM: 256
216
+ NAME: MaskRCNNConvUpsampleHead
217
+ NORM: ''
218
+ NUM_CONV: 4
219
+ POOLER_RESOLUTION: 14
220
+ POOLER_SAMPLING_RATIO: 0
221
+ POOLER_TYPE: ROIAlignV2
222
+ RPN:
223
+ BATCH_SIZE_PER_IMAGE: 256
224
+ BBOX_REG_WEIGHTS:
225
+ - 1.0
226
+ - 1.0
227
+ - 1.0
228
+ - 1.0
229
+ BOUNDARY_THRESH: -1
230
+ HEAD_NAME: StandardRPNHead
231
+ IN_FEATURES:
232
+ - p2
233
+ - p3
234
+ - p4
235
+ - p5
236
+ - p6
237
+ IOU_LABELS:
238
+ - 0
239
+ - -1
240
+ - 1
241
+ IOU_THRESHOLDS:
242
+ - 0.3
243
+ - 0.7
244
+ LOSS_WEIGHT: 1.0
245
+ NMS_THRESH: 0.7
246
+ POSITIVE_FRACTION: 0.5
247
+ POST_NMS_TOPK_TEST: 1000
248
+ POST_NMS_TOPK_TRAIN: 1000
249
+ PRE_NMS_TOPK_TEST: 1000
250
+ PRE_NMS_TOPK_TRAIN: 2000
251
+ SMOOTH_L1_BETA: 0.0
252
+ SEM_SEG_HEAD:
253
+ COMMON_STRIDE: 4
254
+ CONVS_DIM: 128
255
+ IGNORE_VALUE: 255
256
+ IN_FEATURES:
257
+ - p2
258
+ - p3
259
+ - p4
260
+ - p5
261
+ LOSS_WEIGHT: 1.0
262
+ NAME: SemSegFPNHead
263
+ NORM: GN
264
+ NUM_CLASSES: 54
265
+ WEIGHTS: https://www.dropbox.com/s/d9fc9tahfzyl6df/model_final.pth?dl=1
266
+ OUTPUT_DIR: output/publaynet/mask_rcnn_R_50_FPN_3x/
267
+ SEED: -1
268
+ SOLVER:
269
+ BASE_LR: 0.00025
270
+ BIAS_LR_FACTOR: 1.0
271
+ CHECKPOINT_PERIOD: 5000
272
+ GAMMA: 0.1
273
+ IMS_PER_BATCH: 2
274
+ LR_SCHEDULER_NAME: WarmupMultiStepLR
275
+ MAX_ITER: 15000
276
+ MOMENTUM: 0.9
277
+ STEPS:
278
+ - 210000
279
+ - 250000
280
+ WARMUP_FACTOR: 0.001
281
+ WARMUP_ITERS: 1000
282
+ WARMUP_METHOD: linear
283
+ WEIGHT_DECAY: 0.0001
284
+ WEIGHT_DECAY_BIAS: 0.0001
285
+ WEIGHT_DECAY_NORM: 0.0
286
+ TEST:
287
+ AUG:
288
+ ENABLED: false
289
+ FLIP: true
290
+ MAX_SIZE: 4000
291
+ MIN_SIZES:
292
+ - 400
293
+ - 500
294
+ - 600
295
+ - 700
296
+ - 800
297
+ - 900
298
+ - 1000
299
+ - 1100
300
+ - 1200
301
+ DETECTIONS_PER_IMAGE: 100
302
+ EVAL_PERIOD: 0
303
+ EXPECTED_RESULTS: []
304
+ KEYPOINT_OKS_SIGMAS: []
305
+ PRECISE_BN:
306
+ ENABLED: false
307
+ NUM_ITER: 200
308
+ VERSION: 2
309
+ VIS_PERIOD: 0
models/PubLayNet/PubLayNet-mask_rcnn_R_50_FPN_3x-model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:61777d5cefba11a7bc8491f81b5079683785a80f5f037772718a0da674af2dac
3
+ size 351186091
models/PubLayNet/PubLayNet-mask_rcnn_X_101_32x8d_FPN_3x-config.yml ADDED
@@ -0,0 +1,309 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ CUDNN_BENCHMARK: false
2
+ DATALOADER:
3
+ ASPECT_RATIO_GROUPING: true
4
+ FILTER_EMPTY_ANNOTATIONS: true
5
+ NUM_WORKERS: 4
6
+ REPEAT_THRESHOLD: 0.0
7
+ SAMPLER_TRAIN: TrainingSampler
8
+ DATASETS:
9
+ PRECOMPUTED_PROPOSAL_TOPK_TEST: 1000
10
+ PRECOMPUTED_PROPOSAL_TOPK_TRAIN: 2000
11
+ PROPOSAL_FILES_TEST: []
12
+ PROPOSAL_FILES_TRAIN: []
13
+ TEST:
14
+ - publaynet-val
15
+ TRAIN:
16
+ - publaynet-train
17
+ GLOBAL:
18
+ HACK: 1.0
19
+ INPUT:
20
+ CROP:
21
+ ENABLED: false
22
+ SIZE:
23
+ - 0.9
24
+ - 0.9
25
+ TYPE: relative_range
26
+ FORMAT: BGR
27
+ MASK_FORMAT: polygon
28
+ MAX_SIZE_TEST: 1333
29
+ MAX_SIZE_TRAIN: 1333
30
+ MIN_SIZE_TEST: 800
31
+ MIN_SIZE_TRAIN:
32
+ - 640
33
+ - 672
34
+ - 704
35
+ - 736
36
+ - 768
37
+ - 800
38
+ MIN_SIZE_TRAIN_SAMPLING: choice
39
+ MODEL:
40
+ ANCHOR_GENERATOR:
41
+ ANGLES:
42
+ - - -90
43
+ - 0
44
+ - 90
45
+ ASPECT_RATIOS:
46
+ - - 0.5
47
+ - 1.0
48
+ - 2.0
49
+ NAME: DefaultAnchorGenerator
50
+ OFFSET: 0.0
51
+ SIZES:
52
+ - - 32
53
+ - - 64
54
+ - - 128
55
+ - - 256
56
+ - - 512
57
+ BACKBONE:
58
+ FREEZE_AT: 2
59
+ NAME: build_resnet_fpn_backbone
60
+ DEVICE: cuda
61
+ FPN:
62
+ FUSE_TYPE: sum
63
+ IN_FEATURES:
64
+ - res2
65
+ - res3
66
+ - res4
67
+ - res5
68
+ NORM: ''
69
+ OUT_CHANNELS: 256
70
+ KEYPOINT_ON: false
71
+ LOAD_PROPOSALS: false
72
+ MASK_ON: true
73
+ META_ARCHITECTURE: GeneralizedRCNN
74
+ PANOPTIC_FPN:
75
+ COMBINE:
76
+ ENABLED: true
77
+ INSTANCES_CONFIDENCE_THRESH: 0.5
78
+ OVERLAP_THRESH: 0.5
79
+ STUFF_AREA_LIMIT: 4096
80
+ INSTANCE_LOSS_WEIGHT: 1.0
81
+ PIXEL_MEAN:
82
+ - 103.53
83
+ - 116.28
84
+ - 123.675
85
+ PIXEL_STD:
86
+ - 57.375
87
+ - 57.12
88
+ - 58.395
89
+ PROPOSAL_GENERATOR:
90
+ MIN_SIZE: 0
91
+ NAME: RPN
92
+ RESNETS:
93
+ DEFORM_MODULATED: false
94
+ DEFORM_NUM_GROUPS: 1
95
+ DEFORM_ON_PER_STAGE:
96
+ - false
97
+ - false
98
+ - false
99
+ - false
100
+ DEPTH: 101
101
+ NORM: FrozenBN
102
+ NUM_GROUPS: 32
103
+ OUT_FEATURES:
104
+ - res2
105
+ - res3
106
+ - res4
107
+ - res5
108
+ RES2_OUT_CHANNELS: 256
109
+ RES5_DILATION: 1
110
+ STEM_OUT_CHANNELS: 64
111
+ STRIDE_IN_1X1: false
112
+ WIDTH_PER_GROUP: 8
113
+ RETINANET:
114
+ BBOX_REG_WEIGHTS:
115
+ - 1.0
116
+ - 1.0
117
+ - 1.0
118
+ - 1.0
119
+ FOCAL_LOSS_ALPHA: 0.25
120
+ FOCAL_LOSS_GAMMA: 2.0
121
+ IN_FEATURES:
122
+ - p3
123
+ - p4
124
+ - p5
125
+ - p6
126
+ - p7
127
+ IOU_LABELS:
128
+ - 0
129
+ - -1
130
+ - 1
131
+ IOU_THRESHOLDS:
132
+ - 0.4
133
+ - 0.5
134
+ NMS_THRESH_TEST: 0.5
135
+ NUM_CLASSES: 80
136
+ NUM_CONVS: 4
137
+ PRIOR_PROB: 0.01
138
+ SCORE_THRESH_TEST: 0.05
139
+ SMOOTH_L1_LOSS_BETA: 0.1
140
+ TOPK_CANDIDATES_TEST: 1000
141
+ ROI_BOX_CASCADE_HEAD:
142
+ BBOX_REG_WEIGHTS:
143
+ - - 10.0
144
+ - 10.0
145
+ - 5.0
146
+ - 5.0
147
+ - - 20.0
148
+ - 20.0
149
+ - 10.0
150
+ - 10.0
151
+ - - 30.0
152
+ - 30.0
153
+ - 15.0
154
+ - 15.0
155
+ IOUS:
156
+ - 0.5
157
+ - 0.6
158
+ - 0.7
159
+ ROI_BOX_HEAD:
160
+ BBOX_REG_WEIGHTS:
161
+ - 10.0
162
+ - 10.0
163
+ - 5.0
164
+ - 5.0
165
+ CLS_AGNOSTIC_BBOX_REG: false
166
+ CONV_DIM: 256
167
+ FC_DIM: 1024
168
+ NAME: FastRCNNConvFCHead
169
+ NORM: ''
170
+ NUM_CONV: 0
171
+ NUM_FC: 2
172
+ POOLER_RESOLUTION: 7
173
+ POOLER_SAMPLING_RATIO: 0
174
+ POOLER_TYPE: ROIAlignV2
175
+ SMOOTH_L1_BETA: 0.0
176
+ TRAIN_ON_PRED_BOXES: false
177
+ ROI_HEADS:
178
+ BATCH_SIZE_PER_IMAGE: 512
179
+ IN_FEATURES:
180
+ - p2
181
+ - p3
182
+ - p4
183
+ - p5
184
+ IOU_LABELS:
185
+ - 0
186
+ - 1
187
+ IOU_THRESHOLDS:
188
+ - 0.5
189
+ NAME: StandardROIHeads
190
+ NMS_THRESH_TEST: 0.5
191
+ NUM_CLASSES: 5
192
+ POSITIVE_FRACTION: 0.25
193
+ PROPOSAL_APPEND_GT: true
194
+ SCORE_THRESH_TEST: 0.05
195
+ ROI_KEYPOINT_HEAD:
196
+ CONV_DIMS:
197
+ - 512
198
+ - 512
199
+ - 512
200
+ - 512
201
+ - 512
202
+ - 512
203
+ - 512
204
+ - 512
205
+ LOSS_WEIGHT: 1.0
206
+ MIN_KEYPOINTS_PER_IMAGE: 1
207
+ NAME: KRCNNConvDeconvUpsampleHead
208
+ NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS: true
209
+ NUM_KEYPOINTS: 17
210
+ POOLER_RESOLUTION: 14
211
+ POOLER_SAMPLING_RATIO: 0
212
+ POOLER_TYPE: ROIAlignV2
213
+ ROI_MASK_HEAD:
214
+ CLS_AGNOSTIC_MASK: false
215
+ CONV_DIM: 256
216
+ NAME: MaskRCNNConvUpsampleHead
217
+ NORM: ''
218
+ NUM_CONV: 4
219
+ POOLER_RESOLUTION: 14
220
+ POOLER_SAMPLING_RATIO: 0
221
+ POOLER_TYPE: ROIAlignV2
222
+ RPN:
223
+ BATCH_SIZE_PER_IMAGE: 256
224
+ BBOX_REG_WEIGHTS:
225
+ - 1.0
226
+ - 1.0
227
+ - 1.0
228
+ - 1.0
229
+ BOUNDARY_THRESH: -1
230
+ HEAD_NAME: StandardRPNHead
231
+ IN_FEATURES:
232
+ - p2
233
+ - p3
234
+ - p4
235
+ - p5
236
+ - p6
237
+ IOU_LABELS:
238
+ - 0
239
+ - -1
240
+ - 1
241
+ IOU_THRESHOLDS:
242
+ - 0.3
243
+ - 0.7
244
+ LOSS_WEIGHT: 1.0
245
+ NMS_THRESH: 0.7
246
+ POSITIVE_FRACTION: 0.5
247
+ POST_NMS_TOPK_TEST: 1000
248
+ POST_NMS_TOPK_TRAIN: 1000
249
+ PRE_NMS_TOPK_TEST: 1000
250
+ PRE_NMS_TOPK_TRAIN: 2000
251
+ SMOOTH_L1_BETA: 0.0
252
+ SEM_SEG_HEAD:
253
+ COMMON_STRIDE: 4
254
+ CONVS_DIM: 128
255
+ IGNORE_VALUE: 255
256
+ IN_FEATURES:
257
+ - p2
258
+ - p3
259
+ - p4
260
+ - p5
261
+ LOSS_WEIGHT: 1.0
262
+ NAME: SemSegFPNHead
263
+ NORM: GN
264
+ NUM_CLASSES: 54
265
+ WEIGHTS: https://www.dropbox.com/s/57zjbwv6gh3srry/model_final.pth?dl=1
266
+ OUTPUT_DIR: ../outputs/publaynet/mask_rcnn_X_101_32x8d_FPN_3x/
267
+ SEED: -1
268
+ SOLVER:
269
+ BASE_LR: 0.0005
270
+ BIAS_LR_FACTOR: 1.0
271
+ CHECKPOINT_PERIOD: 60000
272
+ GAMMA: 0.1
273
+ IMS_PER_BATCH: 2
274
+ LR_SCHEDULER_NAME: WarmupMultiStepLR
275
+ MAX_ITER: 180000
276
+ MOMENTUM: 0.9
277
+ STEPS:
278
+ - 210000
279
+ - 250000
280
+ WARMUP_FACTOR: 0.001
281
+ WARMUP_ITERS: 1000
282
+ WARMUP_METHOD: linear
283
+ WEIGHT_DECAY: 0.0001
284
+ WEIGHT_DECAY_BIAS: 0.0001
285
+ WEIGHT_DECAY_NORM: 0.0
286
+ TEST:
287
+ AUG:
288
+ ENABLED: false
289
+ FLIP: true
290
+ MAX_SIZE: 4000
291
+ MIN_SIZES:
292
+ - 400
293
+ - 500
294
+ - 600
295
+ - 700
296
+ - 800
297
+ - 900
298
+ - 1000
299
+ - 1100
300
+ - 1200
301
+ DETECTIONS_PER_IMAGE: 100
302
+ EVAL_PERIOD: 0
303
+ EXPECTED_RESULTS: []
304
+ KEYPOINT_OKS_SIGMAS: []
305
+ PRECISE_BN:
306
+ ENABLED: false
307
+ NUM_ITER: 200
308
+ VERSION: 2
309
+ VIS_PERIOD: 0
models/PubLayNet/PubLayNet-mask_rcnn_X_101_32x8d_FPN_3x-model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:250a94ef23ce66174bbfbe19e83a4ddc3fa73a08bc7320c586e733c29a56c47c
3
+ size 856276388
models/TableBank/TableBank-faster_rcnn_R_101_FPN_3x-config.yml ADDED
@@ -0,0 +1,317 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ CUDNN_BENCHMARK: false
2
+ DATALOADER:
3
+ ASPECT_RATIO_GROUPING: true
4
+ FILTER_EMPTY_ANNOTATIONS: true
5
+ NUM_WORKERS: 4
6
+ REPEAT_THRESHOLD: 0.0
7
+ SAMPLER_TRAIN: TrainingSampler
8
+ DATASETS:
9
+ PRECOMPUTED_PROPOSAL_TOPK_TEST: 1000
10
+ PRECOMPUTED_PROPOSAL_TOPK_TRAIN: 2000
11
+ PROPOSAL_FILES_TEST: []
12
+ PROPOSAL_FILES_TRAIN: []
13
+ TEST:
14
+ - word-val
15
+ - latex-val
16
+ TRAIN:
17
+ - word
18
+ - latex
19
+ GLOBAL:
20
+ HACK: 1.0
21
+ INPUT:
22
+ CROP:
23
+ ENABLED: false
24
+ SIZE:
25
+ - 0.9
26
+ - 0.9
27
+ TYPE: relative_range
28
+ FORMAT: BGR
29
+ MASK_FORMAT: polygon
30
+ MAX_SIZE_TEST: 1333
31
+ MAX_SIZE_TRAIN: 1333
32
+ MIN_SIZE_TEST: 800
33
+ MIN_SIZE_TRAIN:
34
+ - 640
35
+ - 672
36
+ - 704
37
+ - 736
38
+ - 768
39
+ - 800
40
+ MIN_SIZE_TRAIN_SAMPLING: choice
41
+ MODEL:
42
+ ANCHOR_GENERATOR:
43
+ ANGLES:
44
+ - - -90
45
+ - 0
46
+ - 90
47
+ ASPECT_RATIOS:
48
+ - - 0.5
49
+ - 1.0
50
+ - 2.0
51
+ NAME: DefaultAnchorGenerator
52
+ OFFSET: 0.0
53
+ SIZES:
54
+ - - 32
55
+ - - 64
56
+ - - 128
57
+ - - 256
58
+ - - 512
59
+ BACKBONE:
60
+ FREEZE_AT: 2
61
+ NAME: build_resnet_fpn_backbone
62
+ DEVICE: cuda
63
+ FPN:
64
+ FUSE_TYPE: sum
65
+ IN_FEATURES:
66
+ - res2
67
+ - res3
68
+ - res4
69
+ - res5
70
+ NORM: ''
71
+ OUT_CHANNELS: 256
72
+ KEYPOINT_ON: false
73
+ LOAD_PROPOSALS: false
74
+ MASK_ON: false
75
+ META_ARCHITECTURE: GeneralizedRCNN
76
+ PANOPTIC_FPN:
77
+ COMBINE:
78
+ ENABLED: true
79
+ INSTANCES_CONFIDENCE_THRESH: 0.5
80
+ OVERLAP_THRESH: 0.5
81
+ STUFF_AREA_LIMIT: 4096
82
+ INSTANCE_LOSS_WEIGHT: 1.0
83
+ PIXEL_MEAN:
84
+ - 103.53
85
+ - 116.28
86
+ - 123.675
87
+ PIXEL_STD:
88
+ - 1.0
89
+ - 1.0
90
+ - 1.0
91
+ PROPOSAL_GENERATOR:
92
+ MIN_SIZE: 0
93
+ NAME: RPN
94
+ RESNETS:
95
+ DEFORM_MODULATED: false
96
+ DEFORM_NUM_GROUPS: 1
97
+ DEFORM_ON_PER_STAGE:
98
+ - false
99
+ - false
100
+ - false
101
+ - false
102
+ DEPTH: 101
103
+ NORM: FrozenBN
104
+ NUM_GROUPS: 1
105
+ OUT_FEATURES:
106
+ - res2
107
+ - res3
108
+ - res4
109
+ - res5
110
+ RES2_OUT_CHANNELS: 256
111
+ RES5_DILATION: 1
112
+ STEM_OUT_CHANNELS: 64
113
+ STRIDE_IN_1X1: true
114
+ WIDTH_PER_GROUP: 64
115
+ RETINANET:
116
+ BBOX_REG_WEIGHTS:
117
+ - 1.0
118
+ - 1.0
119
+ - 1.0
120
+ - 1.0
121
+ FOCAL_LOSS_ALPHA: 0.25
122
+ FOCAL_LOSS_GAMMA: 2.0
123
+ IN_FEATURES:
124
+ - p3
125
+ - p4
126
+ - p5
127
+ - p6
128
+ - p7
129
+ IOU_LABELS:
130
+ - 0
131
+ - -1
132
+ - 1
133
+ IOU_THRESHOLDS:
134
+ - 0.4
135
+ - 0.5
136
+ NMS_THRESH_TEST: 0.5
137
+ NUM_CLASSES: 80
138
+ NUM_CONVS: 4
139
+ PRIOR_PROB: 0.01
140
+ SCORE_THRESH_TEST: 0.05
141
+ SMOOTH_L1_LOSS_BETA: 0.1
142
+ TOPK_CANDIDATES_TEST: 1000
143
+ ROI_BOX_CASCADE_HEAD:
144
+ BBOX_REG_WEIGHTS:
145
+ - - 10.0
146
+ - 10.0
147
+ - 5.0
148
+ - 5.0
149
+ - - 20.0
150
+ - 20.0
151
+ - 10.0
152
+ - 10.0
153
+ - - 30.0
154
+ - 30.0
155
+ - 15.0
156
+ - 15.0
157
+ IOUS:
158
+ - 0.5
159
+ - 0.6
160
+ - 0.7
161
+ ROI_BOX_HEAD:
162
+ BBOX_REG_WEIGHTS:
163
+ - 10.0
164
+ - 10.0
165
+ - 5.0
166
+ - 5.0
167
+ CLS_AGNOSTIC_BBOX_REG: false
168
+ CONV_DIM: 256
169
+ FC_DIM: 1024
170
+ NAME: FastRCNNConvFCHead
171
+ NORM: ''
172
+ NUM_CONV: 0
173
+ NUM_FC: 2
174
+ POOLER_RESOLUTION: 7
175
+ POOLER_SAMPLING_RATIO: 0
176
+ POOLER_TYPE: ROIAlignV2
177
+ SMOOTH_L1_BETA: 0.0
178
+ TRAIN_ON_PRED_BOXES: false
179
+ ROI_HEADS:
180
+ BATCH_SIZE_PER_IMAGE: 512
181
+ IN_FEATURES:
182
+ - p2
183
+ - p3
184
+ - p4
185
+ - p5
186
+ IOU_LABELS:
187
+ - 0
188
+ - 1
189
+ IOU_THRESHOLDS:
190
+ - 0.5
191
+ NAME: StandardROIHeads
192
+ NMS_THRESH_TEST: 0.5
193
+ NUM_CLASSES: 1
194
+ POSITIVE_FRACTION: 0.25
195
+ PROPOSAL_APPEND_GT: true
196
+ SCORE_THRESH_TEST: 0.05
197
+ ROI_KEYPOINT_HEAD:
198
+ CONV_DIMS:
199
+ - 512
200
+ - 512
201
+ - 512
202
+ - 512
203
+ - 512
204
+ - 512
205
+ - 512
206
+ - 512
207
+ LOSS_WEIGHT: 1.0
208
+ MIN_KEYPOINTS_PER_IMAGE: 1
209
+ NAME: KRCNNConvDeconvUpsampleHead
210
+ NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS: true
211
+ NUM_KEYPOINTS: 17
212
+ POOLER_RESOLUTION: 14
213
+ POOLER_SAMPLING_RATIO: 0
214
+ POOLER_TYPE: ROIAlignV2
215
+ ROI_MASK_HEAD:
216
+ CLS_AGNOSTIC_MASK: false
217
+ CONV_DIM: 256
218
+ NAME: MaskRCNNConvUpsampleHead
219
+ NORM: ''
220
+ NUM_CONV: 4
221
+ POOLER_RESOLUTION: 14
222
+ POOLER_SAMPLING_RATIO: 0
223
+ POOLER_TYPE: ROIAlignV2
224
+ RPN:
225
+ BATCH_SIZE_PER_IMAGE: 256
226
+ BBOX_REG_WEIGHTS:
227
+ - 1.0
228
+ - 1.0
229
+ - 1.0
230
+ - 1.0
231
+ BOUNDARY_THRESH: -1
232
+ HEAD_NAME: StandardRPNHead
233
+ IN_FEATURES:
234
+ - p2
235
+ - p3
236
+ - p4
237
+ - p5
238
+ - p6
239
+ IOU_LABELS:
240
+ - 0
241
+ - -1
242
+ - 1
243
+ IOU_THRESHOLDS:
244
+ - 0.3
245
+ - 0.7
246
+ LOSS_WEIGHT: 1.0
247
+ NMS_THRESH: 0.7
248
+ POSITIVE_FRACTION: 0.5
249
+ POST_NMS_TOPK_TEST: 1000
250
+ POST_NMS_TOPK_TRAIN: 1000
251
+ PRE_NMS_TOPK_TEST: 1000
252
+ PRE_NMS_TOPK_TRAIN: 2000
253
+ SMOOTH_L1_BETA: 0.0
254
+ SEM_SEG_HEAD:
255
+ COMMON_STRIDE: 4
256
+ CONVS_DIM: 128
257
+ IGNORE_VALUE: 255
258
+ IN_FEATURES:
259
+ - p2
260
+ - p3
261
+ - p4
262
+ - p5
263
+ LOSS_WEIGHT: 1.0
264
+ NAME: SemSegFPNHead
265
+ NORM: GN
266
+ NUM_CLASSES: 54
267
+ WEIGHTS: https://www.dropbox.com/s/6vzfk8lk9xvyitg/model_final.pth?dl=1
268
+ OUTPUT_DIR: outputs/faster_rcnn_R_101_FPN_3x/
269
+ SEED: -1
270
+ SOLVER:
271
+ BASE_LR: 0.0005
272
+ BIAS_LR_FACTOR: 1.0
273
+ CHECKPOINT_PERIOD: 30000
274
+ CLIP_GRADIENTS:
275
+ CLIP_TYPE: value
276
+ CLIP_VALUE: 1.0
277
+ ENABLED: false
278
+ NORM_TYPE: 2.0
279
+ GAMMA: 0.1
280
+ IMS_PER_BATCH: 2
281
+ LR_SCHEDULER_NAME: WarmupMultiStepLR
282
+ MAX_ITER: 270000
283
+ MOMENTUM: 0.9
284
+ NESTEROV: false
285
+ STEPS:
286
+ - 210000
287
+ - 250000
288
+ WARMUP_FACTOR: 0.001
289
+ WARMUP_ITERS: 1000
290
+ WARMUP_METHOD: linear
291
+ WEIGHT_DECAY: 0.0001
292
+ WEIGHT_DECAY_BIAS: 0.0001
293
+ WEIGHT_DECAY_NORM: 0.0
294
+ TEST:
295
+ AUG:
296
+ ENABLED: false
297
+ FLIP: true
298
+ MAX_SIZE: 4000
299
+ MIN_SIZES:
300
+ - 400
301
+ - 500
302
+ - 600
303
+ - 700
304
+ - 800
305
+ - 900
306
+ - 1000
307
+ - 1100
308
+ - 1200
309
+ DETECTIONS_PER_IMAGE: 100
310
+ EVAL_PERIOD: 0
311
+ EXPECTED_RESULTS: []
312
+ KEYPOINT_OKS_SIGMAS: []
313
+ PRECISE_BN:
314
+ ENABLED: false
315
+ NUM_ITER: 200
316
+ VERSION: 2
317
+ VIS_PERIOD: 0
models/TableBank/TableBank-faster_rcnn_R_101_FPN_3x-model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:29ecc5cf43f91ec16cb0a65555b79b24f1b137f9a3fe3552c5a420dcbf615dcd
3
+ size 481987946
models/TableBank/TableBank-faster_rcnn_R_50_FPN_3x-config.yml ADDED
@@ -0,0 +1,317 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ CUDNN_BENCHMARK: false
2
+ DATALOADER:
3
+ ASPECT_RATIO_GROUPING: true
4
+ FILTER_EMPTY_ANNOTATIONS: true
5
+ NUM_WORKERS: 4
6
+ REPEAT_THRESHOLD: 0.0
7
+ SAMPLER_TRAIN: TrainingSampler
8
+ DATASETS:
9
+ PRECOMPUTED_PROPOSAL_TOPK_TEST: 1000
10
+ PRECOMPUTED_PROPOSAL_TOPK_TRAIN: 2000
11
+ PROPOSAL_FILES_TEST: []
12
+ PROPOSAL_FILES_TRAIN: []
13
+ TEST:
14
+ - word-val
15
+ - latex-val
16
+ TRAIN:
17
+ - word
18
+ - latex
19
+ GLOBAL:
20
+ HACK: 1.0
21
+ INPUT:
22
+ CROP:
23
+ ENABLED: false
24
+ SIZE:
25
+ - 0.9
26
+ - 0.9
27
+ TYPE: relative_range
28
+ FORMAT: BGR
29
+ MASK_FORMAT: polygon
30
+ MAX_SIZE_TEST: 1333
31
+ MAX_SIZE_TRAIN: 1333
32
+ MIN_SIZE_TEST: 800
33
+ MIN_SIZE_TRAIN:
34
+ - 640
35
+ - 672
36
+ - 704
37
+ - 736
38
+ - 768
39
+ - 800
40
+ MIN_SIZE_TRAIN_SAMPLING: choice
41
+ MODEL:
42
+ ANCHOR_GENERATOR:
43
+ ANGLES:
44
+ - - -90
45
+ - 0
46
+ - 90
47
+ ASPECT_RATIOS:
48
+ - - 0.5
49
+ - 1.0
50
+ - 2.0
51
+ NAME: DefaultAnchorGenerator
52
+ OFFSET: 0.0
53
+ SIZES:
54
+ - - 32
55
+ - - 64
56
+ - - 128
57
+ - - 256
58
+ - - 512
59
+ BACKBONE:
60
+ FREEZE_AT: 2
61
+ NAME: build_resnet_fpn_backbone
62
+ DEVICE: cuda
63
+ FPN:
64
+ FUSE_TYPE: sum
65
+ IN_FEATURES:
66
+ - res2
67
+ - res3
68
+ - res4
69
+ - res5
70
+ NORM: ''
71
+ OUT_CHANNELS: 256
72
+ KEYPOINT_ON: false
73
+ LOAD_PROPOSALS: false
74
+ MASK_ON: false
75
+ META_ARCHITECTURE: GeneralizedRCNN
76
+ PANOPTIC_FPN:
77
+ COMBINE:
78
+ ENABLED: true
79
+ INSTANCES_CONFIDENCE_THRESH: 0.5
80
+ OVERLAP_THRESH: 0.5
81
+ STUFF_AREA_LIMIT: 4096
82
+ INSTANCE_LOSS_WEIGHT: 1.0
83
+ PIXEL_MEAN:
84
+ - 103.53
85
+ - 116.28
86
+ - 123.675
87
+ PIXEL_STD:
88
+ - 1.0
89
+ - 1.0
90
+ - 1.0
91
+ PROPOSAL_GENERATOR:
92
+ MIN_SIZE: 0
93
+ NAME: RPN
94
+ RESNETS:
95
+ DEFORM_MODULATED: false
96
+ DEFORM_NUM_GROUPS: 1
97
+ DEFORM_ON_PER_STAGE:
98
+ - false
99
+ - false
100
+ - false
101
+ - false
102
+ DEPTH: 50
103
+ NORM: FrozenBN
104
+ NUM_GROUPS: 1
105
+ OUT_FEATURES:
106
+ - res2
107
+ - res3
108
+ - res4
109
+ - res5
110
+ RES2_OUT_CHANNELS: 256
111
+ RES5_DILATION: 1
112
+ STEM_OUT_CHANNELS: 64
113
+ STRIDE_IN_1X1: true
114
+ WIDTH_PER_GROUP: 64
115
+ RETINANET:
116
+ BBOX_REG_WEIGHTS:
117
+ - 1.0
118
+ - 1.0
119
+ - 1.0
120
+ - 1.0
121
+ FOCAL_LOSS_ALPHA: 0.25
122
+ FOCAL_LOSS_GAMMA: 2.0
123
+ IN_FEATURES:
124
+ - p3
125
+ - p4
126
+ - p5
127
+ - p6
128
+ - p7
129
+ IOU_LABELS:
130
+ - 0
131
+ - -1
132
+ - 1
133
+ IOU_THRESHOLDS:
134
+ - 0.4
135
+ - 0.5
136
+ NMS_THRESH_TEST: 0.5
137
+ NUM_CLASSES: 80
138
+ NUM_CONVS: 4
139
+ PRIOR_PROB: 0.01
140
+ SCORE_THRESH_TEST: 0.05
141
+ SMOOTH_L1_LOSS_BETA: 0.1
142
+ TOPK_CANDIDATES_TEST: 1000
143
+ ROI_BOX_CASCADE_HEAD:
144
+ BBOX_REG_WEIGHTS:
145
+ - - 10.0
146
+ - 10.0
147
+ - 5.0
148
+ - 5.0
149
+ - - 20.0
150
+ - 20.0
151
+ - 10.0
152
+ - 10.0
153
+ - - 30.0
154
+ - 30.0
155
+ - 15.0
156
+ - 15.0
157
+ IOUS:
158
+ - 0.5
159
+ - 0.6
160
+ - 0.7
161
+ ROI_BOX_HEAD:
162
+ BBOX_REG_WEIGHTS:
163
+ - 10.0
164
+ - 10.0
165
+ - 5.0
166
+ - 5.0
167
+ CLS_AGNOSTIC_BBOX_REG: false
168
+ CONV_DIM: 256
169
+ FC_DIM: 1024
170
+ NAME: FastRCNNConvFCHead
171
+ NORM: ''
172
+ NUM_CONV: 0
173
+ NUM_FC: 2
174
+ POOLER_RESOLUTION: 7
175
+ POOLER_SAMPLING_RATIO: 0
176
+ POOLER_TYPE: ROIAlignV2
177
+ SMOOTH_L1_BETA: 0.0
178
+ TRAIN_ON_PRED_BOXES: false
179
+ ROI_HEADS:
180
+ BATCH_SIZE_PER_IMAGE: 512
181
+ IN_FEATURES:
182
+ - p2
183
+ - p3
184
+ - p4
185
+ - p5
186
+ IOU_LABELS:
187
+ - 0
188
+ - 1
189
+ IOU_THRESHOLDS:
190
+ - 0.5
191
+ NAME: StandardROIHeads
192
+ NMS_THRESH_TEST: 0.5
193
+ NUM_CLASSES: 1
194
+ POSITIVE_FRACTION: 0.25
195
+ PROPOSAL_APPEND_GT: true
196
+ SCORE_THRESH_TEST: 0.05
197
+ ROI_KEYPOINT_HEAD:
198
+ CONV_DIMS:
199
+ - 512
200
+ - 512
201
+ - 512
202
+ - 512
203
+ - 512
204
+ - 512
205
+ - 512
206
+ - 512
207
+ LOSS_WEIGHT: 1.0
208
+ MIN_KEYPOINTS_PER_IMAGE: 1
209
+ NAME: KRCNNConvDeconvUpsampleHead
210
+ NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS: true
211
+ NUM_KEYPOINTS: 17
212
+ POOLER_RESOLUTION: 14
213
+ POOLER_SAMPLING_RATIO: 0
214
+ POOLER_TYPE: ROIAlignV2
215
+ ROI_MASK_HEAD:
216
+ CLS_AGNOSTIC_MASK: false
217
+ CONV_DIM: 256
218
+ NAME: MaskRCNNConvUpsampleHead
219
+ NORM: ''
220
+ NUM_CONV: 4
221
+ POOLER_RESOLUTION: 14
222
+ POOLER_SAMPLING_RATIO: 0
223
+ POOLER_TYPE: ROIAlignV2
224
+ RPN:
225
+ BATCH_SIZE_PER_IMAGE: 256
226
+ BBOX_REG_WEIGHTS:
227
+ - 1.0
228
+ - 1.0
229
+ - 1.0
230
+ - 1.0
231
+ BOUNDARY_THRESH: -1
232
+ HEAD_NAME: StandardRPNHead
233
+ IN_FEATURES:
234
+ - p2
235
+ - p3
236
+ - p4
237
+ - p5
238
+ - p6
239
+ IOU_LABELS:
240
+ - 0
241
+ - -1
242
+ - 1
243
+ IOU_THRESHOLDS:
244
+ - 0.3
245
+ - 0.7
246
+ LOSS_WEIGHT: 1.0
247
+ NMS_THRESH: 0.7
248
+ POSITIVE_FRACTION: 0.5
249
+ POST_NMS_TOPK_TEST: 1000
250
+ POST_NMS_TOPK_TRAIN: 1000
251
+ PRE_NMS_TOPK_TEST: 1000
252
+ PRE_NMS_TOPK_TRAIN: 2000
253
+ SMOOTH_L1_BETA: 0.0
254
+ SEM_SEG_HEAD:
255
+ COMMON_STRIDE: 4
256
+ CONVS_DIM: 128
257
+ IGNORE_VALUE: 255
258
+ IN_FEATURES:
259
+ - p2
260
+ - p3
261
+ - p4
262
+ - p5
263
+ LOSS_WEIGHT: 1.0
264
+ NAME: SemSegFPNHead
265
+ NORM: GN
266
+ NUM_CLASSES: 54
267
+ WEIGHTS: https://www.dropbox.com/s/8v4uqmz1at9v72a/model_final.pth?dl=1
268
+ OUTPUT_DIR: ../outputs/faster_rcnn_R_50_FPN_3x/
269
+ SEED: -1
270
+ SOLVER:
271
+ BASE_LR: 0.0005
272
+ BIAS_LR_FACTOR: 1.0
273
+ CHECKPOINT_PERIOD: 5000
274
+ CLIP_GRADIENTS:
275
+ CLIP_TYPE: value
276
+ CLIP_VALUE: 1.0
277
+ ENABLED: false
278
+ NORM_TYPE: 2.0
279
+ GAMMA: 0.1
280
+ IMS_PER_BATCH: 2
281
+ LR_SCHEDULER_NAME: WarmupMultiStepLR
282
+ MAX_ITER: 270000
283
+ MOMENTUM: 0.9
284
+ NESTEROV: false
285
+ STEPS:
286
+ - 210000
287
+ - 250000
288
+ WARMUP_FACTOR: 0.001
289
+ WARMUP_ITERS: 1000
290
+ WARMUP_METHOD: linear
291
+ WEIGHT_DECAY: 0.0001
292
+ WEIGHT_DECAY_BIAS: 0.0001
293
+ WEIGHT_DECAY_NORM: 0.0
294
+ TEST:
295
+ AUG:
296
+ ENABLED: false
297
+ FLIP: true
298
+ MAX_SIZE: 4000
299
+ MIN_SIZES:
300
+ - 400
301
+ - 500
302
+ - 600
303
+ - 700
304
+ - 800
305
+ - 900
306
+ - 1000
307
+ - 1100
308
+ - 1200
309
+ DETECTIONS_PER_IMAGE: 100
310
+ EVAL_PERIOD: 0
311
+ EXPECTED_RESULTS: []
312
+ KEYPOINT_OKS_SIGMAS: []
313
+ PRECISE_BN:
314
+ ENABLED: false
315
+ NUM_ITER: 200
316
+ VERSION: 2
317
+ VIS_PERIOD: 0
models/TableBank/TableBank-faster_rcnn_R_50_FPN_3x-model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c563a528dc007d6e5f630c574041be30ab431c098f4c4283e2225f5da04cb59c
3
+ size 329981106
requirements.txt DELETED
@@ -1,10 +0,0 @@
1
- pyyaml==5.*
2
- torch==1.11.*
3
- torchvision==0.11.*
4
-
5
- gradio
6
- numpy
7
- scipy
8
- shapely
9
- timm
10
- opencv-python