Gabriel committed on
Commit
2264efa
1 Parent(s): 470ccae

Upload 3 files

Browse files
RmtDet_regions/epoch_12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d07f868824148158df90a0cdcb7061af19c88507903be6739209a745cf330a6
3
+ size 462079456
RmtDet_regions/last_checkpoint ADDED
@@ -0,0 +1 @@
 
 
1
+ /home/erik/Riksarkivet/Projects/HTR_Pipeline/models/checkpoints/rtmdet_regions_6/epoch_12.pth
RmtDet_regions/rtmdet_m_textregions_2_concat.py ADDED
@@ -0,0 +1,670 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ default_scope = 'mmdet'
2
+ default_hooks = dict(
3
+ timer=dict(type='IterTimerHook'),
4
+ logger=dict(type='LoggerHook', interval=100),
5
+ param_scheduler=dict(type='ParamSchedulerHook'),
6
+ checkpoint=dict(
7
+ type='CheckpointHook', interval=1, max_keep_ckpts=5, save_best='auto'),
8
+ sampler_seed=dict(type='DistSamplerSeedHook'),
9
+ visualization=dict(type='DetVisualizationHook'))
10
+ env_cfg = dict(
11
+ cudnn_benchmark=False,
12
+ mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
13
+ dist_cfg=dict(backend='nccl'))
14
+ vis_backends = [dict(type='LocalVisBackend')]
15
+ visualizer = dict(
16
+ type='DetLocalVisualizer',
17
+ vis_backends=[dict(type='LocalVisBackend')],
18
+ name='visualizer',
19
+ save_dir='/home/erik/Riksarkivet/Projects/HTR_Pipeline/output')
20
+ log_processor = dict(type='LogProcessor', window_size=50, by_epoch=True)
21
+ log_level = 'INFO'
22
+ load_from = '/home/erik/Riksarkivet/Projects/HTR_Pipeline/models/checkpoints/rtmdet_regions_6/epoch_11.pth'
23
+ resume = True
24
+ train_cfg = dict(
25
+ type='EpochBasedTrainLoop',
26
+ max_epochs=12,
27
+ val_interval=12,
28
+ dynamic_intervals=[(10, 1)])
29
+ val_cfg = dict(type='ValLoop')
30
+ test_cfg = dict(
31
+ type='TestLoop',
32
+ pipeline=[
33
+ dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')),
34
+ dict(type='Resize', scale=(640, 640), keep_ratio=True),
35
+ dict(type='Pad', size=(640, 640), pad_val=dict(img=(114, 114, 114))),
36
+ dict(
37
+ type='PackDetInputs',
38
+ meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
39
+ 'scale_factor'))
40
+ ])
41
+ param_scheduler = [
42
+ dict(
43
+ type='LinearLR', start_factor=1e-05, by_epoch=False, begin=0,
44
+ end=1000),
45
+ dict(
46
+ type='CosineAnnealingLR',
47
+ eta_min=1.25e-05,
48
+ begin=6,
49
+ end=12,
50
+ T_max=6,
51
+ by_epoch=True,
52
+ convert_to_iter_based=True)
53
+ ]
54
+ optim_wrapper = dict(
55
+ type='OptimWrapper',
56
+ optimizer=dict(type='AdamW', lr=0.00025, weight_decay=0.05),
57
+ paramwise_cfg=dict(
58
+ norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True))
59
+ auto_scale_lr = dict(enable=False, base_batch_size=16)
60
+ dataset_type = 'CocoDataset'
61
+ data_root = 'data/coco/'
62
+ file_client_args = dict(backend='disk')
63
+ train_pipeline = [
64
+ dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')),
65
+ dict(
66
+ type='LoadAnnotations',
67
+ with_bbox=True,
68
+ with_mask=True,
69
+ poly2mask=False),
70
+ dict(type='CachedMosaic', img_scale=(640, 640), pad_val=114.0),
71
+ dict(
72
+ type='RandomResize',
73
+ scale=(1280, 1280),
74
+ ratio_range=(0.1, 2.0),
75
+ keep_ratio=True),
76
+ dict(
77
+ type='RandomCrop',
78
+ crop_size=(640, 640),
79
+ recompute_bbox=True,
80
+ allow_negative_crop=True),
81
+ dict(type='YOLOXHSVRandomAug'),
82
+ dict(type='RandomFlip', prob=0.5),
83
+ dict(type='Pad', size=(640, 640), pad_val=dict(img=(114, 114, 114))),
84
+ dict(
85
+ type='CachedMixUp',
86
+ img_scale=(640, 640),
87
+ ratio_range=(1.0, 1.0),
88
+ max_cached_images=20,
89
+ pad_val=(114, 114, 114)),
90
+ dict(type='FilterAnnotations', min_gt_bbox_wh=(1, 1)),
91
+ dict(type='PackDetInputs')
92
+ ]
93
+ test_pipeline = [
94
+ dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')),
95
+ dict(type='Resize', scale=(640, 640), keep_ratio=True),
96
+ dict(type='Pad', size=(640, 640), pad_val=dict(img=(114, 114, 114))),
97
+ dict(
98
+ type='PackDetInputs',
99
+ meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
100
+ 'scale_factor'))
101
+ ]
102
+ tta_model = dict(
103
+ type='DetTTAModel',
104
+ tta_cfg=dict(nms=dict(type='nms', iou_threshold=0.6), max_per_img=100))
105
+ img_scales = [(640, 640), (320, 320), (960, 960)]
106
+ tta_pipeline = [
107
+ dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')),
108
+ dict(
109
+ type='TestTimeAug',
110
+ transforms=[[{
111
+ 'type': 'Resize',
112
+ 'scale': (640, 640),
113
+ 'keep_ratio': True
114
+ }, {
115
+ 'type': 'Resize',
116
+ 'scale': (320, 320),
117
+ 'keep_ratio': True
118
+ }, {
119
+ 'type': 'Resize',
120
+ 'scale': (960, 960),
121
+ 'keep_ratio': True
122
+ }],
123
+ [{
124
+ 'type': 'RandomFlip',
125
+ 'prob': 1.0
126
+ }, {
127
+ 'type': 'RandomFlip',
128
+ 'prob': 0.0
129
+ }],
130
+ [{
131
+ 'type': 'Pad',
132
+ 'size': (960, 960),
133
+ 'pad_val': {
134
+ 'img': (114, 114, 114)
135
+ }
136
+ }],
137
+ [{
138
+ 'type':
139
+ 'PackDetInputs',
140
+ 'meta_keys':
141
+ ('img_id', 'img_path', 'ori_shape', 'img_shape',
142
+ 'scale_factor', 'flip', 'flip_direction')
143
+ }]])
144
+ ]
145
+ model = dict(
146
+ type='RTMDet',
147
+ data_preprocessor=dict(
148
+ type='DetDataPreprocessor',
149
+ mean=[103.53, 116.28, 123.675],
150
+ std=[57.375, 57.12, 58.395],
151
+ bgr_to_rgb=False,
152
+ batch_augments=None),
153
+ backbone=dict(
154
+ type='CSPNeXt',
155
+ arch='P5',
156
+ expand_ratio=0.5,
157
+ deepen_factor=0.67,
158
+ widen_factor=0.75,
159
+ channel_attention=True,
160
+ norm_cfg=dict(type='SyncBN'),
161
+ act_cfg=dict(type='SiLU', inplace=True)),
162
+ neck=dict(
163
+ type='CSPNeXtPAFPN',
164
+ in_channels=[192, 384, 768],
165
+ out_channels=192,
166
+ num_csp_blocks=2,
167
+ expand_ratio=0.5,
168
+ norm_cfg=dict(type='SyncBN'),
169
+ act_cfg=dict(type='SiLU', inplace=True)),
170
+ bbox_head=dict(
171
+ type='RTMDetInsSepBNHead',
172
+ num_classes=80,
173
+ in_channels=192,
174
+ stacked_convs=2,
175
+ share_conv=True,
176
+ pred_kernel_size=1,
177
+ feat_channels=192,
178
+ act_cfg=dict(type='SiLU', inplace=True),
179
+ norm_cfg=dict(type='SyncBN', requires_grad=True),
180
+ anchor_generator=dict(
181
+ type='MlvlPointGenerator', offset=0, strides=[8, 16, 32]),
182
+ bbox_coder=dict(type='DistancePointBBoxCoder'),
183
+ loss_cls=dict(
184
+ type='QualityFocalLoss',
185
+ use_sigmoid=True,
186
+ beta=2.0,
187
+ loss_weight=1.0),
188
+ loss_bbox=dict(type='GIoULoss', loss_weight=2.0),
189
+ loss_mask=dict(
190
+ type='DiceLoss', loss_weight=2.0, eps=5e-06, reduction='mean')),
191
+ train_cfg=dict(
192
+ assigner=dict(type='DynamicSoftLabelAssigner', topk=13),
193
+ allowed_border=-1,
194
+ pos_weight=-1,
195
+ debug=False),
196
+ test_cfg=dict(
197
+ nms_pre=200,
198
+ min_bbox_size=0,
199
+ score_thr=0.4,
200
+ nms=dict(type='nms', iou_threshold=0.6),
201
+ max_per_img=50,
202
+ mask_thr_binary=0.5))
203
+ train_pipeline_stage2 = [
204
+ dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')),
205
+ dict(
206
+ type='LoadAnnotations',
207
+ with_bbox=True,
208
+ with_mask=True,
209
+ poly2mask=False),
210
+ dict(
211
+ type='RandomResize',
212
+ scale=(640, 640),
213
+ ratio_range=(0.1, 2.0),
214
+ keep_ratio=True),
215
+ dict(
216
+ type='RandomCrop',
217
+ crop_size=(640, 640),
218
+ recompute_bbox=True,
219
+ allow_negative_crop=True),
220
+ dict(type='FilterAnnotations', min_gt_bbox_wh=(1, 1)),
221
+ dict(type='YOLOXHSVRandomAug'),
222
+ dict(type='RandomFlip', prob=0.5),
223
+ dict(type='Pad', size=(640, 640), pad_val=dict(img=(114, 114, 114))),
224
+ dict(type='PackDetInputs')
225
+ ]
226
+ train_dataloader = dict(
227
+ batch_size=2,
228
+ num_workers=1,
229
+ batch_sampler=None,
230
+ pin_memory=True,
231
+ persistent_workers=True,
232
+ sampler=dict(type='DefaultSampler', shuffle=True),
233
+ dataset=dict(
234
+ type='ConcatDataset',
235
+ datasets=[
236
+ dict(
237
+ type='CocoDataset',
238
+ metainfo=dict(classes='TextRegion', palette=[(220, 20, 60)]),
239
+ data_prefix=dict(
240
+ img=
241
+ '/media/erik/Elements/Riksarkivet/data/datasets/htr/segmentation/police_records/'
242
+ ),
243
+ ann_file=
244
+ '/media/erik/Elements/Riksarkivet/data/datasets/htr/segmentation/police_records/gt_files/coco_regions2.json',
245
+ pipeline=[
246
+ dict(
247
+ type='LoadImageFromFile',
248
+ file_client_args=dict(backend='disk')),
249
+ dict(
250
+ type='LoadAnnotations',
251
+ with_bbox=True,
252
+ with_mask=True,
253
+ poly2mask=False),
254
+ dict(
255
+ type='CachedMosaic',
256
+ img_scale=(640, 640),
257
+ pad_val=114.0),
258
+ dict(
259
+ type='RandomResize',
260
+ scale=(1280, 1280),
261
+ ratio_range=(0.1, 2.0),
262
+ keep_ratio=True),
263
+ dict(
264
+ type='RandomCrop',
265
+ crop_size=(640, 640),
266
+ recompute_bbox=True,
267
+ allow_negative_crop=True),
268
+ dict(type='YOLOXHSVRandomAug'),
269
+ dict(type='RandomFlip', prob=0.5),
270
+ dict(
271
+ type='Pad',
272
+ size=(640, 640),
273
+ pad_val=dict(img=(114, 114, 114))),
274
+ dict(
275
+ type='CachedMixUp',
276
+ img_scale=(640, 640),
277
+ ratio_range=(1.0, 1.0),
278
+ max_cached_images=20,
279
+ pad_val=(114, 114, 114)),
280
+ dict(type='FilterAnnotations', min_gt_bbox_wh=(1, 1)),
281
+ dict(type='PackDetInputs')
282
+ ]),
283
+ dict(
284
+ type='CocoDataset',
285
+ metainfo=dict(classes='TextRegion', palette=[(220, 20, 60)]),
286
+ data_prefix=dict(
287
+ img=
288
+ '/media/erik/Elements/Riksarkivet/data/datasets/htr/segmentation/ICDAR-2019/clean/'
289
+ ),
290
+ ann_file=
291
+ '/media/erik/Elements/Riksarkivet/data/datasets/htr/segmentation/ICDAR-2019/clean/gt_files/coco_regions2.json',
292
+ pipeline=[
293
+ dict(
294
+ type='LoadImageFromFile',
295
+ file_client_args=dict(backend='disk')),
296
+ dict(
297
+ type='LoadAnnotations',
298
+ with_bbox=True,
299
+ with_mask=True,
300
+ poly2mask=False),
301
+ dict(
302
+ type='CachedMosaic',
303
+ img_scale=(640, 640),
304
+ pad_val=114.0),
305
+ dict(
306
+ type='RandomResize',
307
+ scale=(1280, 1280),
308
+ ratio_range=(0.1, 2.0),
309
+ keep_ratio=True),
310
+ dict(
311
+ type='RandomCrop',
312
+ crop_size=(640, 640),
313
+ recompute_bbox=True,
314
+ allow_negative_crop=True),
315
+ dict(type='YOLOXHSVRandomAug'),
316
+ dict(type='RandomFlip', prob=0.5),
317
+ dict(
318
+ type='Pad',
319
+ size=(640, 640),
320
+ pad_val=dict(img=(114, 114, 114))),
321
+ dict(
322
+ type='CachedMixUp',
323
+ img_scale=(640, 640),
324
+ ratio_range=(1.0, 1.0),
325
+ max_cached_images=20,
326
+ pad_val=(114, 114, 114)),
327
+ dict(type='FilterAnnotations', min_gt_bbox_wh=(1, 1)),
328
+ dict(type='PackDetInputs')
329
+ ])
330
+ ]))
331
+ val_dataloader = dict(
332
+ batch_size=1,
333
+ num_workers=10,
334
+ dataset=dict(
335
+ pipeline=[
336
+ dict(
337
+ type='LoadImageFromFile',
338
+ file_client_args=dict(backend='disk')),
339
+ dict(type='Resize', scale=(640, 640), keep_ratio=True),
340
+ dict(
341
+ type='Pad', size=(640, 640),
342
+ pad_val=dict(img=(114, 114, 114))),
343
+ dict(
344
+ type='PackDetInputs',
345
+ meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
346
+ 'scale_factor'))
347
+ ],
348
+ type='CocoDataset',
349
+ metainfo=dict(classes='TextRegion', palette=[(220, 20, 60)]),
350
+ data_prefix=dict(
351
+ img=
352
+ '/media/erik/Elements/Riksarkivet/data/datasets/htr/segmentation/ICDAR-2019/clean/'
353
+ ),
354
+ ann_file=
355
+ '/media/erik/Elements/Riksarkivet/data/datasets/htr/segmentation/police_records/gt_files/coco_regions2.json',
356
+ test_mode=True),
357
+ persistent_workers=True,
358
+ drop_last=False,
359
+ sampler=dict(type='DefaultSampler', shuffle=False))
360
+ test_dataloader = dict(
361
+ batch_size=1,
362
+ num_workers=10,
363
+ dataset=dict(
364
+ pipeline=[
365
+ dict(
366
+ type='LoadImageFromFile',
367
+ file_client_args=dict(backend='disk')),
368
+ dict(type='Resize', scale=(640, 640), keep_ratio=True),
369
+ dict(
370
+ type='Pad', size=(640, 640),
371
+ pad_val=dict(img=(114, 114, 114))),
372
+ dict(
373
+ type='PackDetInputs',
374
+ meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
375
+ 'scale_factor'))
376
+ ],
377
+ type='CocoDataset',
378
+ metainfo=dict(classes='TextRegion', palette=[(220, 20, 60)]),
379
+ data_prefix=dict(
380
+ img=
381
+ '/media/erik/Elements/Riksarkivet/data/datasets/htr/segmentation/ICDAR-2019/clean/'
382
+ ),
383
+ ann_file=
384
+ '/media/erik/Elements/Riksarkivet/data/datasets/htr/segmentation/police_records/gt_files/coco_regions2.json',
385
+ test_mode=True),
386
+ persistent_workers=True,
387
+ drop_last=False,
388
+ sampler=dict(type='DefaultSampler', shuffle=False))
389
+ max_epochs = 12
390
+ stage2_num_epochs = 2
391
+ base_lr = 0.00025
392
+ interval = 12
393
+ val_evaluator = dict(
394
+ proposal_nums=(100, 1, 10),
395
+ metric=['bbox', 'segm'],
396
+ type='CocoMetric',
397
+ ann_file=
398
+ '/media/erik/Elements/Riksarkivet/data/datasets/htr/segmentation/ICDAR-2019/clean/gt_files/coco_regions2.json'
399
+ )
400
+ test_evaluator = dict(
401
+ proposal_nums=(100, 1, 10),
402
+ metric=['bbox', 'segm'],
403
+ type='CocoMetric',
404
+ ann_file=
405
+ '/media/erik/Elements/Riksarkivet/data/datasets/htr/segmentation/ICDAR-2019/clean/gt_files/coco_regions2.json'
406
+ )
407
+ custom_hooks = [
408
+ dict(
409
+ type='EMAHook',
410
+ ema_type='ExpMomentumEMA',
411
+ momentum=0.0002,
412
+ update_buffers=True,
413
+ priority=49),
414
+ dict(
415
+ type='PipelineSwitchHook',
416
+ switch_epoch=10,
417
+ switch_pipeline=[
418
+ dict(
419
+ type='LoadImageFromFile',
420
+ file_client_args=dict(backend='disk')),
421
+ dict(
422
+ type='LoadAnnotations',
423
+ with_bbox=True,
424
+ with_mask=True,
425
+ poly2mask=False),
426
+ dict(
427
+ type='RandomResize',
428
+ scale=(640, 640),
429
+ ratio_range=(0.1, 2.0),
430
+ keep_ratio=True),
431
+ dict(
432
+ type='RandomCrop',
433
+ crop_size=(640, 640),
434
+ recompute_bbox=True,
435
+ allow_negative_crop=True),
436
+ dict(type='FilterAnnotations', min_gt_bbox_wh=(1, 1)),
437
+ dict(type='YOLOXHSVRandomAug'),
438
+ dict(type='RandomFlip', prob=0.5),
439
+ dict(
440
+ type='Pad', size=(640, 640),
441
+ pad_val=dict(img=(114, 114, 114))),
442
+ dict(type='PackDetInputs')
443
+ ])
444
+ ]
445
+ work_dir = '/home/erik/Riksarkivet/Projects/HTR_Pipeline/models/checkpoints/rtmdet_regions_6'
446
+ train_batch_size_per_gpu = 2
447
+ val_batch_size_per_gpu = 1
448
+ train_num_workers = 1
449
+ num_classes = 1
450
+ metainfo = dict(classes='TextRegion', palette=[(220, 20, 60)])
451
+ icdar_2019 = dict(
452
+ type='CocoDataset',
453
+ metainfo=dict(classes='TextRegion', palette=[(220, 20, 60)]),
454
+ data_prefix=dict(
455
+ img=
456
+ '/media/erik/Elements/Riksarkivet/data/datasets/htr/segmentation/ICDAR-2019/clean/'
457
+ ),
458
+ ann_file=
459
+ '/media/erik/Elements/Riksarkivet/data/datasets/htr/segmentation/ICDAR-2019/clean/gt_files/coco_regions2.json',
460
+ pipeline=[
461
+ dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')),
462
+ dict(
463
+ type='LoadAnnotations',
464
+ with_bbox=True,
465
+ with_mask=True,
466
+ poly2mask=False),
467
+ dict(type='CachedMosaic', img_scale=(640, 640), pad_val=114.0),
468
+ dict(
469
+ type='RandomResize',
470
+ scale=(1280, 1280),
471
+ ratio_range=(0.1, 2.0),
472
+ keep_ratio=True),
473
+ dict(
474
+ type='RandomCrop',
475
+ crop_size=(640, 640),
476
+ recompute_bbox=True,
477
+ allow_negative_crop=True),
478
+ dict(type='YOLOXHSVRandomAug'),
479
+ dict(type='RandomFlip', prob=0.5),
480
+ dict(type='Pad', size=(640, 640), pad_val=dict(img=(114, 114, 114))),
481
+ dict(
482
+ type='CachedMixUp',
483
+ img_scale=(640, 640),
484
+ ratio_range=(1.0, 1.0),
485
+ max_cached_images=20,
486
+ pad_val=(114, 114, 114)),
487
+ dict(type='FilterAnnotations', min_gt_bbox_wh=(1, 1)),
488
+ dict(type='PackDetInputs')
489
+ ])
490
+ icdar_2019_test = dict(
491
+ type='CocoDataset',
492
+ metainfo=dict(classes='TextRegion', palette=[(220, 20, 60)]),
493
+ data_prefix=dict(
494
+ img=
495
+ '/media/erik/Elements/Riksarkivet/data/datasets/htr/segmentation/ICDAR-2019/clean/'
496
+ ),
497
+ ann_file=
498
+ '/media/erik/Elements/Riksarkivet/data/datasets/htr/segmentation/ICDAR-2019/clean/gt_files/coco_regions2.json',
499
+ test_mode=True,
500
+ pipeline=[
501
+ dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')),
502
+ dict(type='Resize', scale=(640, 640), keep_ratio=True),
503
+ dict(type='Pad', size=(640, 640), pad_val=dict(img=(114, 114, 114))),
504
+ dict(
505
+ type='PackDetInputs',
506
+ meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
507
+ 'scale_factor'))
508
+ ])
509
+ police_records = dict(
510
+ type='CocoDataset',
511
+ metainfo=dict(classes='TextRegion', palette=[(220, 20, 60)]),
512
+ data_prefix=dict(
513
+ img=
514
+ '/media/erik/Elements/Riksarkivet/data/datasets/htr/segmentation/police_records/'
515
+ ),
516
+ ann_file=
517
+ '/media/erik/Elements/Riksarkivet/data/datasets/htr/segmentation/police_records/gt_files/coco_regions2.json',
518
+ pipeline=[
519
+ dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')),
520
+ dict(
521
+ type='LoadAnnotations',
522
+ with_bbox=True,
523
+ with_mask=True,
524
+ poly2mask=False),
525
+ dict(type='CachedMosaic', img_scale=(640, 640), pad_val=114.0),
526
+ dict(
527
+ type='RandomResize',
528
+ scale=(1280, 1280),
529
+ ratio_range=(0.1, 2.0),
530
+ keep_ratio=True),
531
+ dict(
532
+ type='RandomCrop',
533
+ crop_size=(640, 640),
534
+ recompute_bbox=True,
535
+ allow_negative_crop=True),
536
+ dict(type='YOLOXHSVRandomAug'),
537
+ dict(type='RandomFlip', prob=0.5),
538
+ dict(type='Pad', size=(640, 640), pad_val=dict(img=(114, 114, 114))),
539
+ dict(
540
+ type='CachedMixUp',
541
+ img_scale=(640, 640),
542
+ ratio_range=(1.0, 1.0),
543
+ max_cached_images=20,
544
+ pad_val=(114, 114, 114)),
545
+ dict(type='FilterAnnotations', min_gt_bbox_wh=(1, 1)),
546
+ dict(type='PackDetInputs')
547
+ ])
548
+ train_list = [
549
+ dict(
550
+ type='CocoDataset',
551
+ metainfo=dict(classes='TextRegion', palette=[(220, 20, 60)]),
552
+ data_prefix=dict(
553
+ img=
554
+ '/media/erik/Elements/Riksarkivet/data/datasets/htr/segmentation/police_records/'
555
+ ),
556
+ ann_file=
557
+ '/media/erik/Elements/Riksarkivet/data/datasets/htr/segmentation/police_records/gt_files/coco_regions2.json',
558
+ pipeline=[
559
+ dict(
560
+ type='LoadImageFromFile',
561
+ file_client_args=dict(backend='disk')),
562
+ dict(
563
+ type='LoadAnnotations',
564
+ with_bbox=True,
565
+ with_mask=True,
566
+ poly2mask=False),
567
+ dict(type='CachedMosaic', img_scale=(640, 640), pad_val=114.0),
568
+ dict(
569
+ type='RandomResize',
570
+ scale=(1280, 1280),
571
+ ratio_range=(0.1, 2.0),
572
+ keep_ratio=True),
573
+ dict(
574
+ type='RandomCrop',
575
+ crop_size=(640, 640),
576
+ recompute_bbox=True,
577
+ allow_negative_crop=True),
578
+ dict(type='YOLOXHSVRandomAug'),
579
+ dict(type='RandomFlip', prob=0.5),
580
+ dict(
581
+ type='Pad', size=(640, 640),
582
+ pad_val=dict(img=(114, 114, 114))),
583
+ dict(
584
+ type='CachedMixUp',
585
+ img_scale=(640, 640),
586
+ ratio_range=(1.0, 1.0),
587
+ max_cached_images=20,
588
+ pad_val=(114, 114, 114)),
589
+ dict(type='FilterAnnotations', min_gt_bbox_wh=(1, 1)),
590
+ dict(type='PackDetInputs')
591
+ ]),
592
+ dict(
593
+ type='CocoDataset',
594
+ metainfo=dict(classes='TextRegion', palette=[(220, 20, 60)]),
595
+ data_prefix=dict(
596
+ img=
597
+ '/media/erik/Elements/Riksarkivet/data/datasets/htr/segmentation/ICDAR-2019/clean/'
598
+ ),
599
+ ann_file=
600
+ '/media/erik/Elements/Riksarkivet/data/datasets/htr/segmentation/ICDAR-2019/clean/gt_files/coco_regions2.json',
601
+ pipeline=[
602
+ dict(
603
+ type='LoadImageFromFile',
604
+ file_client_args=dict(backend='disk')),
605
+ dict(
606
+ type='LoadAnnotations',
607
+ with_bbox=True,
608
+ with_mask=True,
609
+ poly2mask=False),
610
+ dict(type='CachedMosaic', img_scale=(640, 640), pad_val=114.0),
611
+ dict(
612
+ type='RandomResize',
613
+ scale=(1280, 1280),
614
+ ratio_range=(0.1, 2.0),
615
+ keep_ratio=True),
616
+ dict(
617
+ type='RandomCrop',
618
+ crop_size=(640, 640),
619
+ recompute_bbox=True,
620
+ allow_negative_crop=True),
621
+ dict(type='YOLOXHSVRandomAug'),
622
+ dict(type='RandomFlip', prob=0.5),
623
+ dict(
624
+ type='Pad', size=(640, 640),
625
+ pad_val=dict(img=(114, 114, 114))),
626
+ dict(
627
+ type='CachedMixUp',
628
+ img_scale=(640, 640),
629
+ ratio_range=(1.0, 1.0),
630
+ max_cached_images=20,
631
+ pad_val=(114, 114, 114)),
632
+ dict(type='FilterAnnotations', min_gt_bbox_wh=(1, 1)),
633
+ dict(type='PackDetInputs')
634
+ ])
635
+ ]
636
+ test_list = [
637
+ dict(
638
+ type='CocoDataset',
639
+ metainfo=dict(classes='TextRegion', palette=[(220, 20, 60)]),
640
+ data_prefix=dict(
641
+ img=
642
+ '/media/erik/Elements/Riksarkivet/data/datasets/htr/segmentation/ICDAR-2019/clean/'
643
+ ),
644
+ ann_file=
645
+ '/media/erik/Elements/Riksarkivet/data/datasets/htr/segmentation/ICDAR-2019/clean/gt_files/coco_regions2.json',
646
+ test_mode=True,
647
+ pipeline=[
648
+ dict(
649
+ type='LoadImageFromFile',
650
+ file_client_args=dict(backend='disk')),
651
+ dict(type='Resize', scale=(640, 640), keep_ratio=True),
652
+ dict(
653
+ type='Pad', size=(640, 640),
654
+ pad_val=dict(img=(114, 114, 114))),
655
+ dict(
656
+ type='PackDetInputs',
657
+ meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
658
+ 'scale_factor'))
659
+ ])
660
+ ]
661
+ pipeline = [
662
+ dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')),
663
+ dict(type='Resize', scale=(640, 640), keep_ratio=True),
664
+ dict(type='Pad', size=(640, 640), pad_val=dict(img=(114, 114, 114))),
665
+ dict(
666
+ type='PackDetInputs',
667
+ meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
668
+ 'scale_factor'))
669
+ ]
670
+ launcher = 'pytorch'