Image Segmentation
HTRflow
Gabriel committed on
Commit
ffab01d
1 Parent(s): 798ff65

Upload 2 files

Browse files
epoch_12 (1).pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e356d393c6ed916b2b1ac085b3ef6075e1cbdad0a3148756f03bbcda41f2d658
3
+ size 474957088
rtmdet_m_textregions_2_concat.py ADDED
@@ -0,0 +1,580 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# MMEngine/MMDetection config for an RTMDet model with an instance-segmentation
# head (RTMDetInsSepBNHead), trained on the "police_records" COCO-style
# annotations.
#
# This is a cleaned-up form of a dumped config: the diff-view residue (leading
# "+" markers and interleaved line-number rows) has been removed, indentation
# restored, and the copies of identical pipelines/datasets that the dump had
# expanded inline now refer to the named variables again.  Every top-level name
# and every value is the same as in the dump; no keys were added or removed.

default_scope = 'mmdet'

# --------------------------------------------------------------------------
# Scalars shared by the sections below.
# --------------------------------------------------------------------------
max_epochs = 12
stage2_num_epochs = 2  # the last `stage2_num_epochs` epochs use the stage-2 pipeline
base_lr = 0.00025
interval = 12  # validation interval (epochs) before the stage-2 switch
train_batch_size_per_gpu = 2
val_batch_size_per_gpu = 1
train_num_workers = 1

# NOTE(review): `num_classes` is 1 here, but `model.bbox_head.num_classes`
# below is 80.  The value in the head is left untouched because the published
# checkpoint was presumably trained with it — confirm before changing either.
num_classes = 1

# NOTE(review): MMDetection's docs pass `classes` as a tuple, e.g.
# ('text_line', ).  The bare string is what the dumped config contains, so it
# is preserved — confirm against the annotation files before changing.
metainfo = dict(classes='text_line', palette=[(220, 20, 60)])

# --------------------------------------------------------------------------
# Runtime: hooks, environment, visualisation, logging, checkpoint loading.
# --------------------------------------------------------------------------
default_hooks = dict(
    timer=dict(type='IterTimerHook'),
    logger=dict(type='LoggerHook', interval=100),
    param_scheduler=dict(type='ParamSchedulerHook'),
    checkpoint=dict(
        type='CheckpointHook',
        interval=1,
        max_keep_ckpts=5,
        save_best='auto',
    ),
    sampler_seed=dict(type='DistSamplerSeedHook'),
    visualization=dict(type='DetVisualizationHook'),
)
env_cfg = dict(
    cudnn_benchmark=False,
    mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
    dist_cfg=dict(backend='nccl'),
)
vis_backends = [dict(type='LocalVisBackend')]
visualizer = dict(
    type='DetLocalVisualizer',
    vis_backends=vis_backends,
    name='visualizer',
    save_dir='./',
)
log_processor = dict(type='LogProcessor', window_size=50, by_epoch=True)
log_level = 'INFO'

# NOTE(review): `resume=True` together with a checkpoint named epoch_12 and
# `max_epochs=12` looks like it leaves nothing to train — confirm this is only
# meant for inference/evaluation.
load_from = './epoch_12.pth'
resume = True
work_dir = '/home/erik/Riksarkivet/Projects/HTR_Pipeline/models/checkpoints/rtmdet_lines_pr_2'
launcher = 'pytorch'

# --------------------------------------------------------------------------
# Data pipelines.
# --------------------------------------------------------------------------
dataset_type = 'CocoDataset'
data_root = 'data/coco/'
file_client_args = dict(backend='disk')

# Stage-1 training pipeline (mosaic + mix-up augmentation).
train_pipeline = [
    dict(type='LoadImageFromFile', file_client_args=file_client_args),
    dict(
        type='LoadAnnotations',
        with_bbox=True,
        with_mask=True,
        poly2mask=False,
    ),
    dict(type='CachedMosaic', img_scale=(640, 640), pad_val=114.0),
    dict(
        type='RandomResize',
        scale=(1280, 1280),
        ratio_range=(0.1, 2.0),
        keep_ratio=True,
    ),
    dict(
        type='RandomCrop',
        crop_size=(640, 640),
        recompute_bbox=True,
        allow_negative_crop=True,
    ),
    dict(type='YOLOXHSVRandomAug'),
    dict(type='RandomFlip', prob=0.5),
    dict(type='Pad', size=(640, 640), pad_val=dict(img=(114, 114, 114))),
    dict(
        type='CachedMixUp',
        img_scale=(640, 640),
        ratio_range=(1.0, 1.0),
        max_cached_images=20,
        pad_val=(114, 114, 114),
    ),
    dict(type='FilterAnnotations', min_gt_bbox_wh=(1, 1)),
    dict(type='PackDetInputs'),
]

# Stage-2 training pipeline (no mosaic / mix-up); installed by the
# PipelineSwitchHook in `custom_hooks`.
train_pipeline_stage2 = [
    dict(type='LoadImageFromFile', file_client_args=file_client_args),
    dict(
        type='LoadAnnotations',
        with_bbox=True,
        with_mask=True,
        poly2mask=False,
    ),
    dict(
        type='RandomResize',
        scale=(640, 640),
        ratio_range=(0.1, 2.0),
        keep_ratio=True,
    ),
    dict(
        type='RandomCrop',
        crop_size=(640, 640),
        recompute_bbox=True,
        allow_negative_crop=True,
    ),
    dict(type='FilterAnnotations', min_gt_bbox_wh=(1, 1)),
    dict(type='YOLOXHSVRandomAug'),
    dict(type='RandomFlip', prob=0.5),
    dict(type='Pad', size=(640, 640), pad_val=dict(img=(114, 114, 114))),
    dict(type='PackDetInputs'),
]

# Validation / test pipeline: resize keeping aspect ratio, pad to 640x640.
test_pipeline = [
    dict(type='LoadImageFromFile', file_client_args=file_client_args),
    dict(type='Resize', scale=(640, 640), keep_ratio=True),
    dict(type='Pad', size=(640, 640), pad_val=dict(img=(114, 114, 114))),
    dict(
        type='PackDetInputs',
        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                   'scale_factor'),
    ),
]
# The dump carries the same pipeline under the name `pipeline` as well.
pipeline = test_pipeline

# Test-time augmentation: three scales x {flipped, not flipped}.
tta_model = dict(
    type='DetTTAModel',
    tta_cfg=dict(nms=dict(type='nms', iou_threshold=0.6), max_per_img=100),
)
img_scales = [(640, 640), (320, 320), (960, 960)]
tta_pipeline = [
    dict(type='LoadImageFromFile', file_client_args=file_client_args),
    dict(
        type='TestTimeAug',
        transforms=[
            [dict(type='Resize', scale=s, keep_ratio=True) for s in img_scales],
            [
                dict(type='RandomFlip', prob=1.0),
                dict(type='RandomFlip', prob=0.0),
            ],
            [
                dict(
                    type='Pad',
                    size=(960, 960),
                    pad_val=dict(img=(114, 114, 114)),
                ),
            ],
            [
                dict(
                    type='PackDetInputs',
                    meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                               'scale_factor', 'flip', 'flip_direction'),
                ),
            ],
        ],
    ),
]

# --------------------------------------------------------------------------
# Model.
# --------------------------------------------------------------------------
model = dict(
    type='RTMDet',
    data_preprocessor=dict(
        type='DetDataPreprocessor',
        mean=[103.53, 116.28, 123.675],
        std=[57.375, 57.12, 58.395],
        bgr_to_rgb=False,
        batch_augments=None,
    ),
    backbone=dict(
        type='CSPNeXt',
        arch='P5',
        expand_ratio=0.5,
        deepen_factor=0.67,
        widen_factor=0.75,
        channel_attention=True,
        norm_cfg=dict(type='SyncBN'),
        act_cfg=dict(type='SiLU', inplace=True),
    ),
    neck=dict(
        type='CSPNeXtPAFPN',
        in_channels=[192, 384, 768],
        out_channels=192,
        num_csp_blocks=2,
        expand_ratio=0.5,
        norm_cfg=dict(type='SyncBN'),
        act_cfg=dict(type='SiLU', inplace=True),
    ),
    bbox_head=dict(
        type='RTMDetInsSepBNHead',
        # NOTE(review): 80 does not match the single class declared in
        # `metainfo` / `num_classes`; kept as dumped (see note at the top).
        num_classes=80,
        in_channels=192,
        stacked_convs=2,
        share_conv=True,
        pred_kernel_size=1,
        feat_channels=192,
        act_cfg=dict(type='SiLU', inplace=True),
        norm_cfg=dict(type='SyncBN', requires_grad=True),
        anchor_generator=dict(
            type='MlvlPointGenerator', offset=0, strides=[8, 16, 32]),
        bbox_coder=dict(type='DistancePointBBoxCoder'),
        loss_cls=dict(
            type='QualityFocalLoss',
            use_sigmoid=True,
            beta=2.0,
            loss_weight=1.0,
        ),
        loss_bbox=dict(type='GIoULoss', loss_weight=2.0),
        loss_mask=dict(
            type='DiceLoss', loss_weight=2.0, eps=5e-06, reduction='mean'),
    ),
    train_cfg=dict(
        assigner=dict(type='DynamicSoftLabelAssigner', topk=13),
        allowed_border=-1,
        pos_weight=-1,
        debug=False,
    ),
    test_cfg=dict(
        nms_pre=400,
        min_bbox_size=0,
        score_thr=0.4,
        nms=dict(type='nms', iou_threshold=0.6),
        max_per_img=50,
        mask_thr_binary=0.5,
    ),
)

# --------------------------------------------------------------------------
# Loops, LR schedule, optimiser.
# --------------------------------------------------------------------------
train_cfg = dict(
    type='EpochBasedTrainLoop',
    max_epochs=max_epochs,
    val_interval=interval,
    # From the stage-2 switch epoch on, validate every epoch.
    dynamic_intervals=[(max_epochs - stage2_num_epochs, 1)],
)
val_cfg = dict(type='ValLoop')
# NOTE(review): the dumped config passes a `pipeline` key to the test loop.
# Confirm the MMEngine version in use accepts it; kept because it is part of
# the dumped config.
test_cfg = dict(type='TestLoop', pipeline=pipeline)

param_scheduler = [
    # Linear warm-up over the first 1000 iterations.
    dict(
        type='LinearLR',
        start_factor=1e-05,
        by_epoch=False,
        begin=0,
        end=1000,
    ),
    # Cosine annealing over the second half of training.
    dict(
        type='CosineAnnealingLR',
        eta_min=1.25e-05,
        begin=max_epochs // 2,
        end=max_epochs,
        T_max=max_epochs // 2,
        by_epoch=True,
        convert_to_iter_based=True,
    ),
]
optim_wrapper = dict(
    type='OptimWrapper',
    optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05),
    paramwise_cfg=dict(
        norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True),
)
auto_scale_lr = dict(enable=False, base_batch_size=16)

# --------------------------------------------------------------------------
# Datasets.
# --------------------------------------------------------------------------
icdar_2019 = dict(
    type=dataset_type,
    metainfo=metainfo,
    data_prefix=dict(
        img='/media/erik/Elements/Riksarkivet/data/datasets/htr/segmentation/ICDAR-2019/clean/'
    ),
    ann_file='/media/erik/Elements/Riksarkivet/data/datasets/htr/segmentation/ICDAR-2019/clean/gt_files/coco_regions2.json',
    pipeline=train_pipeline,
)
icdar_2019_test = dict(
    type=dataset_type,
    metainfo=metainfo,
    data_prefix=icdar_2019['data_prefix'],
    ann_file='/media/erik/Elements/Riksarkivet/data/datasets/htr/segmentation/ICDAR-2019/clean/gt_files/coco_lines.json',
    test_mode=True,
    pipeline=test_pipeline,
)
police_records = dict(
    type=dataset_type,
    metainfo=metainfo,
    data_prefix=dict(
        img='/media/erik/Elements/Riksarkivet/data/datasets/htr/segmentation/police_records/'
    ),
    ann_file='/media/erik/Elements/Riksarkivet/data/datasets/htr/segmentation/police_records/gt_files/coco_lines2.json',
    pipeline=train_pipeline,
)
train_list = [police_records]
test_list = [icdar_2019_test]

# --------------------------------------------------------------------------
# Dataloaders and evaluators.
# --------------------------------------------------------------------------
train_dataloader = dict(
    batch_size=train_batch_size_per_gpu,
    num_workers=train_num_workers,
    batch_sampler=None,
    pin_memory=True,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=dict(type='ConcatDataset', datasets=train_list),
)

# NOTE(review): validation/test images are read with `coco_regions2.json`
# while the evaluators below score against `coco_lines2.json`.  Both are kept
# exactly as dumped — confirm which annotation file is intended.
val_dataloader = dict(
    batch_size=val_batch_size_per_gpu,
    num_workers=10,
    dataset=dict(
        pipeline=test_pipeline,
        type=dataset_type,
        metainfo=metainfo,
        data_prefix=police_records['data_prefix'],
        ann_file='/media/erik/Elements/Riksarkivet/data/datasets/htr/segmentation/police_records/gt_files/coco_regions2.json',
        test_mode=True,
    ),
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
)
test_dataloader = val_dataloader

val_evaluator = dict(
    proposal_nums=(100, 1, 10),
    metric=['bbox', 'segm'],
    type='CocoMetric',
    ann_file=police_records['ann_file'],
)
test_evaluator = val_evaluator

# --------------------------------------------------------------------------
# Custom hooks: weight EMA and the stage-2 pipeline switch.
# --------------------------------------------------------------------------
custom_hooks = [
    dict(
        type='EMAHook',
        ema_type='ExpMomentumEMA',
        momentum=0.0002,
        update_buffers=True,
        priority=49,
    ),
    dict(
        type='PipelineSwitchHook',
        switch_epoch=max_epochs - stage2_num_epochs,
        switch_pipeline=train_pipeline_stage2,
    ),
]