# Runtime section: registry scope, default hooks and environment settings.
# The trailing " |" residue and the lost indentation of the dumped config
# have been removed so the statements parse as Python again.
default_scope = 'mmdet'
default_hooks = dict(
    timer=dict(type='IterTimerHook'),
    # Log every 100 iterations.
    logger=dict(type='LoggerHook', interval=100),
    param_scheduler=dict(type='ParamSchedulerHook'),
    # Save each interval, keep at most 5 checkpoints, also track the best.
    checkpoint=dict(
        type='CheckpointHook', interval=1, max_keep_ckpts=5, save_best='auto'),
    sampler_seed=dict(type='DistSamplerSeedHook'),
    visualization=dict(type='DetVisualizationHook'))
env_cfg = dict(
    cudnn_benchmark=False,
    mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
    dist_cfg=dict(backend='nccl'))
# Visualisation, logging and checkpoint-loading settings.
vis_backends = [dict(type='LocalVisBackend')]
visualizer = dict(
    type='DetLocalVisualizer',
    vis_backends=[dict(type='LocalVisBackend')],
    name='visualizer',
    save_dir='./')
log_processor = dict(type='LogProcessor', window_size=50, by_epoch=True)
log_level = 'INFO'
# Checkpoint path is relative to the current working directory.
load_from = './model.pth'
resume = True
# Loop configuration for training, validation and testing.
train_cfg = dict(
    type='EpochBasedTrainLoop',
    max_epochs=12,
    val_interval=12,
    # (epoch, interval) pair: from epoch 10 the validation interval is 1.
    dynamic_intervals=[(10, 1)])
val_cfg = dict(type='ValLoop')
# NOTE(review): `pipeline` is an extra key next to `type='TestLoop'`; confirm
# that whatever builds the test loop accepts (or strips) it, since it is not
# one of the loop settings used by train_cfg / val_cfg above.
test_cfg = dict(
    type='TestLoop',
    pipeline=[
        dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')),
        dict(type='Resize', scale=(640, 640), keep_ratio=True),
        dict(type='Pad', size=(640, 640), pad_val=dict(img=(114, 114, 114))),
        dict(
            type='PackDetInputs',
            meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                       'scale_factor'))
    ])
# Learning-rate schedule and optimizer settings.
param_scheduler = [
    # Iteration-based linear ramp over the first 1000 iterations.
    dict(
        type='LinearLR', start_factor=1e-05, by_epoch=False, begin=0,
        end=1000),
    # Cosine annealing over epochs 6-12 (T_max=6), converted to iterations.
    dict(
        type='CosineAnnealingLR',
        eta_min=1.25e-05,
        begin=6,
        end=12,
        T_max=6,
        by_epoch=True,
        convert_to_iter_based=True)
]
optim_wrapper = dict(
    type='OptimWrapper',
    optimizer=dict(type='AdamW', lr=0.00025, weight_decay=0.05),
    paramwise_cfg=dict(
        norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True))
auto_scale_lr = dict(enable=False, base_batch_size=16)
# Dataset defaults plus the standalone train / test transform pipelines.
dataset_type = 'CocoDataset'
data_root = 'data/coco/'
file_client_args = dict(backend='disk')
# Training transforms: load, mosaic, random resize/crop, colour jitter,
# flip, pad to 640x640, mix-up, drop tiny boxes, pack.
train_pipeline = [
    dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')),
    dict(
        type='LoadAnnotations',
        with_bbox=True,
        with_mask=True,
        poly2mask=False),
    dict(type='CachedMosaic', img_scale=(640, 640), pad_val=114.0),
    dict(
        type='RandomResize',
        scale=(1280, 1280),
        ratio_range=(0.1, 2.0),
        keep_ratio=True),
    dict(
        type='RandomCrop',
        crop_size=(640, 640),
        recompute_bbox=True,
        allow_negative_crop=True),
    dict(type='YOLOXHSVRandomAug'),
    dict(type='RandomFlip', prob=0.5),
    dict(type='Pad', size=(640, 640), pad_val=dict(img=(114, 114, 114))),
    dict(
        type='CachedMixUp',
        img_scale=(640, 640),
        ratio_range=(1.0, 1.0),
        max_cached_images=20,
        pad_val=(114, 114, 114)),
    dict(type='FilterAnnotations', min_gt_bbox_wh=(1, 1)),
    dict(type='PackDetInputs')
]
# Test transforms: load, aspect-preserving resize, pad to 640x640, pack.
test_pipeline = [
    dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')),
    dict(type='Resize', scale=(640, 640), keep_ratio=True),
    dict(type='Pad', size=(640, 640), pad_val=dict(img=(114, 114, 114))),
    dict(
        type='PackDetInputs',
        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                   'scale_factor'))
]
# Test-time-augmentation settings.
tta_model = dict(
    type='DetTTAModel',
    tta_cfg=dict(nms=dict(type='nms', iou_threshold=0.6), max_per_img=100))
img_scales = [(640, 640), (320, 320), (960, 960)]
# `transforms` holds four groups: three resize scales (the same values as
# img_scales), flip on/off, a single pad to 960x960, and the packing step.
tta_pipeline = [
    dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')),
    dict(
        type='TestTimeAug',
        transforms=[
            [
                {'type': 'Resize', 'scale': (640, 640), 'keep_ratio': True},
                {'type': 'Resize', 'scale': (320, 320), 'keep_ratio': True},
                {'type': 'Resize', 'scale': (960, 960), 'keep_ratio': True},
            ],
            [
                {'type': 'RandomFlip', 'prob': 1.0},
                {'type': 'RandomFlip', 'prob': 0.0},
            ],
            [
                {
                    'type': 'Pad',
                    'size': (960, 960),
                    'pad_val': {'img': (114, 114, 114)},
                },
            ],
            [
                {
                    'type': 'PackDetInputs',
                    'meta_keys': ('img_id', 'img_path', 'ori_shape',
                                  'img_shape', 'scale_factor', 'flip',
                                  'flip_direction'),
                },
            ],
        ])
]
# Detector definition: RTMDet with a CSPNeXt backbone, CSPNeXtPAFPN neck and
# an instance-segmentation head.
model = dict(
    type='RTMDet',
    data_preprocessor=dict(
        type='DetDataPreprocessor',
        mean=[103.53, 116.28, 123.675],
        std=[57.375, 57.12, 58.395],
        bgr_to_rgb=False,
        batch_augments=None),
    backbone=dict(
        type='CSPNeXt',
        arch='P5',
        expand_ratio=0.5,
        deepen_factor=0.67,
        widen_factor=0.75,
        channel_attention=True,
        norm_cfg=dict(type='SyncBN'),
        act_cfg=dict(type='SiLU', inplace=True)),
    neck=dict(
        type='CSPNeXtPAFPN',
        in_channels=[192, 384, 768],
        out_channels=192,
        num_csp_blocks=2,
        expand_ratio=0.5,
        norm_cfg=dict(type='SyncBN'),
        act_cfg=dict(type='SiLU', inplace=True)),
    bbox_head=dict(
        type='RTMDetInsSepBNHead',
        # NOTE(review): the head is sized for 80 classes while the datasets
        # in this file declare a single class and the file sets
        # `num_classes = 1`. Left unchanged because `load_from`/`resume`
        # point at an existing checkpoint; confirm before changing.
        num_classes=80,
        in_channels=192,
        stacked_convs=2,
        share_conv=True,
        pred_kernel_size=1,
        feat_channels=192,
        act_cfg=dict(type='SiLU', inplace=True),
        norm_cfg=dict(type='SyncBN', requires_grad=True),
        anchor_generator=dict(
            type='MlvlPointGenerator', offset=0, strides=[8, 16, 32]),
        bbox_coder=dict(type='DistancePointBBoxCoder'),
        loss_cls=dict(
            type='QualityFocalLoss',
            use_sigmoid=True,
            beta=2.0,
            loss_weight=1.0),
        loss_bbox=dict(type='GIoULoss', loss_weight=2.0),
        loss_mask=dict(
            type='DiceLoss', loss_weight=2.0, eps=5e-06, reduction='mean')),
    train_cfg=dict(
        assigner=dict(type='DynamicSoftLabelAssigner', topk=13),
        allowed_border=-1,
        pos_weight=-1,
        debug=False),
    test_cfg=dict(
        nms_pre=400,
        min_bbox_size=0,
        score_thr=0.4,
        nms=dict(type='nms', iou_threshold=0.6),
        max_per_img=50,
        mask_thr_binary=0.5))
# Second-stage training transforms: same as the first-stage pipeline but
# without CachedMosaic / CachedMixUp and with a 640x640 resize base scale.
train_pipeline_stage2 = [
    dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')),
    dict(
        type='LoadAnnotations',
        with_bbox=True,
        with_mask=True,
        poly2mask=False),
    dict(
        type='RandomResize',
        scale=(640, 640),
        ratio_range=(0.1, 2.0),
        keep_ratio=True),
    dict(
        type='RandomCrop',
        crop_size=(640, 640),
        recompute_bbox=True,
        allow_negative_crop=True),
    dict(type='FilterAnnotations', min_gt_bbox_wh=(1, 1)),
    dict(type='YOLOXHSVRandomAug'),
    dict(type='RandomFlip', prob=0.5),
    dict(type='Pad', size=(640, 640), pad_val=dict(img=(114, 114, 114))),
    dict(type='PackDetInputs')
]
# Training dataloader: one CocoDataset (police_records, line annotations)
# wrapped in a ConcatDataset.
train_dataloader = dict(
    batch_size=2,
    num_workers=1,
    batch_sampler=None,
    pin_memory=True,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=dict(
        type='ConcatDataset',
        datasets=[
            dict(
                type='CocoDataset',
                # `classes` is a one-element tuple. The dumped value was the
                # bare string 'text_line' (a missing trailing comma), which
                # is a sequence of characters rather than of class names.
                # NOTE(review): the name must match the category name in the
                # annotation file exactly - confirm.
                metainfo=dict(
                    classes=('text_line', ), palette=[(220, 20, 60)]),
                data_prefix=dict(
                    img='/media/erik/Elements/Riksarkivet/data/datasets/htr/segmentation/police_records/'
                ),
                ann_file='/media/erik/Elements/Riksarkivet/data/datasets/htr/segmentation/police_records/gt_files/coco_lines2.json',
                pipeline=[
                    dict(
                        type='LoadImageFromFile',
                        file_client_args=dict(backend='disk')),
                    dict(
                        type='LoadAnnotations',
                        with_bbox=True,
                        with_mask=True,
                        poly2mask=False),
                    dict(
                        type='CachedMosaic',
                        img_scale=(640, 640),
                        pad_val=114.0),
                    dict(
                        type='RandomResize',
                        scale=(1280, 1280),
                        ratio_range=(0.1, 2.0),
                        keep_ratio=True),
                    dict(
                        type='RandomCrop',
                        crop_size=(640, 640),
                        recompute_bbox=True,
                        allow_negative_crop=True),
                    dict(type='YOLOXHSVRandomAug'),
                    dict(type='RandomFlip', prob=0.5),
                    dict(
                        type='Pad',
                        size=(640, 640),
                        pad_val=dict(img=(114, 114, 114))),
                    dict(
                        type='CachedMixUp',
                        img_scale=(640, 640),
                        ratio_range=(1.0, 1.0),
                        max_cached_images=20,
                        pad_val=(114, 114, 114)),
                    dict(type='FilterAnnotations', min_gt_bbox_wh=(1, 1)),
                    dict(type='PackDetInputs')
                ])
        ]))
# Validation dataloader (police_records images, test-mode CocoDataset).
val_dataloader = dict(
    batch_size=1,
    num_workers=10,
    dataset=dict(
        pipeline=[
            dict(
                type='LoadImageFromFile',
                file_client_args=dict(backend='disk')),
            dict(type='Resize', scale=(640, 640), keep_ratio=True),
            dict(
                type='Pad', size=(640, 640),
                pad_val=dict(img=(114, 114, 114))),
            dict(
                type='PackDetInputs',
                meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                           'scale_factor'))
        ],
        type='CocoDataset',
        # One-element tuple; the dumped value was the bare string
        # 'text_line' (missing trailing comma).
        # NOTE(review): must match the category name in the annotation file.
        metainfo=dict(classes=('text_line', ), palette=[(220, 20, 60)]),
        data_prefix=dict(
            img='/media/erik/Elements/Riksarkivet/data/datasets/htr/segmentation/police_records/'
        ),
        # Same annotation file that val_evaluator scores against
        # (coco_lines2.json), so the evaluated images and the ground truth
        # come from one file.
        # NOTE(review): the dump pointed at coco_regions2.json here - confirm
        # that was a leftover and not intentional.
        ann_file='/media/erik/Elements/Riksarkivet/data/datasets/htr/segmentation/police_records/gt_files/coco_lines2.json',
        test_mode=True),
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False))
# Test dataloader (police_records images, test-mode CocoDataset).
test_dataloader = dict(
    batch_size=1,
    num_workers=10,
    dataset=dict(
        pipeline=[
            dict(
                type='LoadImageFromFile',
                file_client_args=dict(backend='disk')),
            dict(type='Resize', scale=(640, 640), keep_ratio=True),
            dict(
                type='Pad', size=(640, 640),
                pad_val=dict(img=(114, 114, 114))),
            dict(
                type='PackDetInputs',
                meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                           'scale_factor'))
        ],
        type='CocoDataset',
        # One-element tuple; the dumped value was the bare string
        # 'text_line' (missing trailing comma).
        # NOTE(review): must match the category name in the annotation file.
        metainfo=dict(classes=('text_line', ), palette=[(220, 20, 60)]),
        data_prefix=dict(
            img='/media/erik/Elements/Riksarkivet/data/datasets/htr/segmentation/police_records/'
        ),
        # Same annotation file that test_evaluator scores against
        # (coco_lines2.json), so the evaluated images and the ground truth
        # come from one file.
        # NOTE(review): the dump pointed at coco_regions2.json here - confirm
        # that was a leftover and not intentional.
        ann_file='/media/erik/Elements/Riksarkivet/data/datasets/htr/segmentation/police_records/gt_files/coco_lines2.json',
        test_mode=True),
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False))
# Schedule constants and the COCO evaluators (bbox + segm) for val / test.
max_epochs = 12
stage2_num_epochs = 2
base_lr = 0.00025
interval = 12
val_evaluator = dict(
    proposal_nums=(100, 1, 10),
    metric=['bbox', 'segm'],
    type='CocoMetric',
    ann_file='/media/erik/Elements/Riksarkivet/data/datasets/htr/segmentation/police_records/gt_files/coco_lines2.json'
)
test_evaluator = dict(
    proposal_nums=(100, 1, 10),
    metric=['bbox', 'segm'],
    type='CocoMetric',
    ann_file='/media/erik/Elements/Riksarkivet/data/datasets/htr/segmentation/police_records/gt_files/coco_lines2.json'
)
# Extra hooks: weight EMA and a pipeline switch at epoch 10.
custom_hooks = [
    dict(
        type='EMAHook',
        ema_type='ExpMomentumEMA',
        momentum=0.0002,
        update_buffers=True,
        priority=49),
    dict(
        type='PipelineSwitchHook',
        switch_epoch=10,
        # Transforms installed at the switch: no CachedMosaic / CachedMixUp.
        switch_pipeline=[
            dict(
                type='LoadImageFromFile',
                file_client_args=dict(backend='disk')),
            dict(
                type='LoadAnnotations',
                with_bbox=True,
                with_mask=True,
                poly2mask=False),
            dict(
                type='RandomResize',
                scale=(640, 640),
                ratio_range=(0.1, 2.0),
                keep_ratio=True),
            dict(
                type='RandomCrop',
                crop_size=(640, 640),
                recompute_bbox=True,
                allow_negative_crop=True),
            dict(type='FilterAnnotations', min_gt_bbox_wh=(1, 1)),
            dict(type='YOLOXHSVRandomAug'),
            dict(type='RandomFlip', prob=0.5),
            dict(
                type='Pad', size=(640, 640),
                pad_val=dict(img=(114, 114, 114))),
            dict(type='PackDetInputs')
        ])
]
# Output directory, per-GPU batch sizes and dataset meta information.
work_dir = '/home/erik/Riksarkivet/Projects/HTR_Pipeline/models/checkpoints/rtmdet_lines_pr_2'
train_batch_size_per_gpu = 2
val_batch_size_per_gpu = 1
train_num_workers = 1
num_classes = 1
# `classes` is a one-element tuple so that it has num_classes (1) entries,
# matching the one-entry palette. The dumped value was the bare string
# 'text_line', i.e. ('text_line') written without the trailing comma.
metainfo = dict(classes=('text_line', ), palette=[(220, 20, 60)])
# ICDAR-2019 training dataset definition (region annotations file).
icdar_2019 = dict(
    type='CocoDataset',
    # One-element tuple; the dumped value was the bare string 'text_line'
    # (missing trailing comma).
    # NOTE(review): must match the category name in the annotation file.
    metainfo=dict(classes=('text_line', ), palette=[(220, 20, 60)]),
    data_prefix=dict(
        img='/media/erik/Elements/Riksarkivet/data/datasets/htr/segmentation/ICDAR-2019/clean/'
    ),
    ann_file='/media/erik/Elements/Riksarkivet/data/datasets/htr/segmentation/ICDAR-2019/clean/gt_files/coco_regions2.json',
    pipeline=[
        dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')),
        dict(
            type='LoadAnnotations',
            with_bbox=True,
            with_mask=True,
            poly2mask=False),
        dict(type='CachedMosaic', img_scale=(640, 640), pad_val=114.0),
        dict(
            type='RandomResize',
            scale=(1280, 1280),
            ratio_range=(0.1, 2.0),
            keep_ratio=True),
        dict(
            type='RandomCrop',
            crop_size=(640, 640),
            recompute_bbox=True,
            allow_negative_crop=True),
        dict(type='YOLOXHSVRandomAug'),
        dict(type='RandomFlip', prob=0.5),
        dict(type='Pad', size=(640, 640), pad_val=dict(img=(114, 114, 114))),
        dict(
            type='CachedMixUp',
            img_scale=(640, 640),
            ratio_range=(1.0, 1.0),
            max_cached_images=20,
            pad_val=(114, 114, 114)),
        dict(type='FilterAnnotations', min_gt_bbox_wh=(1, 1)),
        dict(type='PackDetInputs')
    ])
# ICDAR-2019 test-mode dataset definition (line annotations file).
icdar_2019_test = dict(
    type='CocoDataset',
    # One-element tuple; the dumped value was the bare string 'text_line'
    # (missing trailing comma).
    # NOTE(review): must match the category name in the annotation file.
    metainfo=dict(classes=('text_line', ), palette=[(220, 20, 60)]),
    data_prefix=dict(
        img='/media/erik/Elements/Riksarkivet/data/datasets/htr/segmentation/ICDAR-2019/clean/'
    ),
    ann_file='/media/erik/Elements/Riksarkivet/data/datasets/htr/segmentation/ICDAR-2019/clean/gt_files/coco_lines.json',
    test_mode=True,
    pipeline=[
        dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')),
        dict(type='Resize', scale=(640, 640), keep_ratio=True),
        dict(type='Pad', size=(640, 640), pad_val=dict(img=(114, 114, 114))),
        dict(
            type='PackDetInputs',
            meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                       'scale_factor'))
    ])
# police_records training dataset definition (line annotations file).
police_records = dict(
    type='CocoDataset',
    # One-element tuple; the dumped value was the bare string 'text_line'
    # (missing trailing comma).
    # NOTE(review): must match the category name in the annotation file.
    metainfo=dict(classes=('text_line', ), palette=[(220, 20, 60)]),
    data_prefix=dict(
        img='/media/erik/Elements/Riksarkivet/data/datasets/htr/segmentation/police_records/'
    ),
    ann_file='/media/erik/Elements/Riksarkivet/data/datasets/htr/segmentation/police_records/gt_files/coco_lines2.json',
    pipeline=[
        dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')),
        dict(
            type='LoadAnnotations',
            with_bbox=True,
            with_mask=True,
            poly2mask=False),
        dict(type='CachedMosaic', img_scale=(640, 640), pad_val=114.0),
        dict(
            type='RandomResize',
            scale=(1280, 1280),
            ratio_range=(0.1, 2.0),
            keep_ratio=True),
        dict(
            type='RandomCrop',
            crop_size=(640, 640),
            recompute_bbox=True,
            allow_negative_crop=True),
        dict(type='YOLOXHSVRandomAug'),
        dict(type='RandomFlip', prob=0.5),
        dict(type='Pad', size=(640, 640), pad_val=dict(img=(114, 114, 114))),
        dict(
            type='CachedMixUp',
            img_scale=(640, 640),
            ratio_range=(1.0, 1.0),
            max_cached_images=20,
            pad_val=(114, 114, 114)),
        dict(type='FilterAnnotations', min_gt_bbox_wh=(1, 1)),
        dict(type='PackDetInputs')
    ])
# List of training dataset definitions (currently only police_records).
train_list = [
    dict(
        type='CocoDataset',
        # One-element tuple; the dumped value was the bare string
        # 'text_line' (missing trailing comma).
        # NOTE(review): must match the category name in the annotation file.
        metainfo=dict(classes=('text_line', ), palette=[(220, 20, 60)]),
        data_prefix=dict(
            img='/media/erik/Elements/Riksarkivet/data/datasets/htr/segmentation/police_records/'
        ),
        ann_file='/media/erik/Elements/Riksarkivet/data/datasets/htr/segmentation/police_records/gt_files/coco_lines2.json',
        pipeline=[
            dict(
                type='LoadImageFromFile',
                file_client_args=dict(backend='disk')),
            dict(
                type='LoadAnnotations',
                with_bbox=True,
                with_mask=True,
                poly2mask=False),
            dict(type='CachedMosaic', img_scale=(640, 640), pad_val=114.0),
            dict(
                type='RandomResize',
                scale=(1280, 1280),
                ratio_range=(0.1, 2.0),
                keep_ratio=True),
            dict(
                type='RandomCrop',
                crop_size=(640, 640),
                recompute_bbox=True,
                allow_negative_crop=True),
            dict(type='YOLOXHSVRandomAug'),
            dict(type='RandomFlip', prob=0.5),
            dict(
                type='Pad', size=(640, 640),
                pad_val=dict(img=(114, 114, 114))),
            dict(
                type='CachedMixUp',
                img_scale=(640, 640),
                ratio_range=(1.0, 1.0),
                max_cached_images=20,
                pad_val=(114, 114, 114)),
            dict(type='FilterAnnotations', min_gt_bbox_wh=(1, 1)),
            dict(type='PackDetInputs')
        ])
]
# List of test-mode dataset definitions (currently only ICDAR-2019).
test_list = [
    dict(
        type='CocoDataset',
        # One-element tuple; the dumped value was the bare string
        # 'text_line' (missing trailing comma).
        # NOTE(review): must match the category name in the annotation file.
        metainfo=dict(classes=('text_line', ), palette=[(220, 20, 60)]),
        data_prefix=dict(
            img='/media/erik/Elements/Riksarkivet/data/datasets/htr/segmentation/ICDAR-2019/clean/'
        ),
        ann_file='/media/erik/Elements/Riksarkivet/data/datasets/htr/segmentation/ICDAR-2019/clean/gt_files/coco_lines.json',
        test_mode=True,
        pipeline=[
            dict(
                type='LoadImageFromFile',
                file_client_args=dict(backend='disk')),
            dict(type='Resize', scale=(640, 640), keep_ratio=True),
            dict(
                type='Pad', size=(640, 640),
                pad_val=dict(img=(114, 114, 114))),
            dict(
                type='PackDetInputs',
                meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                           'scale_factor'))
        ])
]
# Inference-style transform pipeline (load, resize, pad, pack) and the
# launcher setting.
pipeline = [
    dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')),
    dict(type='Resize', scale=(640, 640), keep_ratio=True),
    dict(type='Pad', size=(640, 640), pad_val=dict(img=(114, 114, 114))),
    dict(
        type='PackDetInputs',
        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                   'scale_factor'))
]
launcher = 'pytorch'
|