# Training/evaluation config for an ``SDMGR`` model with a ``UNet`` backbone,
# reading its data from ``data/wildreceipt``.
#
# The file consists only of module-level assignments; no extra helper names
# are defined, so the set of top-level keys is exactly the one listed below.

# --------------------------------------------------------------------------
# Image preprocessing
# --------------------------------------------------------------------------
# Normalisation arguments forwarded verbatim (``**img_norm_cfg``) to the
# ``Normalize`` step of both pipelines.
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53],
    std=[58.395, 57.12, 57.375],
    to_rgb=True)

# Passed to ``Resize`` as ``img_scale=(max_scale, min_scale)``.
max_scale, min_scale = 1024, 512

# --------------------------------------------------------------------------
# Data pipelines
# --------------------------------------------------------------------------
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations'),
    dict(type='Resize', img_scale=(max_scale, min_scale), keep_ratio=True),
    # flip_ratio is 0. -- NOTE(review): presumably this disables flipping
    # while keeping the step in the pipeline; confirm against RandomFlip.
    dict(type='RandomFlip', flip_ratio=0.),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='KIEFormatBundle'),
    dict(
        type='Collect',
        keys=['img', 'relations', 'texts', 'gt_bboxes', 'gt_labels']),
]

# Same steps as ``train_pipeline``; only the final ``Collect`` differs: it
# omits 'gt_labels' from ``keys`` and lists explicit ``meta_keys``.
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations'),
    dict(type='Resize', img_scale=(max_scale, min_scale), keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=0.),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='KIEFormatBundle'),
    dict(
        type='Collect',
        keys=['img', 'relations', 'texts', 'gt_bboxes'],
        meta_keys=[
            'img_norm_cfg', 'img_shape', 'ori_filename', 'filename',
            'ori_texts'
        ]),
]

# --------------------------------------------------------------------------
# Datasets
# --------------------------------------------------------------------------
dataset_type = 'KIEDataset'
data_root = 'data/wildreceipt'

# Annotation loader shared by the train and test dataset definitions.
loader = dict(
    type='HardDiskLoader',
    repeat=1,
    parser=dict(
        type='LineJsonParser',
        keys=['file_name', 'height', 'width', 'annotations']))

train = dict(
    type=dataset_type,
    ann_file=f'{data_root}/train.txt',
    pipeline=train_pipeline,
    img_prefix=data_root,
    loader=loader,
    dict_file=f'{data_root}/dict.txt',
    test_mode=False)

test = dict(
    type=dataset_type,
    ann_file=f'{data_root}/test.txt',
    pipeline=test_pipeline,
    img_prefix=data_root,
    loader=loader,
    dict_file=f'{data_root}/dict.txt',
    test_mode=True)

# ``val`` and ``test`` both point at the ``test`` dataset definition above.
data = dict(
    samples_per_gpu=4,
    workers_per_gpu=4,
    val_dataloader=dict(samples_per_gpu=1),
    test_dataloader=dict(samples_per_gpu=1),
    train=train,
    val=test,
    test=test)

# --------------------------------------------------------------------------
# Evaluation
# --------------------------------------------------------------------------
# NOTE(review): ``ignores`` is presumably the list of class indices left out
# of the macro-F1 average; confirm against the dataset's evaluate().
evaluation = dict(
    interval=1,
    metric='macro_f1',
    metric_options=dict(
        macro_f1=dict(
            ignores=[0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 25])))

# --------------------------------------------------------------------------
# Model
# --------------------------------------------------------------------------
# NOTE(review): ``visual_dim=16`` equals the backbone's ``base_channels=16``;
# presumably the two must be kept in sync -- confirm before changing either.
model = dict(
    type='SDMGR',
    backbone=dict(type='UNet', base_channels=16),
    bbox_head=dict(
        type='SDMGRHead', visual_dim=16, num_chars=92, num_classes=26),
    visual_modality=True,
    train_cfg=None,
    test_cfg=None,
    class_list=f'{data_root}/class_list.txt')

# --------------------------------------------------------------------------
# Optimisation schedule
# --------------------------------------------------------------------------
# No learning rate is given here, so the optimizer's own default applies.
optimizer = dict(type='Adam', weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)

# NOTE(review): warmup_iters=1 with warmup_ratio=1 presumably makes the
# linear warmup a no-op; confirm against the LR hook implementation.
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=1,
    warmup_ratio=1,
    step=[40, 50])
total_epochs = 60

# --------------------------------------------------------------------------
# Runtime
# --------------------------------------------------------------------------
checkpoint_config = dict(interval=1)
log_config = dict(interval=50, hooks=[dict(type='TextLoggerHook')])
dist_params = dict(backend='nccl')
log_level = 'INFO'
load_from = None
resume_from = None
workflow = [('train', 1)]

find_unused_parameters = True