_base_ = [
    '../../_base_/schedules/schedule_adadelta_18e.py',
    '../../_base_/default_runtime.py'
]

# Entity categories of the CLUENER2020 dataset.
categories = [
    'address', 'book', 'company', 'game', 'government', 'movie', 'name',
    'organization', 'position', 'scene'
]

# Annotation files and BERT vocabulary.
test_ann_file = 'data/cluener2020/dev.json'
train_ann_file = 'data/cluener2020/train.json'
vocab_file = 'data/cluener2020/vocab.txt'

max_len = 128

# Each annotation line is a JSON object with 'text' and 'label' fields.
loader = dict(
    type='HardDiskLoader',
    repeat=1,
    parser=dict(type='LineJsonParser', keys=['text', 'label']))

# Converts text and entity annotations into BIO-tagged token ids.
ner_convertor = dict(
    type='NerConvertor',
    annotation_type='bio',
    vocab_file=vocab_file,
    categories=categories,
    max_len=max_len)

test_pipeline = [
    dict(type='NerTransform', label_convertor=ner_convertor, max_len=max_len),
    dict(type='ToTensorNER')
]
train_pipeline = [
    dict(type='NerTransform', label_convertor=ner_convertor, max_len=max_len),
    dict(type='ToTensorNER')
]

dataset_type = 'NerDataset'

train = dict(
    type=dataset_type,
    ann_file=train_ann_file,
    loader=loader,
    pipeline=train_pipeline,
    test_mode=False)

test = dict(
    type=dataset_type,
    ann_file=test_ann_file,
    loader=loader,
    pipeline=test_pipeline,
    test_mode=True)

data = dict(
    samples_per_gpu=8,
    workers_per_gpu=2,
    train=train,
    val=test,
    test=test)

# Evaluate f1-score on the validation set after every epoch.
evaluation = dict(interval=1, metric='f1-score')

# BERT encoder with a fully-connected softmax decoder for token classification.
model = dict(
    type='NerClassifier',
    encoder=dict(
        type='BertEncoder',
        max_position_embeddings=512,
        init_cfg=dict(
            type='Pretrained',
            checkpoint='https://download.openmmlab.com/mmocr/ner/'
            'bert_softmax/bert_pretrain.pth')),
    decoder=dict(type='FCDecoder'),
    loss=dict(type='MaskedCrossEntropyLoss'),
    label_convertor=ner_convertor)

test_cfg = None
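
# Usage sketch (an assumption, not part of the original config): in a standard
# MMOCR 0.x checkout this file would typically live under
# configs/ner/bert_softmax/, and training would be launched with the generic
# entry point, e.g.
#
#   python tools/train.py configs/ner/bert_softmax/bert_softmax_cluener_18e.py \
#       --work-dir work_dirs/bert_softmax_cluener
#
# The config filename and work-dir above are illustrative; adjust them to the
# local checkout. CLUENER2020 annotations (train.json / dev.json) and the BERT
# vocab.txt are expected under data/cluener2020/ to match the *_ann_file and
# vocab_file settings above.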