|
# Inherit the AdaDelta 18-epoch training schedule and the default runtime
# settings (logging, checkpointing, etc.) from the shared base configs.
_base_ = [
    '../../_base_/schedules/schedule_adadelta_18e.py',
    '../../_base_/default_runtime.py',
]

# Entity categories of the CLUENER2020 benchmark used as tagging classes.
categories = [
    'address', 'book', 'company', 'game', 'government', 'movie', 'name',
    'organization', 'position', 'scene',
]

# Annotation files for the dataset splits and the BERT vocabulary file.
train_ann_file = 'data/cluener2020/train.json'
test_ann_file = 'data/cluener2020/dev.json'
vocab_file = 'data/cluener2020/vocab.txt'

# Maximum token sequence length fed to the model.
max_len = 128
|
# Annotation loading spec: read annotation files from disk, one pass over
# the data, parsing each line as a JSON object with the raw text and its
# entity labels.
loader = {
    'type': 'HardDiskLoader',
    'repeat': 1,
    'parser': {'type': 'LineJsonParser', 'keys': ['text', 'label']},
}
|
|
|
# Label convertor spec: converts between raw text/labels and model
# inputs/outputs using BIO-style entity annotation over the CLUENER
# categories, truncating/padding to max_len tokens.
ner_convertor = {
    'type': 'NerConvertor',
    'annotation_type': 'bio',
    'vocab_file': vocab_file,
    'categories': categories,
    'max_len': max_len,
}
|
|
|
# Train and test share identical preprocessing: transform text/labels via
# the label convertor, then pack the results into tensors.
test_pipeline = [
    {
        'type': 'NerTransform',
        'label_convertor': ner_convertor,
        'max_len': max_len,
    },
    {'type': 'ToTensorNER'},
]

train_pipeline = [
    {
        'type': 'NerTransform',
        'label_convertor': ner_convertor,
        'max_len': max_len,
    },
    {'type': 'ToTensorNER'},
]

# Registry name of the dataset class used for both splits.
dataset_type = 'NerDataset'
|
|
|
# Training split: train.json with training-time behaviour enabled.
train = {
    'type': dataset_type,
    'ann_file': train_ann_file,
    'loader': loader,
    'pipeline': train_pipeline,
    'test_mode': False,
}

# Evaluation split: dev.json, run in test mode.
test = {
    'type': dataset_type,
    'ann_file': test_ann_file,
    'loader': loader,
    'pipeline': test_pipeline,
    'test_mode': True,
}

# Dataloader settings; the dev split doubles as both val and test.
data = {
    'samples_per_gpu': 8,
    'workers_per_gpu': 2,
    'train': train,
    'val': test,
    'test': test,
}

# Evaluate F1-score after every epoch.
evaluation = {'interval': 1, 'metric': 'f1-score'}
|
|
|
# NER model: BERT encoder (initialised from the released pretrained
# checkpoint) followed by a fully-connected decoder, trained with a
# masked cross-entropy loss; shares the label convertor defined above.
model = {
    'type': 'NerClassifier',
    'encoder': {
        'type': 'BertEncoder',
        'max_position_embeddings': 512,
        'init_cfg': {
            'type': 'Pretrained',
            'checkpoint': 'https://download.openmmlab.com/mmocr/ner/'
            'bert_softmax/bert_pretrain.pth',
        },
    },
    'decoder': {'type': 'FCDecoder'},
    'loss': {'type': 'MaskedCrossEntropyLoss'},
    'label_convertor': ner_convertor,
}

# No extra test-time config is required for this model.
test_cfg = None
|
|