asahi417 committed
Commit 97be1f3
1 Parent(s): 9002347

model update

config.json CHANGED
@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "cner_output/model/self_training_2020/roberta_large_concat/best_model",
+  "_name_or_path": "roberta-large",
   "architectures": [
     "RobertaForTokenClassification"
   ],
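The config change only re-points `_name_or_path` from a local training directory to the public `roberta-large` base checkpoint; the architecture (`RobertaForTokenClassification`) and the fine-tuned weights are unchanged. A minimal sketch of running the checkpoint with `transformers`, assuming a placeholder repository id `REPO_ID` (not given in this diff) and plain argmax decoding (the CRF layer noted in the trainer config below is not applied here):

```python
from transformers import AutoModelForTokenClassification, AutoTokenizer

# REPO_ID is a hypothetical placeholder; substitute the actual model repository
# id (or a local path containing the files in this commit).
REPO_ID = "<namespace>/<model-name>"

tokenizer = AutoTokenizer.from_pretrained(REPO_ID)
model = AutoModelForTokenClassification.from_pretrained(REPO_ID)

text = "Jacob Collier is a Grammy awarded artist from London"
inputs = tokenizer(text, return_tensors="pt")
logits = model(**inputs).logits                # shape: (1, seq_len, num_labels)
label_ids = logits.argmax(dim=-1)[0].tolist()  # per-token argmax over labels
tokens = tokenizer.convert_ids_to_tokens(inputs["input_ids"][0])
print([(tok, model.config.id2label[i]) for tok, i in zip(tokens, label_ids)])
```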
eval/metric.json CHANGED
@@ -1 +1 @@
- {"2020.dev": {"micro/f1": 0.6523160762942778, "micro/f1_ci": {"90": [0.6301119860516167, 0.6721999498732909], "95": [0.6256827352272616, 0.6757145784071992]}, "micro/recall": 0.6253918495297806, "micro/precision": 0.6816628701594533, "macro/f1": 0.5960387703961427, "macro/f1_ci": {"90": [0.571860117919907, 0.617661520243254], "95": [0.5672314417365343, 0.6202644786819803]}, "macro/recall": 0.5765828618482965, "macro/precision": 0.6202749646978413, "per_entity_metric": {"corporation": {"f1": 0.4921465968586387, "f1_ci": {"90": [0.43304843304843305, 0.5463809523809523], "95": [0.4216516516516517, 0.5544566186476468]}, "precision": 0.5251396648044693, "recall": 0.4630541871921182}, "creative_work": {"f1": 0.5247524752475247, "f1_ci": {"90": [0.4631527093596059, 0.5783719493358047], "95": [0.45061708860759486, 0.5882562526198128]}, "precision": 0.5408163265306123, "recall": 0.5096153846153846}, "event": {"f1": 0.40444444444444444, "f1_ci": {"90": [0.34857507363839324, 0.45773952929126493], "95": [0.33752301495972376, 0.46963807824646364]}, "precision": 0.4690721649484536, "recall": 0.35546875}, "group": {"f1": 0.5734265734265733, "f1_ci": {"90": [0.5195848977889027, 0.6235089347876538], "95": [0.5085383982879639, 0.6311954653622422]}, "precision": 0.6089108910891089, "recall": 0.5418502202643172}, "location": {"f1": 0.6424870466321243, "f1_ci": {"90": [0.584929693961952, 0.7013022593844511], "95": [0.5721876216426626, 0.7116691849775172]}, "precision": 0.6048780487804878, "recall": 0.6850828729281768}, "person": {"f1": 0.876949740034662, "f1_ci": {"90": [0.8540133592752779, 0.8966143947714545], "95": [0.8488131754161957, 0.9000865946986364]}, "precision": 0.9100719424460432, "recall": 0.8461538461538461}, "product": {"f1": 0.6580645161290323, "f1_ci": {"90": [0.6111050061050062, 0.7009104549496147], "95": [0.6012197931968087, 0.7075507982583455]}, "precision": 0.6830357142857143, "recall": 0.6348547717842323}}}, "2021.test": {"micro/f1": 0.6471284450205715, "micro/f1_ci": {"90": [0.6382491652207657, 0.6571440963111985], "95": [0.6359081332383856, 0.6582859698963258]}, "micro/recall": 0.6638529139685476, "micro/precision": 0.6312259483232545, "macro/f1": 0.5955097840001077, "macro/f1_ci": {"90": [0.5857013413106968, 0.6055702643415463], "95": [0.5835376526150466, 0.6075732530625402]}, "macro/recall": 0.617986494624747, "macro/precision": 0.5777914210368434, "per_entity_metric": {"corporation": {"f1": 0.5036119711042312, "f1_ci": {"90": [0.4806534455925017, 0.5284623414821599], "95": [0.47407347957909757, 0.5324233733880906]}, "precision": 0.4701348747591522, "recall": 0.5422222222222223}, "creative_work": {"f1": 0.46229307173513184, "f1_ci": {"90": [0.4310155557125255, 0.4922005901052454], "95": [0.4260459552184904, 0.49937002056668195]}, "precision": 0.41888888888888887, "recall": 0.5157318741450069}, "event": {"f1": 0.45454545454545453, "f1_ci": {"90": [0.4323456884934265, 0.47844747633814705], "95": [0.42856966141231356, 0.4837483853429276]}, "precision": 0.4972972972972973, "recall": 0.41856232939035487}, "group": {"f1": 0.6081483935077839, "f1_ci": {"90": [0.5873861120091016, 0.6308477697125173], "95": [0.5844401885267315, 0.6347920120955058]}, "precision": 0.6115922718187875, "recall": 0.6047430830039525}, "location": {"f1": 0.6567357512953369, "f1_ci": {"90": [0.6301160529567461, 0.6841498978590616], "95": [0.6245743302822452, 0.6896575134021583]}, "precision": 0.6123188405797102, "recall": 0.7081005586592178}, "person": {"f1": 0.8438177874186551, "f1_ci": {"90": [0.8338994566452618, 
0.8542835068961404], "95": [0.8317271168545677, 0.8561935629542744]}, "precision": 0.8276595744680851, "recall": 0.8606194690265486}, "product": {"f1": 0.6394160583941606, "f1_ci": {"90": [0.6165983929954278, 0.6601031411157993], "95": [0.6126448024549289, 0.6633775040050582]}, "precision": 0.6066481994459834, "recall": 0.6759259259259259}}}, "2020.test": {"micro/f1": 0.6520799567801189, "micro/f1_ci": {"90": [0.6319589319349793, 0.6705298013245033], "95": [0.6271340834021469, 0.6740808787763395]}, "micro/recall": 0.6263622210690192, "micro/precision": 0.68, "macro/f1": 0.6098311514418262, "macro/f1_ci": {"90": [0.5874617884091958, 0.6291356876675341], "95": [0.5835113812973399, 0.6340845225545556]}, "macro/recall": 0.591770125645325, "macro/precision": 0.6333934951371484, "per_entity_metric": {"corporation": {"f1": 0.5544554455445545, "f1_ci": {"90": [0.498602163445507, 0.6066403961872304], "95": [0.4897792812777284, 0.6180048661800486]}, "precision": 0.5258215962441315, "recall": 0.5863874345549738}, "creative_work": {"f1": 0.5, "f1_ci": {"90": [0.4389666618559677, 0.5535367360116793], "95": [0.431355196946035, 0.5617461496218268]}, "precision": 0.4972375690607735, "recall": 0.5027932960893855}, "event": {"f1": 0.4661016949152542, "f1_ci": {"90": [0.4144606957987239, 0.5166514049403684], "95": [0.4009106544028492, 0.5269581090982591]}, "precision": 0.5314009661835749, "recall": 0.41509433962264153}, "group": {"f1": 0.5688405797101449, "f1_ci": {"90": [0.519832461752942, 0.6197849713684702], "95": [0.5116894943301686, 0.6318136568099699]}, "precision": 0.6514522821576764, "recall": 0.5048231511254019}, "location": {"f1": 0.676923076923077, "f1_ci": {"90": [0.6149558679306963, 0.7297417595637181], "95": [0.6062495573654391, 0.7428611332801277]}, "precision": 0.6875, "recall": 0.6666666666666666}, "person": {"f1": 0.8373702422145329, "f1_ci": {"90": [0.8122615081885932, 0.8590102707749767], "95": [0.8066747941433811, 0.8649141578594952]}, "precision": 0.8642857142857143, "recall": 0.8120805369127517}, "product": {"f1": 0.6651270207852195, "f1_ci": {"90": [0.6133880489560835, 0.7139751552795033], "95": [0.6018397241204758, 0.7209418026148326]}, "precision": 0.676056338028169, "recall": 0.6545454545454545}}}, "2021.test (span detection)": {"micro/f1": 0.7871716830120618, "micro/f1_ci": {}, "micro/recall": 0.8075633167572569, "micro/precision": 0.7677844969763606, "macro/f1": 0.7871716830120618, "macro/f1_ci": {}, "macro/recall": 0.8075633167572569, "macro/precision": 0.7677844969763606}, "2020.test (span detection)": {"micro/f1": 0.7676931388438681, "micro/f1_ci": {}, "micro/recall": 0.7374156720290607, "micro/precision": 0.8005633802816902, "macro/f1": 0.7676931388438681, "macro/f1_ci": {}, "macro/recall": 0.7374156720290607, "macro/precision": 0.8005633802816902}, "2020.dev (span detection)": {"micro/f1": 0.7689373297002725, "micro/f1_ci": {}, "micro/recall": 0.7371995820271683, "micro/precision": 0.8035307517084282, "macro/f1": 0.7689373297002725, "macro/f1_ci": {}, "macro/recall": 0.7371995820271683, "macro/precision": 0.8035307517084282}}
+ {"2020.dev": {"micro/f1": 0.649414328520839, "micro/f1_ci": {}, "micro/recall": 0.6227795193312434, "micro/precision": 0.678429140580535, "macro/f1": 0.590650897262125, "macro/f1_ci": {}, "macro/recall": 0.5744024079725654, "macro/precision": 0.6134283006959169, "per_entity_metric": {"corporation": {"f1": 0.49871465295629824, "f1_ci": {}, "precision": 0.521505376344086, "recall": 0.47783251231527096}, "creative_work": {"f1": 0.48258706467661694, "f1_ci": {}, "precision": 0.5, "recall": 0.46634615384615385}, "event": {"f1": 0.38337182448036955, "f1_ci": {}, "precision": 0.4689265536723164, "recall": 0.32421875}, "group": {"f1": 0.5794392523364486, "f1_ci": {}, "precision": 0.6169154228855721, "recall": 0.5462555066079295}, "location": {"f1": 0.6548223350253807, "f1_ci": {}, "precision": 0.6056338028169014, "recall": 0.712707182320442}, "person": {"f1": 0.8815331010452963, "f1_ci": {}, "precision": 0.92, "recall": 0.8461538461538461}, "product": {"f1": 0.6540880503144655, "f1_ci": {}, "precision": 0.6610169491525424, "recall": 0.6473029045643154}}}, "2021.test": {"micro/f1": 0.6545742216194834, "micro/f1_ci": {"90": [0.6459013617167609, 0.6637399915981033], "95": [0.6439605146787715, 0.6661442289789786]}, "micro/recall": 0.669750231267345, "micro/precision": 0.640070726047077, "macro/f1": 0.6038933000880791, "macro/f1_ci": {"90": [0.594478037324853, 0.613471230347705], "95": [0.5921218092360883, 0.615869972921379]}, "macro/recall": 0.6275044421067731, "macro/precision": 0.5872465756589016, "per_entity_metric": {"corporation": {"f1": 0.5255936675461742, "f1_ci": {"90": [0.5012547500758194, 0.551666427374269], "95": [0.4980840722696223, 0.5558710225982962]}, "precision": 0.5005025125628141, "recall": 0.5533333333333333}, "creative_work": {"f1": 0.4611679711017459, "f1_ci": {"90": [0.4312413847117795, 0.49084488005046123], "95": [0.4248320534266514, 0.4955962167603389]}, "precision": 0.4118279569892473, "recall": 0.5239398084815321}, "event": {"f1": 0.4583333333333333, "f1_ci": {"90": [0.4341053653001964, 0.48240018380694033], "95": [0.43086896520427154, 0.48663339577486764]}, "precision": 0.5189873417721519, "recall": 0.4103730664240218}, "group": {"f1": 0.6170427753452341, "f1_ci": {"90": [0.5971090680081758, 0.6395583815950936], "95": [0.5932359545008398, 0.6434986114965837]}, "precision": 0.631288766368022, "recall": 0.6034255599472991}, "location": {"f1": 0.6717267552182163, "f1_ci": {"90": [0.6452836612195251, 0.6974786576390933], "95": [0.6398442569759896, 0.7024897121350432]}, "precision": 0.6138728323699422, "recall": 0.7416201117318436}, "person": {"f1": 0.8439139084825467, "f1_ci": {"90": [0.833604517822505, 0.8540160799803808], "95": [0.8317777251893184, 0.8557604409045942]}, "precision": 0.8281860134895279, "recall": 0.8602507374631269}, "product": {"f1": 0.6494746895893028, "f1_ci": {"90": [0.6273696986005467, 0.6714700871761391], "95": [0.6241423028879639, 0.6750002697453603]}, "precision": 0.6060606060606061, "recall": 0.6995884773662552}}}, "2020.test": {"micro/f1": 0.6623235613463626, "micro/f1_ci": {"90": [0.6422474302424246, 0.6811324728997347], "95": [0.6389031826951058, 0.6850053691756447]}, "micro/recall": 0.6331084587441619, "micro/precision": 0.6943653955606147, "macro/f1": 0.6225690518125756, "macro/f1_ci": {"90": [0.6008525675448059, 0.6437197639424065], "95": [0.5962025204396408, 0.6474952948771053]}, "macro/recall": 0.6036807965123165, "macro/precision": 0.6499146769265831, "per_entity_metric": {"corporation": {"f1": 0.5750000000000001, "f1_ci": {"90": 
[0.5147291967415961, 0.6288765247654537], "95": [0.504878881260727, 0.6379375324900364]}, "precision": 0.5502392344497608, "recall": 0.6020942408376964}, "creative_work": {"f1": 0.5420054200542006, "f1_ci": {"90": [0.48349650349650347, 0.5967239555790587], "95": [0.4748140470674308, 0.6045342912754751]}, "precision": 0.5263157894736842, "recall": 0.5586592178770949}, "event": {"f1": 0.4675324675324675, "f1_ci": {"90": [0.41508036338225013, 0.5208786231884058], "95": [0.4035317955783264, 0.5322652223706814]}, "precision": 0.5482233502538071, "recall": 0.4075471698113208}, "group": {"f1": 0.5831775700934579, "f1_ci": {"90": [0.5351875463306153, 0.6303075786002615], "95": [0.526305305095408, 0.6420861887190302]}, "precision": 0.6964285714285714, "recall": 0.5016077170418006}, "location": {"f1": 0.6785714285714286, "f1_ci": {"90": [0.6144280090659667, 0.7387809684684685], "95": [0.6031520562770563, 0.7486189183112234]}, "precision": 0.6666666666666666, "recall": 0.6909090909090909}, "person": {"f1": 0.8356401384083046, "f1_ci": {"90": [0.8096005342802151, 0.8583135368553761], "95": [0.804469034180745, 0.8641178093692118]}, "precision": 0.8625, "recall": 0.8104026845637584}, "product": {"f1": 0.6760563380281691, "f1_ci": {"90": [0.6243472081218274, 0.7256235827664399], "95": [0.6153753026634383, 0.7338220562790275]}, "precision": 0.6990291262135923, "recall": 0.6545454545454545}}}, "2021.test (span detection)": {"micro/f1": 0.7917043399638336, "micro/f1_ci": {}, "micro/recall": 0.8101075517520527, "micro/precision": 0.7741186871477511, "macro/f1": 0.7917043399638336, "macro/f1_ci": {}, "macro/recall": 0.8101075517520527, "macro/precision": 0.7741186871477511}, "2020.test (span detection)": {"micro/f1": 0.7716535433070866, "micro/f1_ci": {}, "micro/recall": 0.7374156720290607, "micro/precision": 0.8092255125284739, "macro/f1": 0.7716535433070866, "macro/f1_ci": {}, "macro/recall": 0.7374156720290607, "macro/precision": 0.8092255125284739}}
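The replaced eval/metric.json keeps the same schema on both sides of the diff: one top-level entry per evaluation split ("2020.dev", "2020.test", "2021.test", plus the "(span detection)" variants), each holding micro/macro precision, recall and F1, optional bootstrap confidence intervals under the `*_ci` keys, and a `per_entity_metric` breakdown. A small sketch, assuming a local copy of the file, for pulling out the headline numbers:

```python
import json

# Read the evaluation file shipped with the model (path as in this repository).
with open("eval/metric.json") as f:
    metric = json.load(f)

# Headline micro/macro F1 for each evaluation split.
for split, scores in metric.items():
    print(f"{split:32s} micro/f1={scores['micro/f1']:.4f} macro/f1={scores['macro/f1']:.4f}")

# Per-entity F1 on the 2021 test set (the span-detection splits have no per-entity breakdown).
for entity, scores in metric["2021.test"]["per_entity_metric"].items():
    print(f"{entity:15s} f1={scores['f1']:.4f}")
```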
eval/prediction.2020.dev.json CHANGED
The diff for this file is too large to render. See raw diff
eval/prediction.2020.test.json CHANGED
The diff for this file is too large to render. See raw diff
eval/prediction.2021.test.json CHANGED
The diff for this file is too large to render. See raw diff
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ca4d699c094e8f6eb8671abed7d79a1ad84b0a55c35777f36d680a657df2bb9d
-size 1417433137
+oid sha256:c8c4b922ce1aa68fc817abaa4bc47d90cb6aa5534e1fc879a285e4008033c286
+size 1417438577
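pytorch_model.bin is stored via Git LFS, so only the pointer file changes here: a new SHA-256 object id and a slightly larger payload (1,417,438,577 bytes vs. 1,417,433,137). A sketch, assuming the weights have been pulled locally, for checking a downloaded file against the new pointer:

```python
import hashlib
import os

# Expected values taken from the new LFS pointer in this commit.
EXPECTED_OID = "c8c4b922ce1aa68fc817abaa4bc47d90cb6aa5534e1fc879a285e4008033c286"
EXPECTED_SIZE = 1417438577

def verify_lfs_object(path: str) -> bool:
    """Compare a local file against the oid/size recorded in the LFS pointer."""
    if os.path.getsize(path) != EXPECTED_SIZE:
        return False
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        # Hash in 1 MiB chunks to avoid loading the ~1.4 GB file into memory.
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest() == EXPECTED_OID

print(verify_lfs_object("pytorch_model.bin"))
```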
tokenizer.json CHANGED
@@ -53,8 +53,7 @@
   "pre_tokenizer": {
     "type": "ByteLevel",
     "add_prefix_space": false,
-    "trim_offsets": true,
-    "use_regex": true
+    "trim_offsets": true
   },
   "post_processor": {
     "type": "RobertaProcessing",
@@ -72,8 +71,7 @@
   "decoder": {
     "type": "ByteLevel",
     "add_prefix_space": true,
-    "trim_offsets": true,
-    "use_regex": true
+    "trim_offsets": true
   },
   "model": {
     "type": "BPE",
tokenizer_config.json CHANGED
@@ -6,10 +6,9 @@
   "errors": "replace",
   "mask_token": "<mask>",
   "model_max_length": 512,
-  "name_or_path": "cner_output/model/self_training_2020/roberta_large_concat/best_model",
+  "name_or_path": "roberta-large",
   "pad_token": "<pad>",
   "sep_token": "</s>",
-  "special_tokens_map_file": "cner_output/model/self_training_2020/roberta_large_concat/best_model/special_tokens_map.json",
   "tokenizer_class": "RobertaTokenizer",
   "trim_offsets": true,
   "unk_token": "<unk>"
trainer_config.json ADDED
@@ -0,0 +1 @@
+{"data_split": "2020_2020.extra.roberta-large-2020", "model": "roberta-large", "crf": true, "max_length": 128, "epoch": 30, "batch_size": 32, "lr": 1e-05, "random_seed": 0, "gradient_accumulation_steps": 1, "weight_decay": 1e-07, "lr_warmup_step_ratio": 0.15, "max_grad_norm": 1}