asahi417 committed
Commit 1879dc7
1 parent: f4f039d

model update

config.json CHANGED
@@ -1,5 +1,5 @@
  {
- "_name_or_path": "cner_output/model/self_training_2021/roberta_large_concat/best_model",
+ "_name_or_path": "roberta-large",
  "architectures": [
  "RobertaForTokenClassification"
  ],
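The only substantive change here is that `_name_or_path` is rewritten from a local training directory to the public base checkpoint `roberta-large`; the architecture remains `RobertaForTokenClassification`. As a minimal sketch (assuming the `transformers` library; `user/model` below is a placeholder, not the actual Hub repository id), the updated checkpoint would be loaded like this:

```python
# Minimal sketch, assuming transformers and torch are installed.
# "user/model" is a hypothetical placeholder for the real repository id.
from transformers import AutoModelForTokenClassification, AutoTokenizer
import torch

repo_id = "user/model"  # placeholder, not the actual repository name
tokenizer = AutoTokenizer.from_pretrained(repo_id)
model = AutoModelForTokenClassification.from_pretrained(repo_id)

# `_name_or_path` in config.json is informational; rewriting it to
# "roberta-large" does not change how the weights are resolved or loaded.
inputs = tokenizer("Jacob Collier is performing in London tonight.", return_tensors="pt")
with torch.no_grad():
    logits = model(**inputs).logits  # (batch, sequence_length, num_labels)
print(logits.argmax(dim=-1))
```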
eval/metric.json CHANGED
@@ -1 +1 @@
- {"2020.dev": {"micro/f1": 0.6465191581219644, "micro/f1_ci": {"90": [0.6241835985561727, 0.6666757394302305], "95": [0.6194310343569784, 0.6704140430128958]}, "micro/recall": 0.625914315569488, "micro/precision": 0.6685267857142857, "macro/f1": 0.5907051354319692, "macro/f1_ci": {"90": [0.5674383743361626, 0.6112390385523173], "95": [0.5623141361222982, 0.6154035630159613]}, "macro/recall": 0.5790623427980605, "macro/precision": 0.6071458813975339, "per_entity_metric": {"corporation": {"f1": 0.4603580562659847, "f1_ci": {"90": [0.4009290953545232, 0.5159784417848934], "95": [0.3901007326007326, 0.5251514849883665]}, "precision": 0.4787234042553192, "recall": 0.4433497536945813}, "creative_work": {"f1": 0.5352798053527982, "f1_ci": {"90": [0.47683943945349055, 0.587317951235477], "95": [0.461119335471667, 0.5953614893360645]}, "precision": 0.541871921182266, "recall": 0.5288461538461539}, "event": {"f1": 0.3937360178970917, "f1_ci": {"90": [0.33707137136894216, 0.4480407643034555], "95": [0.3272708681911136, 0.460473678395846]}, "precision": 0.4607329842931937, "recall": 0.34375}, "group": {"f1": 0.5700934579439253, "f1_ci": {"90": [0.5119275461380725, 0.6200885560244932], "95": [0.5010342518596153, 0.6320186853520188]}, "precision": 0.6069651741293532, "recall": 0.5374449339207048}, "location": {"f1": 0.6598984771573605, "f1_ci": {"90": [0.6041470125786164, 0.7127670725723716], "95": [0.5940954506802723, 0.721102461603198]}, "precision": 0.6103286384976526, "recall": 0.7182320441988951}, "person": {"f1": 0.8712186689714779, "f1_ci": {"90": [0.8484734241567417, 0.890061406327069], "95": [0.8438054323492316, 0.8956910019429711]}, "precision": 0.9016100178890877, "recall": 0.842809364548495}, "product": {"f1": 0.6443514644351463, "f1_ci": {"90": [0.5992625468672358, 0.6863570057581574], "95": [0.5902266609812649, 0.6929224839542116]}, "precision": 0.6497890295358649, "recall": 0.6390041493775933}}}, "2021.test": {"micro/f1": 0.6432728900828296, "micro/f1_ci": {"90": [0.6346371239242744, 0.6527426061422125], "95": [0.6325785632921685, 0.654153674891706]}, "micro/recall": 0.6645467160037003, "micro/precision": 0.6233188720173536, "macro/f1": 0.5930573851639078, "macro/f1_ci": {"90": [0.5833111601787837, 0.6025124237732068], "95": [0.5808985762518359, 0.6047379720235664]}, "macro/recall": 0.6198835635729767, "macro/precision": 0.5724669964024179, "per_entity_metric": {"corporation": {"f1": 0.4987251402345742, "f1_ci": {"90": [0.4759021109395986, 0.5235213443902974], "95": [0.46998433956223123, 0.5286542480820317]}, "precision": 0.46088595664467485, "recall": 0.5433333333333333}, "creative_work": {"f1": 0.44747774480712166, "f1_ci": {"90": [0.41874086872898, 0.4765478768869068], "95": [0.413152311737946, 0.48175901413334915]}, "precision": 0.3951781970649895, "recall": 0.5157318741450069}, "event": {"f1": 0.45825049701789267, "f1_ci": {"90": [0.4346460786587436, 0.4808081736424989], "95": [0.4301067184089255, 0.48542853486853593]}, "precision": 0.5049288061336255, "recall": 0.4194722474977252}, "group": {"f1": 0.6011865524060646, "f1_ci": {"90": [0.5806004721301254, 0.6234138448986499], "95": [0.5773837112843277, 0.6271576187977291]}, "precision": 0.6015831134564644, "recall": 0.6007905138339921}, "location": {"f1": 0.6598334401024983, "f1_ci": {"90": [0.6294532874260654, 0.6873915404087002], "95": [0.6241111604910838, 0.6912957103397097]}, "precision": 0.6094674556213018, "recall": 0.7192737430167597}, "person": {"f1": 0.8356336260978671, "f1_ci": {"90": [0.8256715208072907, 
0.8457981419617692], "95": [0.8239401591481131, 0.8480945851860394]}, "precision": 0.8130449947680503, "recall": 0.8595132743362832}, "product": {"f1": 0.650294695481336, "f1_ci": {"90": [0.6282566099248063, 0.6712689611864419], "95": [0.624556559287539, 0.6754463883482095]}, "precision": 0.6221804511278195, "recall": 0.6810699588477366}}}, "2020.test": {"micro/f1": 0.6561243634414365, "micro/f1_ci": {"90": [0.6365860685687125, 0.6744129002816157], "95": [0.6333481261188718, 0.6773850218862604]}, "micro/recall": 0.6351842241826674, "micro/precision": 0.6784922394678492, "macro/f1": 0.6173467345908658, "macro/f1_ci": {"90": [0.5952656950320451, 0.6366890460971872], "95": [0.5922888797012332, 0.6403723361198639]}, "macro/recall": 0.6041080498223751, "macro/precision": 0.6360219884536791, "per_entity_metric": {"corporation": {"f1": 0.5658536585365853, "f1_ci": {"90": [0.5125526508949241, 0.6157760814249363], "95": [0.5027554329966137, 0.6263709692698268]}, "precision": 0.5296803652968036, "recall": 0.6073298429319371}, "creative_work": {"f1": 0.5420054200542006, "f1_ci": {"90": [0.4860285187672348, 0.5947376093294461], "95": [0.4764635854341736, 0.6060645313235987]}, "precision": 0.5263157894736842, "recall": 0.5586592178770949}, "event": {"f1": 0.4917355371900826, "f1_ci": {"90": [0.4376105935166595, 0.5475549542404379], "95": [0.42447425883933815, 0.5573156168530822]}, "precision": 0.54337899543379, "recall": 0.4490566037735849}, "group": {"f1": 0.5709090909090908, "f1_ci": {"90": [0.5263063063063063, 0.6166781270464964], "95": [0.5152530244686334, 0.627799430639498]}, "precision": 0.6569037656903766, "recall": 0.5048231511254019}, "location": {"f1": 0.6646525679758307, "f1_ci": {"90": [0.5992742848265606, 0.7279340004553058], "95": [0.5844119363782285, 0.7375091019417477]}, "precision": 0.6626506024096386, "recall": 0.6666666666666666}, "person": {"f1": 0.8306104901117799, "f1_ci": {"90": [0.8044736411197559, 0.8549033122756916], "95": [0.7984257451859667, 0.8582263492659025]}, "precision": 0.8518518518518519, "recall": 0.8104026845637584}, "product": {"f1": 0.6556603773584906, "f1_ci": {"90": [0.6049658273381295, 0.7033524218563588], "95": [0.5955476190476191, 0.7139669826740765]}, "precision": 0.6813725490196079, "recall": 0.6318181818181818}}}, "2021.test (span detection)": {"micro/f1": 0.7835791598900791, "micro/f1_ci": {}, "micro/recall": 0.8079102578929108, "micro/precision": 0.760670731707317, "macro/f1": 0.7835791598900791, "macro/f1_ci": {}, "macro/recall": 0.8079102578929108, "macro/precision": 0.760670731707317}, "2020.test (span detection)": {"micro/f1": 0.7686105885514646, "micro/f1_ci": {}, "micro/recall": 0.742086144265698, "micro/precision": 0.7971014492753623, "macro/f1": 0.7686105885514646, "macro/f1_ci": {}, "macro/recall": 0.742086144265698, "macro/precision": 0.7971014492753623}, "2020.dev (span detection)": {"micro/f1": 0.7682333873581848, "micro/f1_ci": {}, "micro/recall": 0.7429467084639498, "micro/precision": 0.7953020134228188, "macro/f1": 0.7682333873581848, "macro/f1_ci": {}, "macro/recall": 0.7429467084639498, "macro/precision": 0.7953020134228188}}
 
+ {"2020.dev": {"micro/f1": 0.6461038961038961, "micro/f1_ci": {}, "micro/recall": 0.6238244514106583, "micro/precision": 0.67003367003367, "macro/f1": 0.587322338466695, "macro/f1_ci": {}, "macro/recall": 0.5756381956969621, "macro/precision": 0.6090469200966296, "per_entity_metric": {"corporation": {"f1": 0.484375, "f1_ci": {}, "precision": 0.5138121546961326, "recall": 0.458128078817734}, "creative_work": {"f1": 0.511520737327189, "f1_ci": {}, "precision": 0.4911504424778761, "recall": 0.5336538461538461}, "event": {"f1": 0.38613861386138615, "f1_ci": {}, "precision": 0.527027027027027, "recall": 0.3046875}, "group": {"f1": 0.5733333333333334, "f1_ci": {}, "precision": 0.57847533632287, "recall": 0.5682819383259912}, "location": {"f1": 0.640625, "f1_ci": {}, "precision": 0.6059113300492611, "recall": 0.6795580110497238}, "person": {"f1": 0.8709122203098106, "f1_ci": {}, "precision": 0.8971631205673759, "recall": 0.8461538461538461}, "product": {"f1": 0.6443514644351463, "f1_ci": {}, "precision": 0.6497890295358649, "recall": 0.6390041493775933}}}, "2021.test": {"micro/f1": 0.6451758087201125, "micro/f1_ci": {"90": [0.6360452531843157, 0.6546242674951402], "95": [0.6344128889037165, 0.6562435584441533]}, "micro/recall": 0.6630434782608695, "micro/precision": 0.6282458639202366, "macro/f1": 0.5945137835095485, "macro/f1_ci": {"90": [0.5849625646474219, 0.6048099476423717], "95": [0.5832263131312113, 0.6064180014512437]}, "macro/recall": 0.6195808065595296, "macro/precision": 0.5791991181065553, "per_entity_metric": {"corporation": {"f1": 0.5067218200620476, "f1_ci": {"90": [0.482720115378613, 0.5318353967881462], "95": [0.4796950147983722, 0.5369819879159188]}, "precision": 0.4738878143133462, "recall": 0.5444444444444444}, "creative_work": {"f1": 0.45376220562894887, "f1_ci": {"90": [0.42664800694587557, 0.48082929944314123], "95": [0.42158048043728424, 0.4846494398622172]}, "precision": 0.3910891089108911, "recall": 0.5403556771545828}, "event": {"f1": 0.4452749599572877, "f1_ci": {"90": [0.41903187721369545, 0.4705294264916067], "95": [0.41504947523868935, 0.4746242629321899]}, "precision": 0.5387596899224806, "recall": 0.3794358507734304}, "group": {"f1": 0.6063348416289593, "f1_ci": {"90": [0.5861107400130976, 0.6279132370043825], "95": [0.5830224656689539, 0.6322202313625416]}, "precision": 0.5951776649746193, "recall": 0.6179183135704874}, "location": {"f1": 0.6619263089851325, "f1_ci": {"90": [0.6324935407210904, 0.6882328863476124], "95": [0.6253949479866915, 0.6927306998069366]}, "precision": 0.6161251504211793, "recall": 0.7150837988826816}, "person": {"f1": 0.835890955046037, "f1_ci": {"90": [0.8259559078534311, 0.8463653140397381], "95": [0.8236693872632049, 0.848508779376511]}, "precision": 0.8188892819243013, "recall": 0.8536135693215339}, "product": {"f1": 0.651685393258427, "f1_ci": {"90": [0.6290720140515222, 0.6729078211863003], "95": [0.6251710926532523, 0.6767235821801794]}, "precision": 0.6204651162790698, "recall": 0.6862139917695473}}}, "2020.test": {"micro/f1": 0.6605206073752712, "micro/f1_ci": {"90": [0.6408255159753993, 0.6793274637933173], "95": [0.6373893291997992, 0.6825239933971441]}, "micro/recall": 0.6320705760249092, "micro/precision": 0.6916524701873935, "macro/f1": 0.6182768841282975, "macro/f1_ci": {"90": [0.5961031178968297, 0.6385985065596177], "95": [0.5914673366772406, 0.6421666965178469]}, "macro/recall": 0.600022393469146, "macro/precision": 0.646958757311601, "per_entity_metric": {"corporation": {"f1": 0.5822784810126582, "f1_ci": {"90": 
[0.5256752905886762, 0.6318421628123121], "95": [0.5128, 0.6414092076062835]}, "precision": 0.5637254901960784, "recall": 0.6020942408376964}, "creative_work": {"f1": 0.5343915343915344, "f1_ci": {"90": [0.476412981770012, 0.588596538853071], "95": [0.4636852685629959, 0.5979764281221802]}, "precision": 0.507537688442211, "recall": 0.5642458100558659}, "event": {"f1": 0.4439252336448598, "f1_ci": {"90": [0.38862436460968, 0.497920986683701], "95": [0.3779821592181615, 0.5074727486164683]}, "precision": 0.5828220858895705, "recall": 0.3584905660377358}, "group": {"f1": 0.597864768683274, "f1_ci": {"90": [0.5551718002208861, 0.6446087202184763], "95": [0.548585759887216, 0.6524932368209403]}, "precision": 0.6693227091633466, "recall": 0.5401929260450161}, "location": {"f1": 0.6808510638297872, "f1_ci": {"90": [0.6142731829573934, 0.743047103899031], "95": [0.6024013007045483, 0.7507189002768739]}, "precision": 0.6829268292682927, "recall": 0.6787878787878788}, "person": {"f1": 0.8343347639484978, "f1_ci": {"90": [0.8076569065611846, 0.8573930598247411], "95": [0.8034319572294566, 0.8639877286061661]}, "precision": 0.8541300527240774, "recall": 0.8154362416107382}, "product": {"f1": 0.6542923433874709, "f1_ci": {"90": [0.6053549332303825, 0.7005813660209848], "95": [0.5979308671922375, 0.7089777612241414]}, "precision": 0.6682464454976303, "recall": 0.6409090909090909}}}, "2021.test (span detection)": {"micro/f1": 0.7849668054461573, "micro/f1_ci": {}, "micro/recall": 0.8067537874407309, "micro/precision": 0.7643256272597787, "macro/f1": 0.7849668054461573, "macro/f1_ci": {}, "macro/recall": 0.8067537874407309, "macro/precision": 0.7643256272597787}, "2020.test (span detection)": {"micro/f1": 0.769397721106891, "micro/f1_ci": {}, "micro/recall": 0.7358588479501816, "micro/precision": 0.8061398521887436, "macro/f1": 0.769397721106891, "macro/f1_ci": {}, "macro/recall": 0.7358588479501816, "macro/precision": 0.8061398521887436}}
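The refreshed eval/metric.json keeps the same layout: micro/macro F1, precision, and recall for each split (`2020.dev`, `2020.test`, `2021.test`, plus the span-detection variants), 90%/95% confidence intervals where they were computed, and a per-entity breakdown over the seven entity types. A short sketch for inspecting it from a local clone of the repository:

```python
# Minimal sketch: read the committed eval/metric.json from a local clone.
import json

with open("eval/metric.json") as f:
    metric = json.load(f)

test = metric["2021.test"]
print("micro F1:", test["micro/f1"])
print("macro F1:", test["macro/f1"])
for entity, scores in test["per_entity_metric"].items():
    print(f"{entity:>14}  F1={scores['f1']:.3f}  "
          f"P={scores['precision']:.3f}  R={scores['recall']:.3f}")
```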
eval/prediction.2020.dev.json CHANGED
The diff for this file is too large to render. See raw diff
 
eval/prediction.2020.test.json CHANGED
The diff for this file is too large to render. See raw diff
 
eval/prediction.2021.test.json CHANGED
The diff for this file is too large to render. See raw diff
 
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:06fab623a68f7667511884c9f9ae684a13716f9f5e16622e556051d7c98a657f
- size 1417433137
+ oid sha256:30c08fba6e00561b8cdcde41eb9a80fb82bc92b3f49fd455e5260f2402b0f19d
+ size 1417438577
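Since pytorch_model.bin is tracked with Git LFS, only the pointer file changes here: a new SHA-256 object id and a slightly larger payload (1,417,438,577 bytes). A minimal sketch for checking that a locally downloaded copy of the weights matches the new pointer (oid and size copied from the diff above):

```python
# Minimal sketch: verify a downloaded pytorch_model.bin against the new
# LFS pointer values shown in this diff.
import hashlib
import os

EXPECTED_OID = "30c08fba6e00561b8cdcde41eb9a80fb82bc92b3f49fd455e5260f2402b0f19d"
EXPECTED_SIZE = 1417438577

path = "pytorch_model.bin"
assert os.path.getsize(path) == EXPECTED_SIZE, "size mismatch"

sha = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        sha.update(chunk)
assert sha.hexdigest() == EXPECTED_OID, "checksum mismatch"
print("pytorch_model.bin matches the updated LFS pointer")
```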
tokenizer.json CHANGED
@@ -53,8 +53,7 @@
  "pre_tokenizer": {
  "type": "ByteLevel",
  "add_prefix_space": false,
- "trim_offsets": true,
- "use_regex": true
+ "trim_offsets": true
  },
  "post_processor": {
  "type": "RobertaProcessing",
@@ -72,8 +71,7 @@
  "decoder": {
  "type": "ByteLevel",
  "add_prefix_space": true,
- "trim_offsets": true,
- "use_regex": true
+ "trim_offsets": true
  },
  "model": {
  "type": "BPE",
tokenizer_config.json CHANGED
@@ -6,10 +6,9 @@
  "errors": "replace",
  "mask_token": "<mask>",
  "model_max_length": 512,
- "name_or_path": "cner_output/model/self_training_2021/roberta_large_concat/best_model",
+ "name_or_path": "roberta-large",
  "pad_token": "<pad>",
  "sep_token": "</s>",
- "special_tokens_map_file": "cner_output/model/self_training_2021/roberta_large_concat/best_model/special_tokens_map.json",
  "tokenizer_class": "RobertaTokenizer",
  "trim_offsets": true,
  "unk_token": "<unk>"
trainer_config.json ADDED
@@ -0,0 +1 @@
+ {"data_split": "2020_2021.extra.roberta-large-2020", "model": "roberta-large", "crf": true, "max_length": 128, "epoch": 30, "batch_size": 32, "lr": 1e-05, "random_seed": 0, "gradient_accumulation_steps": 1, "weight_decay": 1e-07, "lr_warmup_step_ratio": 0.3, "max_grad_norm": 1}
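The newly added trainer_config.json records the fine-tuning setup: a CRF layer on top of `roberta-large`, 128-token inputs, 30 epochs at batch size 32, learning rate 1e-5 with a 0.3 warmup ratio, weight decay 1e-7, gradient clipping at 1.0, and seed 0. As a rough illustration only (not the author's actual training script, which reads this file directly and also handles the `crf` and `max_length` fields itself), most of these values map onto `transformers.TrainingArguments` like so:

```python
# Rough, hedged mapping of trainer_config.json onto TrainingArguments.
# The "crf": true and "max_length": 128 entries are handled by the author's
# own training code, not by TrainingArguments; output_dir is a placeholder.
from transformers import TrainingArguments

args = TrainingArguments(
    output_dir="cner_roberta_large",       # placeholder output directory
    num_train_epochs=30,                   # "epoch": 30
    per_device_train_batch_size=32,        # "batch_size": 32
    learning_rate=1e-5,                    # "lr": 1e-05
    weight_decay=1e-7,                     # "weight_decay": 1e-07
    warmup_ratio=0.3,                      # "lr_warmup_step_ratio": 0.3
    max_grad_norm=1.0,                     # "max_grad_norm": 1
    gradient_accumulation_steps=1,         # "gradient_accumulation_steps": 1
    seed=0,                                # "random_seed": 0
)
```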