Tim Miller committed
Commit: dcd6a39
1 parent: 1dbb41b

Updated for cnlp v0.6.0

added_tokens.json CHANGED
@@ -1 +1,10 @@
-{"<cr>": 50271, "</e>": 50266, "</a2>": 50270, "</a1>": 50268, "<neg>": 50272, "<a1>": 50267, "<e>": 50265, "<a2>": 50269}
+{
+  "</a1>": 28898,
+  "</a2>": 28900,
+  "</e>": 28896,
+  "<a1>": 28897,
+  "<a2>": 28899,
+  "<cr>": 28901,
+  "<e>": 28895,
+  "<neg>": 28902
+}
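
The remapped IDs start at 28895 instead of 50265 because the added tokens are appended to the end of the encoder's base vocabulary, and PubMedBERT's wordpiece vocabulary is much smaller than RoBERTa's BPE vocabulary. A minimal sketch for sanity-checking the mapping against a local clone of this repo ("./checkpoint" is a placeholder path, not the real repo id):

```python
# Hedged sketch: verify the special-token IDs against added_tokens.json.
# "./checkpoint" is a placeholder for a local clone of this model repo.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("./checkpoint")
print(tok.convert_tokens_to_ids("<e>"))    # expected: 28895
print(tok.convert_tokens_to_ids("<neg>"))  # expected: 28902
print(len(tok))                            # expected: 28903, matching vocab_size in config.json
```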
config.json CHANGED
@@ -2,15 +2,16 @@
   "architectures": [
     "CnlpModelForClassification"
   ],
+  "cnlpt_version": "0.6.0",
   "encoder_config": {
-    "_name_or_path": "roberta-base",
+    "_name_or_path": "microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract",
     "add_cross_attention": false,
     "architectures": [
-      "RobertaForMaskedLM"
+      "BertForMaskedLM"
     ],
     "attention_probs_dropout_prob": 0.1,
     "bad_words_ids": null,
-    "bos_token_id": 0,
+    "bos_token_id": null,
     "chunk_size_feed_forward": 0,
     "classifier_dropout": null,
     "cross_attention_hidden_size": null,
@@ -19,7 +20,8 @@
     "do_sample": false,
     "early_stopping": false,
     "encoder_no_repeat_ngram_size": 0,
-    "eos_token_id": 2,
+    "eos_token_id": null,
+    "exponential_decay_length_penalty": null,
     "finetuning_task": null,
     "forced_bos_token_id": null,
     "forced_eos_token_id": null,
@@ -38,12 +40,12 @@
       "LABEL_0": 0,
       "LABEL_1": 1
     },
-    "layer_norm_eps": 1e-05,
+    "layer_norm_eps": 1e-12,
     "length_penalty": 1.0,
     "max_length": 20,
-    "max_position_embeddings": 514,
+    "max_position_embeddings": 512,
     "min_length": 0,
-    "model_type": "roberta",
+    "model_type": "bert",
     "no_repeat_ngram_size": 0,
     "num_attention_heads": 12,
     "num_beam_groups": 1,
@@ -53,7 +55,7 @@
     "output_attentions": false,
     "output_hidden_states": false,
     "output_scores": false,
-    "pad_token_id": 1,
+    "pad_token_id": 0,
     "position_embedding_type": "absolute",
     "prefix": null,
     "problem_type": null,
@@ -65,6 +67,7 @@
     "sep_token_id": null,
     "task_specific_params": null,
     "temperature": 1.0,
+    "tf_legacy_loss": false,
     "tie_encoder_decoder": false,
     "tie_word_embeddings": true,
     "tokenizer_class": null,
@@ -72,35 +75,46 @@
     "top_p": 1.0,
     "torch_dtype": null,
     "torchscript": false,
-    "transformers_version": "4.15.0",
-    "type_vocab_size": 1,
+    "transformers_version": "4.22.2",
+    "type_vocab_size": 2,
+    "typical_p": 1.0,
     "use_bfloat16": false,
     "use_cache": true,
-    "vocab_size": 50273
+    "vocab_size": 28903
   },
-  "encoder_name": "roberta-base",
+  "encoder_name": "microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract",
   "finetuning_task": [
     "event"
   ],
   "hidden_dropout_prob": 0.1,
   "hidden_size": 768,
-  "layer": 11,
+  "hier_head_config": null,
+  "label_dictionary": {
+    "event": [
+      "B-AFTER",
+      "B-BEFORE",
+      "B-BEFORE/OVERLAP",
+      "B-OVERLAP",
+      "I-AFTER",
+      "I-BEFORE",
+      "I-BEFORE/OVERLAP",
+      "I-OVERLAP",
+      "O"
+    ]
+  },
+  "layer": 12,
   "model_type": "cnlpt",
-  "num_labels_list": [
-    9
-  ],
   "num_rel_attention_heads": 12,
-  "num_tokens": -1,
   "rel_attention_head_dims": 64,
-  "relations": [
-    false
-  ],
-  "tagger": [
-    true
-  ],
+  "relations": {
+    "event": false
+  },
+  "tagger": {
+    "event": true
+  },
   "tokens": false,
   "torch_dtype": "float32",
-  "transformers_version": "4.15.0",
+  "transformers_version": "4.22.2",
   "use_prior_tasks": false,
-  "vocab_size": 50273
+  "vocab_size": 28903
 }
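
Beyond the encoder swap from roberta-base to PubMedBERT, this diff shows the cnlpt v0.6.0 config schema: the position-indexed lists (num_labels_list, tagger: [true], relations: [false]) become dictionaries keyed by task name, with an explicit label_dictionary. A quick, dependency-free way to inspect the new layout (assumes config.json has been downloaded locally):

```python
# Inspect the v0.6.0 per-task schema without loading the model.
import json

with open("config.json") as f:
    cfg = json.load(f)

for task, labels in cfg["label_dictionary"].items():
    kind = "tagger" if cfg["tagger"][task] else "classifier"
    print(f"{task}: {len(labels)} labels, {kind}")
# -> "event: 9 labels, tagger" (the old schema encoded this as num_labels_list: [9])
```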
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:623232360d08a64a4445fb2db5869c2042e5f90ba7496a6bde90e93676d89361
-size 501053685
+oid sha256:4ea51bdfa098b57514ebf75d3e7245564a14f8377c5c7e8ea35521035c92172a
+size 435419517
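
The weights file is tracked by Git LFS, so only the pointer's oid and size change here; the checkpoint shrinks from ~501 MB to ~435 MB, consistent with the smaller embedding matrix (28,903 vs. 50,273 rows). A hedged sketch for verifying a downloaded copy against the pointer above:

```python
# Verify a locally downloaded pytorch_model.bin against the LFS pointer.
import hashlib
import os

h = hashlib.sha256()
with open("pytorch_model.bin", "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        h.update(chunk)

assert os.path.getsize("pytorch_model.bin") == 435419517
assert h.hexdigest() == "4ea51bdfa098b57514ebf75d3e7245564a14f8377c5c7e8ea35521035c92172a"
print("pointer matches")
```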
special_tokens_map.json CHANGED
@@ -1 +1,17 @@
-{"bos_token": "<s>", "eos_token": "</s>", "unk_token": "<unk>", "sep_token": "</s>", "pad_token": "<pad>", "cls_token": "<s>", "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": false}, "additional_special_tokens": ["<e>", "</e>", "<a1>", "</a1>", "<a2>", "</a2>", "<cr>", "<neg>"]}
+{
+  "additional_special_tokens": [
+    "<e>",
+    "</e>",
+    "<a1>",
+    "</a1>",
+    "<a2>",
+    "</a2>",
+    "<cr>",
+    "<neg>"
+  ],
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}
tokenizer_config.json CHANGED
@@ -1 +1,26 @@
-{"unk_token": "<unk>", "bos_token": "<s>", "eos_token": "</s>", "add_prefix_space": true, "errors": "replace", "sep_token": "</s>", "cls_token": "<s>", "pad_token": "<pad>", "mask_token": "<mask>", "trim_offsets": true, "additional_special_tokens": ["<e>", "</e>", "<a1>", "</a1>", "<a2>", "</a2>", "<cr>", "<neg>"], "model_max_length": 512, "special_tokens_map_file": null, "name_or_path": "roberta-base", "tokenizer_class": "RobertaTokenizer"}
+{
+  "add_prefix_space": true,
+  "additional_special_tokens": [
+    "<e>",
+    "</e>",
+    "<a1>",
+    "</a1>",
+    "<a2>",
+    "</a2>",
+    "<cr>",
+    "<neg>"
+  ],
+  "cls_token": "[CLS]",
+  "do_basic_tokenize": true,
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "name_or_path": "microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract",
+  "never_split": null,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "special_tokens_map_file": null,
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}
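
The tokenizer switches from RobertaTokenizer (BPE, <s>/</s> specials) to BertTokenizer (lowercased wordpieces, [CLS]/[SEP] specials), while the eight task markers are preserved as additional special tokens so they still tokenize as single units. (The retained add_prefix_space key appears to be a carry-over from the RoBERTa config; BertTokenizer does not use that option.) A hedged sketch of the expected behavior, again with a placeholder checkpoint path:

```python
# Sketch: confirm the BERT-style specials and that task markers stay atomic.
# "./checkpoint" is a placeholder for a local clone of this model repo.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("./checkpoint")
print(tok.cls_token, tok.sep_token, tok.pad_token)  # [CLS] [SEP] [PAD]
print(tok.tokenize("The <e> biopsy </e> was performed."))
# <e> and </e> should come back as single tokens; the rest as lowercased wordpieces
```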