tmills committed on
Commit
dc8c87b
1 Parent(s): 02cebea

Update for cnlpt v0.6.1. lr=1e-5, layer=12, bs=8

Browse files
added_tokens.json CHANGED
@@ -1 +1,10 @@
1
- {"</a2>": 28900, "</e>": 28896, "<a2>": 28899, "<cr>": 28901, "<a1>": 28897, "</a1>": 28898, "<e>": 28895, "<neg>": 28902}
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "</a1>": 28898,
3
+ "</a2>": 28900,
4
+ "</e>": 28896,
5
+ "<a1>": 28897,
6
+ "<a2>": 28899,
7
+ "<cr>": 28901,
8
+ "<e>": 28895,
9
+ "<neg>": 28902
10
+ }
config.json CHANGED
@@ -2,6 +2,7 @@
2
  "architectures": [
3
  "CnlpModelForClassification"
4
  ],
 
5
  "encoder_config": {
6
  "_name_or_path": "microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract",
7
  "add_cross_attention": false,
@@ -66,6 +67,7 @@
66
  "sep_token_id": null,
67
  "task_specific_params": null,
68
  "temperature": 1.0,
 
69
  "tie_encoder_decoder": false,
70
  "tie_word_embeddings": true,
71
  "tokenizer_class": null,
@@ -73,7 +75,7 @@
73
  "top_p": 1.0,
74
  "torch_dtype": null,
75
  "torchscript": false,
76
- "transformers_version": "4.18.0",
77
  "type_vocab_size": 2,
78
  "typical_p": 1.0,
79
  "use_bfloat16": false,
@@ -81,42 +83,73 @@
81
  "vocab_size": 28903
82
  },
83
  "encoder_name": "microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract",
 
 
 
 
 
 
 
 
 
84
  "hidden_dropout_prob": 0.1,
85
  "hidden_size": 768,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86
  "layer": 12,
87
  "model_type": "cnlpt",
88
- "num_labels_list": [
89
- 2,
90
- 2,
91
- 2,
92
- 2,
93
- 2,
94
- 2,
95
- 2
96
- ],
97
  "num_rel_attention_heads": 12,
98
  "rel_attention_head_dims": 64,
99
- "relations": [
100
- false,
101
- false,
102
- false,
103
- false,
104
- false,
105
- false,
106
- false
107
- ],
108
- "tagger": [
109
- false,
110
- false,
111
- false,
112
- false,
113
- false,
114
- false,
115
- false
116
- ],
117
  "tokens": false,
118
  "torch_dtype": "float32",
119
- "transformers_version": "4.18.0",
120
  "use_prior_tasks": false,
121
  "vocab_size": 28903
122
  }
 
2
  "architectures": [
3
  "CnlpModelForClassification"
4
  ],
5
+ "cnlpt_version": "0.7.0",
6
  "encoder_config": {
7
  "_name_or_path": "microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract",
8
  "add_cross_attention": false,
 
67
  "sep_token_id": null,
68
  "task_specific_params": null,
69
  "temperature": 1.0,
70
+ "tf_legacy_loss": false,
71
  "tie_encoder_decoder": false,
72
  "tie_word_embeddings": true,
73
  "tokenizer_class": null,
 
75
  "top_p": 1.0,
76
  "torch_dtype": null,
77
  "torchscript": false,
78
+ "transformers_version": "4.22.2",
79
  "type_vocab_size": 2,
80
  "typical_p": 1.0,
81
  "use_bfloat16": false,
 
83
  "vocab_size": 28903
84
  },
85
  "encoder_name": "microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract",
86
+ "finetuning_task": [
87
+ "Process (Thought Process)",
88
+ "Content (Thought Content)",
89
+ "Mood",
90
+ "Substance",
91
+ "Interpersonal",
92
+ "Occupation",
93
+ "Appearance"
94
+ ],
95
  "hidden_dropout_prob": 0.1,
96
  "hidden_size": 768,
97
+ "hier_head_config": null,
98
+ "label_dictionary": {
99
+ "Appearance": [
100
+ "No",
101
+ "Yes"
102
+ ],
103
+ "Content (Thought Content)": [
104
+ "No",
105
+ "Yes"
106
+ ],
107
+ "Interpersonal": [
108
+ "No",
109
+ "Yes"
110
+ ],
111
+ "Mood": [
112
+ "No",
113
+ "Yes"
114
+ ],
115
+ "Occupation": [
116
+ "No",
117
+ "Yes"
118
+ ],
119
+ "Process (Thought Process)": [
120
+ "No",
121
+ "Yes"
122
+ ],
123
+ "Substance": [
124
+ "No",
125
+ "Yes"
126
+ ]
127
+ },
128
  "layer": 12,
129
  "model_type": "cnlpt",
 
 
 
 
 
 
 
 
 
130
  "num_rel_attention_heads": 12,
131
  "rel_attention_head_dims": 64,
132
+ "relations": {
133
+ "Appearance": false,
134
+ "Content (Thought Content)": false,
135
+ "Interpersonal": false,
136
+ "Mood": false,
137
+ "Occupation": false,
138
+ "Process (Thought Process)": false,
139
+ "Substance": false
140
+ },
141
+ "tagger": {
142
+ "Appearance": false,
143
+ "Content (Thought Content)": false,
144
+ "Interpersonal": false,
145
+ "Mood": false,
146
+ "Occupation": false,
147
+ "Process (Thought Process)": false,
148
+ "Substance": false
149
+ },
150
  "tokens": false,
151
  "torch_dtype": "float32",
152
+ "transformers_version": "4.22.2",
153
  "use_prior_tasks": false,
154
  "vocab_size": 28903
155
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:69e4518295ba2aac6053ba04d7d24304903779bb4e701da8f81f3bd462cf4821
3
- size 449614969
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fdb229677a7f2a2f96e19876627157e6eddef8d833efa8e514efe9eaae05079a
3
+ size 449617245
special_tokens_map.json CHANGED
@@ -1 +1,17 @@
1
- {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "additional_special_tokens": ["<e>", "</e>", "<a1>", "</a1>", "<a2>", "</a2>", "<cr>", "<neg>"]}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<e>",
4
+ "</e>",
5
+ "<a1>",
6
+ "</a1>",
7
+ "<a2>",
8
+ "</a2>",
9
+ "<cr>",
10
+ "<neg>"
11
+ ],
12
+ "cls_token": "[CLS]",
13
+ "mask_token": "[MASK]",
14
+ "pad_token": "[PAD]",
15
+ "sep_token": "[SEP]",
16
+ "unk_token": "[UNK]"
17
+ }
tokenizer.json CHANGED
@@ -1,124 +1,138 @@
1
  {
2
  "version": "1.0",
3
- "truncation": null,
4
- "padding": null,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  "added_tokens": [
6
  {
7
  "id": 0,
8
- "special": true,
9
  "content": "[PAD]",
10
  "single_word": false,
11
  "lstrip": false,
12
  "rstrip": false,
13
- "normalized": false
 
14
  },
15
  {
16
  "id": 1,
17
- "special": true,
18
  "content": "[UNK]",
19
  "single_word": false,
20
  "lstrip": false,
21
  "rstrip": false,
22
- "normalized": false
 
23
  },
24
  {
25
  "id": 2,
26
- "special": true,
27
  "content": "[CLS]",
28
  "single_word": false,
29
  "lstrip": false,
30
  "rstrip": false,
31
- "normalized": false
 
32
  },
33
  {
34
  "id": 3,
35
- "special": true,
36
  "content": "[SEP]",
37
  "single_word": false,
38
  "lstrip": false,
39
  "rstrip": false,
40
- "normalized": false
 
41
  },
42
  {
43
  "id": 4,
44
- "special": true,
45
  "content": "[MASK]",
46
  "single_word": false,
47
  "lstrip": false,
48
  "rstrip": false,
49
- "normalized": false
 
50
  },
51
  {
52
  "id": 28895,
53
- "special": true,
54
  "content": "<e>",
55
  "single_word": false,
56
  "lstrip": false,
57
  "rstrip": false,
58
- "normalized": false
 
59
  },
60
  {
61
  "id": 28896,
62
- "special": true,
63
  "content": "</e>",
64
  "single_word": false,
65
  "lstrip": false,
66
  "rstrip": false,
67
- "normalized": false
 
68
  },
69
  {
70
  "id": 28897,
71
- "special": true,
72
  "content": "<a1>",
73
  "single_word": false,
74
  "lstrip": false,
75
  "rstrip": false,
76
- "normalized": false
 
77
  },
78
  {
79
  "id": 28898,
80
- "special": true,
81
  "content": "</a1>",
82
  "single_word": false,
83
  "lstrip": false,
84
  "rstrip": false,
85
- "normalized": false
 
86
  },
87
  {
88
  "id": 28899,
89
- "special": true,
90
  "content": "<a2>",
91
  "single_word": false,
92
  "lstrip": false,
93
  "rstrip": false,
94
- "normalized": false
 
95
  },
96
  {
97
  "id": 28900,
98
- "special": true,
99
  "content": "</a2>",
100
  "single_word": false,
101
  "lstrip": false,
102
  "rstrip": false,
103
- "normalized": false
 
104
  },
105
  {
106
  "id": 28901,
107
- "special": true,
108
  "content": "<cr>",
109
  "single_word": false,
110
  "lstrip": false,
111
  "rstrip": false,
112
- "normalized": false
 
113
  },
114
  {
115
  "id": 28902,
116
- "special": true,
117
  "content": "<neg>",
118
  "single_word": false,
119
  "lstrip": false,
120
  "rstrip": false,
121
- "normalized": false
 
122
  }
123
  ],
124
  "normalizer": {
 
1
  {
2
  "version": "1.0",
3
+ "truncation": {
4
+ "direction": "Right",
5
+ "max_length": 128,
6
+ "strategy": "LongestFirst",
7
+ "stride": 0
8
+ },
9
+ "padding": {
10
+ "strategy": {
11
+ "Fixed": 128
12
+ },
13
+ "direction": "Right",
14
+ "pad_to_multiple_of": null,
15
+ "pad_id": 0,
16
+ "pad_type_id": 0,
17
+ "pad_token": "[PAD]"
18
+ },
19
  "added_tokens": [
20
  {
21
  "id": 0,
 
22
  "content": "[PAD]",
23
  "single_word": false,
24
  "lstrip": false,
25
  "rstrip": false,
26
+ "normalized": false,
27
+ "special": true
28
  },
29
  {
30
  "id": 1,
 
31
  "content": "[UNK]",
32
  "single_word": false,
33
  "lstrip": false,
34
  "rstrip": false,
35
+ "normalized": false,
36
+ "special": true
37
  },
38
  {
39
  "id": 2,
 
40
  "content": "[CLS]",
41
  "single_word": false,
42
  "lstrip": false,
43
  "rstrip": false,
44
+ "normalized": false,
45
+ "special": true
46
  },
47
  {
48
  "id": 3,
 
49
  "content": "[SEP]",
50
  "single_word": false,
51
  "lstrip": false,
52
  "rstrip": false,
53
+ "normalized": false,
54
+ "special": true
55
  },
56
  {
57
  "id": 4,
 
58
  "content": "[MASK]",
59
  "single_word": false,
60
  "lstrip": false,
61
  "rstrip": false,
62
+ "normalized": false,
63
+ "special": true
64
  },
65
  {
66
  "id": 28895,
 
67
  "content": "<e>",
68
  "single_word": false,
69
  "lstrip": false,
70
  "rstrip": false,
71
+ "normalized": false,
72
+ "special": true
73
  },
74
  {
75
  "id": 28896,
 
76
  "content": "</e>",
77
  "single_word": false,
78
  "lstrip": false,
79
  "rstrip": false,
80
+ "normalized": false,
81
+ "special": true
82
  },
83
  {
84
  "id": 28897,
 
85
  "content": "<a1>",
86
  "single_word": false,
87
  "lstrip": false,
88
  "rstrip": false,
89
+ "normalized": false,
90
+ "special": true
91
  },
92
  {
93
  "id": 28898,
 
94
  "content": "</a1>",
95
  "single_word": false,
96
  "lstrip": false,
97
  "rstrip": false,
98
+ "normalized": false,
99
+ "special": true
100
  },
101
  {
102
  "id": 28899,
 
103
  "content": "<a2>",
104
  "single_word": false,
105
  "lstrip": false,
106
  "rstrip": false,
107
+ "normalized": false,
108
+ "special": true
109
  },
110
  {
111
  "id": 28900,
 
112
  "content": "</a2>",
113
  "single_word": false,
114
  "lstrip": false,
115
  "rstrip": false,
116
+ "normalized": false,
117
+ "special": true
118
  },
119
  {
120
  "id": 28901,
 
121
  "content": "<cr>",
122
  "single_word": false,
123
  "lstrip": false,
124
  "rstrip": false,
125
+ "normalized": false,
126
+ "special": true
127
  },
128
  {
129
  "id": 28902,
 
130
  "content": "<neg>",
131
  "single_word": false,
132
  "lstrip": false,
133
  "rstrip": false,
134
+ "normalized": false,
135
+ "special": true
136
  }
137
  ],
138
  "normalizer": {
tokenizer_config.json CHANGED
@@ -1 +1,26 @@
1
- {"do_lower_case": true, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "add_prefix_space": true, "additional_special_tokens": ["<e>", "</e>", "<a1>", "</a1>", "<a2>", "</a2>", "<cr>", "<neg>"], "special_tokens_map_file": null, "name_or_path": "microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract", "do_basic_tokenize": true, "never_split": null, "tokenizer_class": "BertTokenizer"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": true,
3
+ "additional_special_tokens": [
4
+ "<e>",
5
+ "</e>",
6
+ "<a1>",
7
+ "</a1>",
8
+ "<a2>",
9
+ "</a2>",
10
+ "<cr>",
11
+ "<neg>"
12
+ ],
13
+ "cls_token": "[CLS]",
14
+ "do_basic_tokenize": true,
15
+ "do_lower_case": true,
16
+ "mask_token": "[MASK]",
17
+ "name_or_path": "microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract",
18
+ "never_split": null,
19
+ "pad_token": "[PAD]",
20
+ "sep_token": "[SEP]",
21
+ "special_tokens_map_file": null,
22
+ "strip_accents": null,
23
+ "tokenize_chinese_chars": true,
24
+ "tokenizer_class": "BertTokenizer",
25
+ "unk_token": "[UNK]"
26
+ }