mevol committed on
Commit
02f0be7
1 Parent(s): 78c054d

adding core files to run smaller bioformer8L model

Browse files
config.json ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "bioformers/bioformer-8L",
3
+ "architectures": [
4
+ "BertForTokenClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.1,
10
+ "hidden_size": 512,
11
+ "id2label": {
12
+ "0": "O",
13
+ "1": "B-chemical",
14
+ "2": "B-species",
15
+ "3": "B-gene",
16
+ "4": "I-gene",
17
+ "5": "B-taxonomy_domain",
18
+ "6": "I-chemical",
19
+ "7": "B-protein_type",
20
+ "8": "B-experimental_method",
21
+ "9": "I-experimental_method",
22
+ "10": "I-protein_type",
23
+ "11": "I-species",
24
+ "12": "B-evidence",
25
+ "13": "I-evidence",
26
+ "14": "B-protein",
27
+ "15": "I-protein",
28
+ "16": "B-protein_state",
29
+ "17": "B-site",
30
+ "18": "I-site",
31
+ "19": "B-structure_element",
32
+ "20": "I-structure_element",
33
+ "21": "B-complex_assembly",
34
+ "22": "I-protein_state",
35
+ "23": "I-complex_assembly",
36
+ "24": "B-ptm",
37
+ "25": "B-mutant",
38
+ "26": "I-mutant",
39
+ "27": "B-oligomeric_state",
40
+ "28": "B-residue_name_number",
41
+ "29": "B-bond_interaction",
42
+ "30": "I-bond_interaction",
43
+ "31": "B-residue_range",
44
+ "32": "I-residue_range",
45
+ "33": "B-residue_name",
46
+ "34": "B-residue_number",
47
+ "35": "I-ptm",
48
+ "36": "I-taxonomy_domain",
49
+ "37": "I-oligomeric_state",
50
+ "38": "I-residue_name_number",
51
+ "39": "I-residue_name",
52
+ "40": "I-residue_number"
53
+ },
54
+ "initializer_range": 0.02,
55
+ "intermediate_size": 2048,
56
+ "label2id": {
57
+ "B-bond_interaction": 29,
58
+ "B-chemical": 1,
59
+ "B-complex_assembly": 21,
60
+ "B-evidence": 12,
61
+ "B-experimental_method": 8,
62
+ "B-gene": 3,
63
+ "B-mutant": 25,
64
+ "B-oligomeric_state": 27,
65
+ "B-protein": 14,
66
+ "B-protein_state": 16,
67
+ "B-protein_type": 7,
68
+ "B-ptm": 24,
69
+ "B-residue_name": 33,
70
+ "B-residue_name_number": 28,
71
+ "B-residue_number": 34,
72
+ "B-residue_range": 31,
73
+ "B-site": 17,
74
+ "B-species": 2,
75
+ "B-structure_element": 19,
76
+ "B-taxonomy_domain": 5,
77
+ "I-bond_interaction": 30,
78
+ "I-chemical": 6,
79
+ "I-complex_assembly": 23,
80
+ "I-evidence": 13,
81
+ "I-experimental_method": 9,
82
+ "I-gene": 4,
83
+ "I-mutant": 26,
84
+ "I-oligomeric_state": 37,
85
+ "I-protein": 15,
86
+ "I-protein_state": 22,
87
+ "I-protein_type": 10,
88
+ "I-ptm": 35,
89
+ "I-residue_name": 39,
90
+ "I-residue_name_number": 38,
91
+ "I-residue_number": 40,
92
+ "I-residue_range": 32,
93
+ "I-site": 18,
94
+ "I-species": 11,
95
+ "I-structure_element": 20,
96
+ "I-taxonomy_domain": 36,
97
+ "O": 0
98
+ },
99
+ "layer_norm_eps": 1e-12,
100
+ "max_position_embeddings": 512,
101
+ "model_type": "bert",
102
+ "num_attention_heads": 8,
103
+ "num_hidden_layers": 8,
104
+ "pad_token_id": 0,
105
+ "position_embedding_type": "absolute",
106
+ "torch_dtype": "float32",
107
+ "transformers_version": "4.28.0",
108
+ "type_vocab_size": 2,
109
+ "use_cache": true,
110
+ "vocab_size": 32768
111
+ }
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b39ef12875e83f443c75ec35530cc20fe735318dc7da605d6e6d7e502e61b775
3
+ size 338331642
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0361d9b66b17de750435cff934d86df55f0ae401007e95b46cb7b866adf00f2e
3
+ size 169176682
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:73c2594a1e721a20cff9f81a9c6b35735e5ef856ae026e6f451a155090123133
3
+ size 14244
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49a2812a42769889644fb265b287a3cb6966f030d4b6988b60a19dba3930cea8
3
+ size 1064
special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "clean_up_tokenization_spaces": true,
3
+ "cls_token": "[CLS]",
4
+ "do_basic_tokenize": true,
5
+ "do_lower_case": false,
6
+ "mask_token": "[MASK]",
7
+ "model_max_length": 1000000000000000019884624838656,
8
+ "never_split": null,
9
+ "pad_token": "[PAD]",
10
+ "sep_token": "[SEP]",
11
+ "strip_accents": null,
12
+ "tokenize_chinese_chars": true,
13
+ "tokenizer_class": "BertTokenizer",
14
+ "unk_token": "[UNK]"
15
+ }
trainer_state.json ADDED
@@ -0,0 +1,226 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 9.659090909090908,
5
+ "global_step": 8500,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.57,
12
+ "learning_rate": 1.8863636363636366e-05,
13
+ "loss": 0.9603,
14
+ "step": 500
15
+ },
16
+ {
17
+ "epoch": 1.0,
18
+ "eval_accuracy": 0.917317101584343,
19
+ "eval_f1": 0.780907047467936,
20
+ "eval_loss": 0.3360126316547394,
21
+ "eval_precision": 0.7684100962789487,
22
+ "eval_recall": 0.7938172043010753,
23
+ "eval_runtime": 4.4585,
24
+ "eval_samples_per_second": 211.507,
25
+ "eval_steps_per_second": 42.391,
26
+ "step": 880
27
+ },
28
+ {
29
+ "epoch": 1.14,
30
+ "learning_rate": 1.772727272727273e-05,
31
+ "loss": 0.4145,
32
+ "step": 1000
33
+ },
34
+ {
35
+ "epoch": 1.7,
36
+ "learning_rate": 1.6590909090909094e-05,
37
+ "loss": 0.2846,
38
+ "step": 1500
39
+ },
40
+ {
41
+ "epoch": 2.0,
42
+ "eval_accuracy": 0.940004659832246,
43
+ "eval_f1": 0.8470136913817051,
44
+ "eval_loss": 0.23408983647823334,
45
+ "eval_precision": 0.8338325302773799,
46
+ "eval_recall": 0.8606182795698925,
47
+ "eval_runtime": 5.0803,
48
+ "eval_samples_per_second": 185.62,
49
+ "eval_steps_per_second": 37.203,
50
+ "step": 1760
51
+ },
52
+ {
53
+ "epoch": 2.27,
54
+ "learning_rate": 1.5454545454545454e-05,
55
+ "loss": 0.2114,
56
+ "step": 2000
57
+ },
58
+ {
59
+ "epoch": 2.84,
60
+ "learning_rate": 1.431818181818182e-05,
61
+ "loss": 0.1634,
62
+ "step": 2500
63
+ },
64
+ {
65
+ "epoch": 3.0,
66
+ "eval_accuracy": 0.9476351351351351,
67
+ "eval_f1": 0.8742690058479533,
68
+ "eval_loss": 0.20282697677612305,
69
+ "eval_precision": 0.8646161934805467,
70
+ "eval_recall": 0.8841397849462366,
71
+ "eval_runtime": 3.3011,
72
+ "eval_samples_per_second": 285.658,
73
+ "eval_steps_per_second": 57.253,
74
+ "step": 2640
75
+ },
76
+ {
77
+ "epoch": 3.41,
78
+ "learning_rate": 1.3181818181818183e-05,
79
+ "loss": 0.1243,
80
+ "step": 3000
81
+ },
82
+ {
83
+ "epoch": 3.98,
84
+ "learning_rate": 1.2045454545454547e-05,
85
+ "loss": 0.1158,
86
+ "step": 3500
87
+ },
88
+ {
89
+ "epoch": 4.0,
90
+ "eval_accuracy": 0.9500232991612302,
91
+ "eval_f1": 0.8811803278688525,
92
+ "eval_loss": 0.1980859786272049,
93
+ "eval_precision": 0.8603072983354674,
94
+ "eval_recall": 0.9030913978494624,
95
+ "eval_runtime": 4.9698,
96
+ "eval_samples_per_second": 189.745,
97
+ "eval_steps_per_second": 38.03,
98
+ "step": 3520
99
+ },
100
+ {
101
+ "epoch": 4.55,
102
+ "learning_rate": 1.0909090909090909e-05,
103
+ "loss": 0.0808,
104
+ "step": 4000
105
+ },
106
+ {
107
+ "epoch": 5.0,
108
+ "eval_accuracy": 0.9508970177073626,
109
+ "eval_f1": 0.8861842105263158,
110
+ "eval_loss": 0.19827169179916382,
111
+ "eval_precision": 0.8679123711340206,
112
+ "eval_recall": 0.905241935483871,
113
+ "eval_runtime": 4.2259,
114
+ "eval_samples_per_second": 223.15,
115
+ "eval_steps_per_second": 44.725,
116
+ "step": 4400
117
+ },
118
+ {
119
+ "epoch": 5.11,
120
+ "learning_rate": 9.772727272727273e-06,
121
+ "loss": 0.0809,
122
+ "step": 4500
123
+ },
124
+ {
125
+ "epoch": 5.68,
126
+ "learning_rate": 8.636363636363637e-06,
127
+ "loss": 0.0679,
128
+ "step": 5000
129
+ },
130
+ {
131
+ "epoch": 6.0,
132
+ "eval_accuracy": 0.952673578751165,
133
+ "eval_f1": 0.89337822671156,
134
+ "eval_loss": 0.1982535719871521,
135
+ "eval_precision": 0.8779032048786817,
136
+ "eval_recall": 0.9094086021505376,
137
+ "eval_runtime": 4.536,
138
+ "eval_samples_per_second": 207.892,
139
+ "eval_steps_per_second": 41.667,
140
+ "step": 5280
141
+ },
142
+ {
143
+ "epoch": 6.25,
144
+ "learning_rate": 7.500000000000001e-06,
145
+ "loss": 0.0582,
146
+ "step": 5500
147
+ },
148
+ {
149
+ "epoch": 6.82,
150
+ "learning_rate": 6.363636363636364e-06,
151
+ "loss": 0.0468,
152
+ "step": 6000
153
+ },
154
+ {
155
+ "epoch": 7.0,
156
+ "eval_accuracy": 0.9559354613233924,
157
+ "eval_f1": 0.9001193792280144,
158
+ "eval_loss": 0.20358432829380035,
159
+ "eval_precision": 0.8884524744697565,
160
+ "eval_recall": 0.9120967741935484,
161
+ "eval_runtime": 4.9544,
162
+ "eval_samples_per_second": 190.337,
163
+ "eval_steps_per_second": 38.148,
164
+ "step": 6160
165
+ },
166
+ {
167
+ "epoch": 7.39,
168
+ "learning_rate": 5.2272727272727274e-06,
169
+ "loss": 0.0436,
170
+ "step": 6500
171
+ },
172
+ {
173
+ "epoch": 7.95,
174
+ "learning_rate": 4.0909090909090915e-06,
175
+ "loss": 0.0403,
176
+ "step": 7000
177
+ },
178
+ {
179
+ "epoch": 8.0,
180
+ "eval_accuracy": 0.955527726001864,
181
+ "eval_f1": 0.8995240613432047,
182
+ "eval_loss": 0.20606616139411926,
183
+ "eval_precision": 0.8850156087408949,
184
+ "eval_recall": 0.9145161290322581,
185
+ "eval_runtime": 3.9742,
186
+ "eval_samples_per_second": 237.282,
187
+ "eval_steps_per_second": 47.557,
188
+ "step": 7040
189
+ },
190
+ {
191
+ "epoch": 8.52,
192
+ "learning_rate": 2.954545454545455e-06,
193
+ "loss": 0.0336,
194
+ "step": 7500
195
+ },
196
+ {
197
+ "epoch": 9.0,
198
+ "eval_accuracy": 0.9554694780987885,
199
+ "eval_f1": 0.9019062748212867,
200
+ "eval_loss": 0.21168170869350433,
201
+ "eval_precision": 0.8884976525821596,
202
+ "eval_recall": 0.915725806451613,
203
+ "eval_runtime": 2.986,
204
+ "eval_samples_per_second": 315.802,
205
+ "eval_steps_per_second": 63.294,
206
+ "step": 7920
207
+ },
208
+ {
209
+ "epoch": 9.09,
210
+ "learning_rate": 1.8181818181818183e-06,
211
+ "loss": 0.0328,
212
+ "step": 8000
213
+ },
214
+ {
215
+ "epoch": 9.66,
216
+ "learning_rate": 6.818181818181818e-07,
217
+ "loss": 0.0312,
218
+ "step": 8500
219
+ }
220
+ ],
221
+ "max_steps": 8800,
222
+ "num_train_epochs": 10,
223
+ "total_flos": 408432261397380.0,
224
+ "trial_name": null,
225
+ "trial_params": null
226
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b551fbf29c3318236ff0e19876fb0afff99641683390db79faf1b4fadab8b042
3
+ size 4088
vocab.txt ADDED
The diff for this file is too large to render. See raw diff