jatinarora2702 commited on
Commit
13ba46b
·
1 Parent(s): ec85235

first model version

Browse files
config.json ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "allenai/scibert_scivocab_uncased",
3
+ "architectures": [
4
+ "NerSpanModel"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "gradient_checkpointing": false,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.1,
10
+ "hidden_size": 768,
11
+ "id2label": {
12
+ "0": "LABEL_0",
13
+ "1": "LABEL_1",
14
+ "2": "LABEL_2",
15
+ "3": "LABEL_3",
16
+ "4": "LABEL_4",
17
+ "5": "LABEL_5",
18
+ "6": "LABEL_6",
19
+ "7": "LABEL_7",
20
+ "8": "LABEL_8",
21
+ "9": "LABEL_9",
22
+ "10": "LABEL_10",
23
+ "11": "LABEL_11",
24
+ "12": "LABEL_12",
25
+ "13": "LABEL_13",
26
+ "14": "LABEL_14",
27
+ "15": "LABEL_15"
28
+ },
29
+ "initializer_range": 0.02,
30
+ "intermediate_size": 3072,
31
+ "label2id": {
32
+ "LABEL_0": 0,
33
+ "LABEL_1": 1,
34
+ "LABEL_10": 10,
35
+ "LABEL_11": 11,
36
+ "LABEL_12": 12,
37
+ "LABEL_13": 13,
38
+ "LABEL_14": 14,
39
+ "LABEL_15": 15,
40
+ "LABEL_2": 2,
41
+ "LABEL_3": 3,
42
+ "LABEL_4": 4,
43
+ "LABEL_5": 5,
44
+ "LABEL_6": 6,
45
+ "LABEL_7": 7,
46
+ "LABEL_8": 8,
47
+ "LABEL_9": 9
48
+ },
49
+ "layer_norm_eps": 1e-12,
50
+ "max_position_embeddings": 512,
51
+ "model_type": "bert",
52
+ "num_attention_heads": 12,
53
+ "num_hidden_layers": 12,
54
+ "pad_token_id": 0,
55
+ "position_embedding_type": "absolute",
56
+ "transformers_version": "4.2.2",
57
+ "type_vocab_size": 2,
58
+ "use_cache": true,
59
+ "vocab_size": 31090
60
+ }
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eedbf08f938742afe5a40383d72a796a3b37d55487c5bdeaf00625847057f47e
3
+ size 879572305
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2fce980c90bc65349ec16ecc0d8281e2a7d3c807e857dfa31f1248b4a1dee473
3
+ size 439812425
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:28138d1fb1d0d32f2fda32b243256b015d01cdea709da95a4eab2b151e227ef6
3
+ size 623
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"do_lower_case": true, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "special_tokens_map_file": null, "name_or_path": "allenai/scibert_scivocab_uncased", "do_basic_tokenize": true, "never_split": null}
trainer_state.json ADDED
@@ -0,0 +1,1150 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.954646017198851,
3
+ "best_model_checkpoint": "../out/bio/ner-scibert-spanclass-dice/checkpoints/checkpoint-12150",
4
+ "epoch": 53.76106194690266,
5
+ "global_step": 12150,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.66,
12
+ "learning_rate": 9.977876106194692e-06,
13
+ "loss": 0.8423,
14
+ "step": 150
15
+ },
16
+ {
17
+ "epoch": 0.66,
18
+ "eval_loss": 0.8136124014854431,
19
+ "eval_micro_f1": 0.8708517694112636,
20
+ "eval_runtime": 8.6661,
21
+ "eval_samples_per_second": 417.257,
22
+ "step": 150
23
+ },
24
+ {
25
+ "epoch": 1.33,
26
+ "learning_rate": 9.95575221238938e-06,
27
+ "loss": 0.7429,
28
+ "step": 300
29
+ },
30
+ {
31
+ "epoch": 1.33,
32
+ "eval_loss": 0.7772685289382935,
33
+ "eval_micro_f1": 0.9084623888802797,
34
+ "eval_runtime": 8.674,
35
+ "eval_samples_per_second": 416.877,
36
+ "step": 300
37
+ },
38
+ {
39
+ "epoch": 1.99,
40
+ "learning_rate": 9.933628318584071e-06,
41
+ "loss": 0.7013,
42
+ "step": 450
43
+ },
44
+ {
45
+ "epoch": 1.99,
46
+ "eval_loss": 0.7601113319396973,
47
+ "eval_micro_f1": 0.9269911499422214,
48
+ "eval_runtime": 9.8502,
49
+ "eval_samples_per_second": 367.098,
50
+ "step": 450
51
+ },
52
+ {
53
+ "epoch": 2.65,
54
+ "learning_rate": 9.911504424778762e-06,
55
+ "loss": 0.6802,
56
+ "step": 600
57
+ },
58
+ {
59
+ "epoch": 2.65,
60
+ "eval_loss": 0.7511240243911743,
61
+ "eval_micro_f1": 0.9305862826855833,
62
+ "eval_runtime": 8.6885,
63
+ "eval_samples_per_second": 416.182,
64
+ "step": 600
65
+ },
66
+ {
67
+ "epoch": 3.32,
68
+ "learning_rate": 9.889380530973453e-06,
69
+ "loss": 0.6621,
70
+ "step": 750
71
+ },
72
+ {
73
+ "epoch": 3.32,
74
+ "eval_loss": 0.7457098364830017,
75
+ "eval_micro_f1": 0.9341814154289452,
76
+ "eval_runtime": 8.7277,
77
+ "eval_samples_per_second": 414.315,
78
+ "step": 750
79
+ },
80
+ {
81
+ "epoch": 3.98,
82
+ "learning_rate": 9.867256637168142e-06,
83
+ "loss": 0.6506,
84
+ "step": 900
85
+ },
86
+ {
87
+ "epoch": 3.98,
88
+ "eval_loss": 0.7396827340126038,
89
+ "eval_micro_f1": 0.9391592915351384,
90
+ "eval_runtime": 9.3794,
91
+ "eval_samples_per_second": 385.525,
92
+ "step": 900
93
+ },
94
+ {
95
+ "epoch": 4.65,
96
+ "learning_rate": 9.845132743362832e-06,
97
+ "loss": 0.6494,
98
+ "step": 1050
99
+ },
100
+ {
101
+ "epoch": 4.65,
102
+ "eval_loss": 0.7364537119865417,
103
+ "eval_micro_f1": 0.9363938048094755,
104
+ "eval_runtime": 9.3845,
105
+ "eval_samples_per_second": 385.315,
106
+ "step": 1050
107
+ },
108
+ {
109
+ "epoch": 5.31,
110
+ "learning_rate": 9.823008849557523e-06,
111
+ "loss": 0.6381,
112
+ "step": 1200
113
+ },
114
+ {
115
+ "epoch": 5.31,
116
+ "eval_loss": 0.7324703335762024,
117
+ "eval_micro_f1": 0.9416482295882351,
118
+ "eval_runtime": 9.4893,
119
+ "eval_samples_per_second": 381.062,
120
+ "step": 1200
121
+ },
122
+ {
123
+ "epoch": 5.97,
124
+ "learning_rate": 9.800884955752214e-06,
125
+ "loss": 0.6266,
126
+ "step": 1350
127
+ },
128
+ {
129
+ "epoch": 5.97,
130
+ "eval_loss": 0.7316550016403198,
131
+ "eval_micro_f1": 0.9347345127740777,
132
+ "eval_runtime": 9.5002,
133
+ "eval_samples_per_second": 380.623,
134
+ "step": 1350
135
+ },
136
+ {
137
+ "epoch": 6.64,
138
+ "learning_rate": 9.778761061946903e-06,
139
+ "loss": 0.626,
140
+ "step": 1500
141
+ },
142
+ {
143
+ "epoch": 6.64,
144
+ "eval_loss": 0.7285893559455872,
145
+ "eval_micro_f1": 0.9441371676413318,
146
+ "eval_runtime": 8.8611,
147
+ "eval_samples_per_second": 408.077,
148
+ "step": 1500
149
+ },
150
+ {
151
+ "epoch": 7.3,
152
+ "learning_rate": 9.756637168141593e-06,
153
+ "loss": 0.6256,
154
+ "step": 1650
155
+ },
156
+ {
157
+ "epoch": 7.3,
158
+ "eval_loss": 0.7262205481529236,
159
+ "eval_micro_f1": 0.9491150437475251,
160
+ "eval_runtime": 9.627,
161
+ "eval_samples_per_second": 375.609,
162
+ "step": 1650
163
+ },
164
+ {
165
+ "epoch": 7.96,
166
+ "learning_rate": 9.734513274336284e-06,
167
+ "loss": 0.6227,
168
+ "step": 1800
169
+ },
170
+ {
171
+ "epoch": 7.96,
172
+ "eval_loss": 0.7256479859352112,
173
+ "eval_micro_f1": 0.9485619464023926,
174
+ "eval_runtime": 9.26,
175
+ "eval_samples_per_second": 390.497,
176
+ "step": 1800
177
+ },
178
+ {
179
+ "epoch": 8.63,
180
+ "learning_rate": 9.712389380530975e-06,
181
+ "loss": 0.6212,
182
+ "step": 1950
183
+ },
184
+ {
185
+ "epoch": 8.63,
186
+ "eval_loss": 0.7240573763847351,
187
+ "eval_micro_f1": 0.9485619464023926,
188
+ "eval_runtime": 9.5068,
189
+ "eval_samples_per_second": 380.358,
190
+ "step": 1950
191
+ },
192
+ {
193
+ "epoch": 9.29,
194
+ "learning_rate": 9.690265486725664e-06,
195
+ "loss": 0.6139,
196
+ "step": 2100
197
+ },
198
+ {
199
+ "epoch": 9.29,
200
+ "eval_loss": 0.723069965839386,
201
+ "eval_micro_f1": 0.9466261056944284,
202
+ "eval_runtime": 9.5375,
203
+ "eval_samples_per_second": 379.133,
204
+ "step": 2100
205
+ },
206
+ {
207
+ "epoch": 9.96,
208
+ "learning_rate": 9.668141592920355e-06,
209
+ "loss": 0.6171,
210
+ "step": 2250
211
+ },
212
+ {
213
+ "epoch": 9.96,
214
+ "eval_loss": 0.7234218716621399,
215
+ "eval_micro_f1": 0.9496681410926577,
216
+ "eval_runtime": 9.3125,
217
+ "eval_samples_per_second": 388.295,
218
+ "step": 2250
219
+ },
220
+ {
221
+ "epoch": 10.62,
222
+ "learning_rate": 9.646017699115045e-06,
223
+ "loss": 0.6093,
224
+ "step": 2400
225
+ },
226
+ {
227
+ "epoch": 10.62,
228
+ "eval_loss": 0.7221475839614868,
229
+ "eval_micro_f1": 0.9469026543669947,
230
+ "eval_runtime": 9.5515,
231
+ "eval_samples_per_second": 378.58,
232
+ "step": 2400
233
+ },
234
+ {
235
+ "epoch": 11.28,
236
+ "learning_rate": 9.623893805309736e-06,
237
+ "loss": 0.6078,
238
+ "step": 2550
239
+ },
240
+ {
241
+ "epoch": 11.28,
242
+ "eval_loss": 0.7231661081314087,
243
+ "eval_micro_f1": 0.947179203039561,
244
+ "eval_runtime": 9.4735,
245
+ "eval_samples_per_second": 381.697,
246
+ "step": 2550
247
+ },
248
+ {
249
+ "epoch": 11.95,
250
+ "learning_rate": 9.601769911504427e-06,
251
+ "loss": 0.6122,
252
+ "step": 2700
253
+ },
254
+ {
255
+ "epoch": 11.95,
256
+ "eval_loss": 0.7217335104942322,
257
+ "eval_micro_f1": 0.9446902649864644,
258
+ "eval_runtime": 9.4638,
259
+ "eval_samples_per_second": 382.089,
260
+ "step": 2700
261
+ },
262
+ {
263
+ "epoch": 12.61,
264
+ "learning_rate": 9.579646017699116e-06,
265
+ "loss": 0.612,
266
+ "step": 2850
267
+ },
268
+ {
269
+ "epoch": 12.61,
270
+ "eval_loss": 0.7212682366371155,
271
+ "eval_micro_f1": 0.9466261056944284,
272
+ "eval_runtime": 9.4758,
273
+ "eval_samples_per_second": 381.603,
274
+ "step": 2850
275
+ },
276
+ {
277
+ "epoch": 13.27,
278
+ "learning_rate": 9.557522123893806e-06,
279
+ "loss": 0.6134,
280
+ "step": 3000
281
+ },
282
+ {
283
+ "epoch": 13.27,
284
+ "eval_loss": 0.7197656035423279,
285
+ "eval_micro_f1": 0.9474557517121274,
286
+ "eval_runtime": 9.1216,
287
+ "eval_samples_per_second": 396.422,
288
+ "step": 3000
289
+ },
290
+ {
291
+ "epoch": 13.94,
292
+ "learning_rate": 9.535398230088495e-06,
293
+ "loss": 0.607,
294
+ "step": 3150
295
+ },
296
+ {
297
+ "epoch": 13.94,
298
+ "eval_loss": 0.7211037278175354,
299
+ "eval_micro_f1": 0.9430309729510666,
300
+ "eval_runtime": 9.5003,
301
+ "eval_samples_per_second": 380.62,
302
+ "step": 3150
303
+ },
304
+ {
305
+ "epoch": 14.6,
306
+ "learning_rate": 9.513274336283188e-06,
307
+ "loss": 0.6023,
308
+ "step": 3300
309
+ },
310
+ {
311
+ "epoch": 14.6,
312
+ "eval_loss": 0.7198509573936462,
313
+ "eval_micro_f1": 0.9477323003846936,
314
+ "eval_runtime": 9.7084,
315
+ "eval_samples_per_second": 372.461,
316
+ "step": 3300
317
+ },
318
+ {
319
+ "epoch": 15.27,
320
+ "learning_rate": 9.491150442477877e-06,
321
+ "loss": 0.6084,
322
+ "step": 3450
323
+ },
324
+ {
325
+ "epoch": 15.27,
326
+ "eval_loss": 0.7201675772666931,
327
+ "eval_micro_f1": 0.9460730083492959,
328
+ "eval_runtime": 8.7893,
329
+ "eval_samples_per_second": 411.412,
330
+ "step": 3450
331
+ },
332
+ {
333
+ "epoch": 15.93,
334
+ "learning_rate": 9.469026548672568e-06,
335
+ "loss": 0.6111,
336
+ "step": 3600
337
+ },
338
+ {
339
+ "epoch": 15.93,
340
+ "eval_loss": 0.7195026874542236,
341
+ "eval_micro_f1": 0.9463495570218622,
342
+ "eval_runtime": 9.5062,
343
+ "eval_samples_per_second": 380.382,
344
+ "step": 3600
345
+ },
346
+ {
347
+ "epoch": 16.59,
348
+ "learning_rate": 9.446902654867257e-06,
349
+ "loss": 0.5994,
350
+ "step": 3750
351
+ },
352
+ {
353
+ "epoch": 16.59,
354
+ "eval_loss": 0.7199171185493469,
355
+ "eval_micro_f1": 0.9449668136590308,
356
+ "eval_runtime": 9.3322,
357
+ "eval_samples_per_second": 387.476,
358
+ "step": 3750
359
+ },
360
+ {
361
+ "epoch": 17.26,
362
+ "learning_rate": 9.424778761061947e-06,
363
+ "loss": 0.6087,
364
+ "step": 3900
365
+ },
366
+ {
367
+ "epoch": 17.26,
368
+ "eval_loss": 0.7187968492507935,
369
+ "eval_micro_f1": 0.9477323003846936,
370
+ "eval_runtime": 10.5814,
371
+ "eval_samples_per_second": 341.733,
372
+ "step": 3900
373
+ },
374
+ {
375
+ "epoch": 17.92,
376
+ "learning_rate": 9.402654867256638e-06,
377
+ "loss": 0.604,
378
+ "step": 4050
379
+ },
380
+ {
381
+ "epoch": 17.92,
382
+ "eval_loss": 0.7195943593978882,
383
+ "eval_micro_f1": 0.9435840702961992,
384
+ "eval_runtime": 9.4668,
385
+ "eval_samples_per_second": 381.967,
386
+ "step": 4050
387
+ },
388
+ {
389
+ "epoch": 18.58,
390
+ "learning_rate": 9.380530973451329e-06,
391
+ "loss": 0.6044,
392
+ "step": 4200
393
+ },
394
+ {
395
+ "epoch": 18.58,
396
+ "eval_loss": 0.718863844871521,
397
+ "eval_micro_f1": 0.9358407074643429,
398
+ "eval_runtime": 8.7769,
399
+ "eval_samples_per_second": 411.992,
400
+ "step": 4200
401
+ },
402
+ {
403
+ "epoch": 19.25,
404
+ "learning_rate": 9.358407079646018e-06,
405
+ "loss": 0.6096,
406
+ "step": 4350
407
+ },
408
+ {
409
+ "epoch": 19.25,
410
+ "eval_loss": 0.7180087566375732,
411
+ "eval_micro_f1": 0.9438606189687655,
412
+ "eval_runtime": 9.581,
413
+ "eval_samples_per_second": 377.415,
414
+ "step": 4350
415
+ },
416
+ {
417
+ "epoch": 19.91,
418
+ "learning_rate": 9.336283185840708e-06,
419
+ "loss": 0.6046,
420
+ "step": 4500
421
+ },
422
+ {
423
+ "epoch": 19.91,
424
+ "eval_loss": 0.7181408405303955,
425
+ "eval_micro_f1": 0.9457964596767295,
426
+ "eval_runtime": 9.2502,
427
+ "eval_samples_per_second": 390.908,
428
+ "step": 4500
429
+ },
430
+ {
431
+ "epoch": 20.58,
432
+ "learning_rate": 9.314159292035399e-06,
433
+ "loss": 0.6028,
434
+ "step": 4650
435
+ },
436
+ {
437
+ "epoch": 20.58,
438
+ "eval_loss": 0.7181771397590637,
439
+ "eval_micro_f1": 0.9496681410926577,
440
+ "eval_runtime": 9.4887,
441
+ "eval_samples_per_second": 381.084,
442
+ "step": 4650
443
+ },
444
+ {
445
+ "epoch": 21.24,
446
+ "learning_rate": 9.29203539823009e-06,
447
+ "loss": 0.6066,
448
+ "step": 4800
449
+ },
450
+ {
451
+ "epoch": 21.24,
452
+ "eval_loss": 0.7180553674697876,
453
+ "eval_micro_f1": 0.9496681410926577,
454
+ "eval_runtime": 9.5643,
455
+ "eval_samples_per_second": 378.074,
456
+ "step": 4800
457
+ },
458
+ {
459
+ "epoch": 21.9,
460
+ "learning_rate": 9.26991150442478e-06,
461
+ "loss": 0.5981,
462
+ "step": 4950
463
+ },
464
+ {
465
+ "epoch": 21.9,
466
+ "eval_loss": 0.7184288501739502,
467
+ "eval_micro_f1": 0.9474557517121274,
468
+ "eval_runtime": 9.5293,
469
+ "eval_samples_per_second": 379.463,
470
+ "step": 4950
471
+ },
472
+ {
473
+ "epoch": 22.57,
474
+ "learning_rate": 9.24778761061947e-06,
475
+ "loss": 0.6051,
476
+ "step": 5100
477
+ },
478
+ {
479
+ "epoch": 22.57,
480
+ "eval_loss": 0.7198731899261475,
481
+ "eval_micro_f1": 0.9460730083492959,
482
+ "eval_runtime": 9.5484,
483
+ "eval_samples_per_second": 378.701,
484
+ "step": 5100
485
+ },
486
+ {
487
+ "epoch": 23.23,
488
+ "learning_rate": 9.22566371681416e-06,
489
+ "loss": 0.606,
490
+ "step": 5250
491
+ },
492
+ {
493
+ "epoch": 23.23,
494
+ "eval_loss": 0.719587504863739,
495
+ "eval_micro_f1": 0.9480088490572599,
496
+ "eval_runtime": 9.1658,
497
+ "eval_samples_per_second": 394.511,
498
+ "step": 5250
499
+ },
500
+ {
501
+ "epoch": 23.89,
502
+ "learning_rate": 9.203539823008851e-06,
503
+ "loss": 0.6026,
504
+ "step": 5400
505
+ },
506
+ {
507
+ "epoch": 23.89,
508
+ "eval_loss": 0.7177981734275818,
509
+ "eval_micro_f1": 0.9457964596767295,
510
+ "eval_runtime": 9.4924,
511
+ "eval_samples_per_second": 380.936,
512
+ "step": 5400
513
+ },
514
+ {
515
+ "epoch": 24.56,
516
+ "learning_rate": 9.181415929203542e-06,
517
+ "loss": 0.6056,
518
+ "step": 5550
519
+ },
520
+ {
521
+ "epoch": 24.56,
522
+ "eval_loss": 0.7172006368637085,
523
+ "eval_micro_f1": 0.9460730083492959,
524
+ "eval_runtime": 9.5788,
525
+ "eval_samples_per_second": 377.498,
526
+ "step": 5550
527
+ },
528
+ {
529
+ "epoch": 25.22,
530
+ "learning_rate": 9.15929203539823e-06,
531
+ "loss": 0.6055,
532
+ "step": 5700
533
+ },
534
+ {
535
+ "epoch": 25.22,
536
+ "eval_loss": 0.7203790545463562,
537
+ "eval_micro_f1": 0.9402654862254036,
538
+ "eval_runtime": 8.7723,
539
+ "eval_samples_per_second": 412.206,
540
+ "step": 5700
541
+ },
542
+ {
543
+ "epoch": 25.88,
544
+ "learning_rate": 9.137168141592921e-06,
545
+ "loss": 0.5973,
546
+ "step": 5850
547
+ },
548
+ {
549
+ "epoch": 25.88,
550
+ "eval_loss": 0.7177384495735168,
551
+ "eval_micro_f1": 0.9513274331280555,
552
+ "eval_runtime": 9.578,
553
+ "eval_samples_per_second": 377.534,
554
+ "step": 5850
555
+ },
556
+ {
557
+ "epoch": 26.55,
558
+ "learning_rate": 9.11504424778761e-06,
559
+ "loss": 0.6076,
560
+ "step": 6000
561
+ },
562
+ {
563
+ "epoch": 26.55,
564
+ "eval_loss": 0.7195953130722046,
565
+ "eval_micro_f1": 0.9433075216236328,
566
+ "eval_runtime": 10.5304,
567
+ "eval_samples_per_second": 343.388,
568
+ "step": 6000
569
+ },
570
+ {
571
+ "epoch": 27.21,
572
+ "learning_rate": 9.092920353982303e-06,
573
+ "loss": 0.6037,
574
+ "step": 6150
575
+ },
576
+ {
577
+ "epoch": 27.21,
578
+ "eval_loss": 0.7189124226570129,
579
+ "eval_micro_f1": 0.945243362331597,
580
+ "eval_runtime": 9.5745,
581
+ "eval_samples_per_second": 377.668,
582
+ "step": 6150
583
+ },
584
+ {
585
+ "epoch": 27.88,
586
+ "learning_rate": 9.070796460176992e-06,
587
+ "loss": 0.6002,
588
+ "step": 6300
589
+ },
590
+ {
591
+ "epoch": 27.88,
592
+ "eval_loss": 0.7183624505996704,
593
+ "eval_micro_f1": 0.9422013269333677,
594
+ "eval_runtime": 9.5247,
595
+ "eval_samples_per_second": 379.646,
596
+ "step": 6300
597
+ },
598
+ {
599
+ "epoch": 28.54,
600
+ "learning_rate": 9.048672566371682e-06,
601
+ "loss": 0.5995,
602
+ "step": 6450
603
+ },
604
+ {
605
+ "epoch": 28.54,
606
+ "eval_loss": 0.7185026407241821,
607
+ "eval_micro_f1": 0.945243362331597,
608
+ "eval_runtime": 8.8185,
609
+ "eval_samples_per_second": 410.046,
610
+ "step": 6450
611
+ },
612
+ {
613
+ "epoch": 29.2,
614
+ "learning_rate": 9.026548672566371e-06,
615
+ "loss": 0.6088,
616
+ "step": 6600
617
+ },
618
+ {
619
+ "epoch": 29.2,
620
+ "eval_loss": 0.7188266515731812,
621
+ "eval_micro_f1": 0.9419247782608015,
622
+ "eval_runtime": 9.6772,
623
+ "eval_samples_per_second": 373.662,
624
+ "step": 6600
625
+ },
626
+ {
627
+ "epoch": 29.87,
628
+ "learning_rate": 9.004424778761062e-06,
629
+ "loss": 0.602,
630
+ "step": 6750
631
+ },
632
+ {
633
+ "epoch": 29.87,
634
+ "eval_loss": 0.7173834443092346,
635
+ "eval_micro_f1": 0.9460730083492959,
636
+ "eval_runtime": 9.3753,
637
+ "eval_samples_per_second": 385.695,
638
+ "step": 6750
639
+ },
640
+ {
641
+ "epoch": 30.53,
642
+ "learning_rate": 8.982300884955753e-06,
643
+ "loss": 0.6057,
644
+ "step": 6900
645
+ },
646
+ {
647
+ "epoch": 30.53,
648
+ "eval_loss": 0.719177782535553,
649
+ "eval_micro_f1": 0.9474557517121274,
650
+ "eval_runtime": 9.5982,
651
+ "eval_samples_per_second": 376.736,
652
+ "step": 6900
653
+ },
654
+ {
655
+ "epoch": 31.19,
656
+ "learning_rate": 8.960176991150443e-06,
657
+ "loss": 0.604,
658
+ "step": 7050
659
+ },
660
+ {
661
+ "epoch": 31.19,
662
+ "eval_loss": 0.7192238569259644,
663
+ "eval_micro_f1": 0.9416482295882351,
664
+ "eval_runtime": 9.6495,
665
+ "eval_samples_per_second": 374.736,
666
+ "step": 7050
667
+ },
668
+ {
669
+ "epoch": 31.86,
670
+ "learning_rate": 8.938053097345133e-06,
671
+ "loss": 0.6062,
672
+ "step": 7200
673
+ },
674
+ {
675
+ "epoch": 31.86,
676
+ "eval_loss": 0.7208277583122253,
677
+ "eval_micro_f1": 0.9358407074643429,
678
+ "eval_runtime": 8.8172,
679
+ "eval_samples_per_second": 410.106,
680
+ "step": 7200
681
+ },
682
+ {
683
+ "epoch": 32.52,
684
+ "learning_rate": 8.915929203539823e-06,
685
+ "loss": 0.5952,
686
+ "step": 7350
687
+ },
688
+ {
689
+ "epoch": 32.52,
690
+ "eval_loss": 0.7197518944740295,
691
+ "eval_micro_f1": 0.94054203489797,
692
+ "eval_runtime": 9.5177,
693
+ "eval_samples_per_second": 379.925,
694
+ "step": 7350
695
+ },
696
+ {
697
+ "epoch": 33.19,
698
+ "learning_rate": 8.893805309734514e-06,
699
+ "loss": 0.6097,
700
+ "step": 7500
701
+ },
702
+ {
703
+ "epoch": 33.19,
704
+ "eval_loss": 0.7175168991088867,
705
+ "eval_micro_f1": 0.9466261056944284,
706
+ "eval_runtime": 9.6701,
707
+ "eval_samples_per_second": 373.936,
708
+ "step": 7500
709
+ },
710
+ {
711
+ "epoch": 33.85,
712
+ "learning_rate": 8.871681415929205e-06,
713
+ "loss": 0.6022,
714
+ "step": 7650
715
+ },
716
+ {
717
+ "epoch": 33.85,
718
+ "eval_loss": 0.716774046421051,
719
+ "eval_micro_f1": 0.9474557517121274,
720
+ "eval_runtime": 9.597,
721
+ "eval_samples_per_second": 376.786,
722
+ "step": 7650
723
+ },
724
+ {
725
+ "epoch": 34.51,
726
+ "learning_rate": 8.849557522123895e-06,
727
+ "loss": 0.6036,
728
+ "step": 7800
729
+ },
730
+ {
731
+ "epoch": 34.51,
732
+ "eval_loss": 0.716645359992981,
733
+ "eval_micro_f1": 0.9510508844554891,
734
+ "eval_runtime": 9.6812,
735
+ "eval_samples_per_second": 373.509,
736
+ "step": 7800
737
+ },
738
+ {
739
+ "epoch": 35.18,
740
+ "learning_rate": 8.827433628318584e-06,
741
+ "loss": 0.6032,
742
+ "step": 7950
743
+ },
744
+ {
745
+ "epoch": 35.18,
746
+ "eval_loss": 0.7164170742034912,
747
+ "eval_micro_f1": 0.9504977871103566,
748
+ "eval_runtime": 8.7651,
749
+ "eval_samples_per_second": 412.544,
750
+ "step": 7950
751
+ },
752
+ {
753
+ "epoch": 35.84,
754
+ "learning_rate": 8.805309734513275e-06,
755
+ "loss": 0.6009,
756
+ "step": 8100
757
+ },
758
+ {
759
+ "epoch": 35.84,
760
+ "eval_loss": 0.7191314697265625,
761
+ "eval_micro_f1": 0.9413716809156688,
762
+ "eval_runtime": 9.5325,
763
+ "eval_samples_per_second": 379.332,
764
+ "step": 8100
765
+ },
766
+ {
767
+ "epoch": 36.5,
768
+ "learning_rate": 8.783185840707966e-06,
769
+ "loss": 0.6038,
770
+ "step": 8250
771
+ },
772
+ {
773
+ "epoch": 36.5,
774
+ "eval_loss": 0.7175291776657104,
775
+ "eval_micro_f1": 0.9463495570218622,
776
+ "eval_runtime": 9.3583,
777
+ "eval_samples_per_second": 386.397,
778
+ "step": 8250
779
+ },
780
+ {
781
+ "epoch": 37.17,
782
+ "learning_rate": 8.761061946902656e-06,
783
+ "loss": 0.6014,
784
+ "step": 8400
785
+ },
786
+ {
787
+ "epoch": 37.17,
788
+ "eval_loss": 0.7180671095848083,
789
+ "eval_micro_f1": 0.9449668136590308,
790
+ "eval_runtime": 9.5558,
791
+ "eval_samples_per_second": 378.41,
792
+ "step": 8400
793
+ },
794
+ {
795
+ "epoch": 37.83,
796
+ "learning_rate": 8.738938053097345e-06,
797
+ "loss": 0.6038,
798
+ "step": 8550
799
+ },
800
+ {
801
+ "epoch": 37.83,
802
+ "eval_loss": 0.716873049736023,
803
+ "eval_micro_f1": 0.9504977871103566,
804
+ "eval_runtime": 9.4915,
805
+ "eval_samples_per_second": 380.972,
806
+ "step": 8550
807
+ },
808
+ {
809
+ "epoch": 38.5,
810
+ "learning_rate": 8.716814159292036e-06,
811
+ "loss": 0.6009,
812
+ "step": 8700
813
+ },
814
+ {
815
+ "epoch": 38.5,
816
+ "eval_loss": 0.7172227501869202,
817
+ "eval_micro_f1": 0.9422013269333677,
818
+ "eval_runtime": 9.542,
819
+ "eval_samples_per_second": 378.955,
820
+ "step": 8700
821
+ },
822
+ {
823
+ "epoch": 39.16,
824
+ "learning_rate": 8.694690265486727e-06,
825
+ "loss": 0.5999,
826
+ "step": 8850
827
+ },
828
+ {
829
+ "epoch": 39.16,
830
+ "eval_loss": 0.7166829109191895,
831
+ "eval_micro_f1": 0.9496681410926577,
832
+ "eval_runtime": 10.747,
833
+ "eval_samples_per_second": 336.465,
834
+ "step": 8850
835
+ },
836
+ {
837
+ "epoch": 39.82,
838
+ "learning_rate": 8.672566371681418e-06,
839
+ "loss": 0.6019,
840
+ "step": 9000
841
+ },
842
+ {
843
+ "epoch": 39.82,
844
+ "eval_loss": 0.7165006399154663,
845
+ "eval_micro_f1": 0.9502212384377903,
846
+ "eval_runtime": 9.5714,
847
+ "eval_samples_per_second": 377.793,
848
+ "step": 9000
849
+ },
850
+ {
851
+ "epoch": 40.49,
852
+ "learning_rate": 8.650442477876107e-06,
853
+ "loss": 0.6037,
854
+ "step": 9150
855
+ },
856
+ {
857
+ "epoch": 40.49,
858
+ "eval_loss": 0.7166875004768372,
859
+ "eval_micro_f1": 0.945243362331597,
860
+ "eval_runtime": 9.6208,
861
+ "eval_samples_per_second": 375.854,
862
+ "step": 9150
863
+ },
864
+ {
865
+ "epoch": 41.15,
866
+ "learning_rate": 8.628318584070797e-06,
867
+ "loss": 0.605,
868
+ "step": 9300
869
+ },
870
+ {
871
+ "epoch": 41.15,
872
+ "eval_loss": 0.7164918184280396,
873
+ "eval_micro_f1": 0.9463495570218622,
874
+ "eval_runtime": 9.5729,
875
+ "eval_samples_per_second": 377.733,
876
+ "step": 9300
877
+ },
878
+ {
879
+ "epoch": 41.81,
880
+ "learning_rate": 8.606194690265486e-06,
881
+ "loss": 0.5997,
882
+ "step": 9450
883
+ },
884
+ {
885
+ "epoch": 41.81,
886
+ "eval_loss": 0.7189673781394958,
887
+ "eval_micro_f1": 0.9433075216236328,
888
+ "eval_runtime": 8.8106,
889
+ "eval_samples_per_second": 410.413,
890
+ "step": 9450
891
+ },
892
+ {
893
+ "epoch": 42.48,
894
+ "learning_rate": 8.584070796460177e-06,
895
+ "loss": 0.6024,
896
+ "step": 9600
897
+ },
898
+ {
899
+ "epoch": 42.48,
900
+ "eval_loss": 0.7176528573036194,
901
+ "eval_micro_f1": 0.9435840702961992,
902
+ "eval_runtime": 9.5036,
903
+ "eval_samples_per_second": 380.489,
904
+ "step": 9600
905
+ },
906
+ {
907
+ "epoch": 43.14,
908
+ "learning_rate": 8.561946902654868e-06,
909
+ "loss": 0.6004,
910
+ "step": 9750
911
+ },
912
+ {
913
+ "epoch": 43.14,
914
+ "eval_loss": 0.7176242470741272,
915
+ "eval_micro_f1": 0.9455199110041632,
916
+ "eval_runtime": 9.3795,
917
+ "eval_samples_per_second": 385.523,
918
+ "step": 9750
919
+ },
920
+ {
921
+ "epoch": 43.81,
922
+ "learning_rate": 8.539823008849558e-06,
923
+ "loss": 0.6042,
924
+ "step": 9900
925
+ },
926
+ {
927
+ "epoch": 43.81,
928
+ "eval_loss": 0.718696653842926,
929
+ "eval_micro_f1": 0.9410951322431025,
930
+ "eval_runtime": 9.5715,
931
+ "eval_samples_per_second": 377.788,
932
+ "step": 9900
933
+ },
934
+ {
935
+ "epoch": 44.47,
936
+ "learning_rate": 8.517699115044249e-06,
937
+ "loss": 0.6018,
938
+ "step": 10050
939
+ },
940
+ {
941
+ "epoch": 44.47,
942
+ "eval_loss": 0.7181583046913147,
943
+ "eval_micro_f1": 0.9460730083492959,
944
+ "eval_runtime": 9.5656,
945
+ "eval_samples_per_second": 378.02,
946
+ "step": 10050
947
+ },
948
+ {
949
+ "epoch": 45.13,
950
+ "learning_rate": 8.495575221238938e-06,
951
+ "loss": 0.6055,
952
+ "step": 10200
953
+ },
954
+ {
955
+ "epoch": 45.13,
956
+ "eval_loss": 0.7177845239639282,
957
+ "eval_micro_f1": 0.9460730083492959,
958
+ "eval_runtime": 8.8211,
959
+ "eval_samples_per_second": 409.925,
960
+ "step": 10200
961
+ },
962
+ {
963
+ "epoch": 45.8,
964
+ "learning_rate": 8.473451327433629e-06,
965
+ "loss": 0.6009,
966
+ "step": 10350
967
+ },
968
+ {
969
+ "epoch": 45.8,
970
+ "eval_loss": 0.7190163135528564,
971
+ "eval_micro_f1": 0.9433075216236328,
972
+ "eval_runtime": 9.5296,
973
+ "eval_samples_per_second": 379.451,
974
+ "step": 10350
975
+ },
976
+ {
977
+ "epoch": 46.46,
978
+ "learning_rate": 8.45132743362832e-06,
979
+ "loss": 0.6053,
980
+ "step": 10500
981
+ },
982
+ {
983
+ "epoch": 46.46,
984
+ "eval_loss": 0.7190549969673157,
985
+ "eval_micro_f1": 0.9427544242785004,
986
+ "eval_runtime": 9.292,
987
+ "eval_samples_per_second": 389.152,
988
+ "step": 10500
989
+ },
990
+ {
991
+ "epoch": 47.12,
992
+ "learning_rate": 8.42920353982301e-06,
993
+ "loss": 0.5996,
994
+ "step": 10650
995
+ },
996
+ {
997
+ "epoch": 47.12,
998
+ "eval_loss": 0.7188761830329895,
999
+ "eval_micro_f1": 0.9380530968448733,
1000
+ "eval_runtime": 9.6402,
1001
+ "eval_samples_per_second": 375.096,
1002
+ "step": 10650
1003
+ },
1004
+ {
1005
+ "epoch": 47.79,
1006
+ "learning_rate": 8.4070796460177e-06,
1007
+ "loss": 0.6022,
1008
+ "step": 10800
1009
+ },
1010
+ {
1011
+ "epoch": 47.79,
1012
+ "eval_loss": 0.7177832722663879,
1013
+ "eval_micro_f1": 0.9446902649864644,
1014
+ "eval_runtime": 9.6418,
1015
+ "eval_samples_per_second": 375.035,
1016
+ "step": 10800
1017
+ },
1018
+ {
1019
+ "epoch": 48.45,
1020
+ "learning_rate": 8.38495575221239e-06,
1021
+ "loss": 0.6039,
1022
+ "step": 10950
1023
+ },
1024
+ {
1025
+ "epoch": 48.45,
1026
+ "eval_loss": 0.7174757719039917,
1027
+ "eval_micro_f1": 0.9460730083492959,
1028
+ "eval_runtime": 8.7509,
1029
+ "eval_samples_per_second": 413.213,
1030
+ "step": 10950
1031
+ },
1032
+ {
1033
+ "epoch": 49.12,
1034
+ "learning_rate": 8.36283185840708e-06,
1035
+ "loss": 0.5968,
1036
+ "step": 11100
1037
+ },
1038
+ {
1039
+ "epoch": 49.12,
1040
+ "eval_loss": 0.7184685468673706,
1041
+ "eval_micro_f1": 0.9410951322431025,
1042
+ "eval_runtime": 9.6464,
1043
+ "eval_samples_per_second": 374.856,
1044
+ "step": 11100
1045
+ },
1046
+ {
1047
+ "epoch": 49.78,
1048
+ "learning_rate": 8.340707964601771e-06,
1049
+ "loss": 0.5976,
1050
+ "step": 11250
1051
+ },
1052
+ {
1053
+ "epoch": 49.78,
1054
+ "eval_loss": 0.717673659324646,
1055
+ "eval_micro_f1": 0.9433075216236328,
1056
+ "eval_runtime": 8.9187,
1057
+ "eval_samples_per_second": 405.439,
1058
+ "step": 11250
1059
+ },
1060
+ {
1061
+ "epoch": 50.44,
1062
+ "learning_rate": 8.31858407079646e-06,
1063
+ "loss": 0.6025,
1064
+ "step": 11400
1065
+ },
1066
+ {
1067
+ "epoch": 50.44,
1068
+ "eval_loss": 0.7185506224632263,
1069
+ "eval_micro_f1": 0.9482853977298262,
1070
+ "eval_runtime": 9.5683,
1071
+ "eval_samples_per_second": 377.914,
1072
+ "step": 11400
1073
+ },
1074
+ {
1075
+ "epoch": 51.11,
1076
+ "learning_rate": 8.296460176991151e-06,
1077
+ "loss": 0.6045,
1078
+ "step": 11550
1079
+ },
1080
+ {
1081
+ "epoch": 51.11,
1082
+ "eval_loss": 0.7161318063735962,
1083
+ "eval_micro_f1": 0.9482853977298262,
1084
+ "eval_runtime": 9.4873,
1085
+ "eval_samples_per_second": 381.139,
1086
+ "step": 11550
1087
+ },
1088
+ {
1089
+ "epoch": 51.77,
1090
+ "learning_rate": 8.274336283185842e-06,
1091
+ "loss": 0.6066,
1092
+ "step": 11700
1093
+ },
1094
+ {
1095
+ "epoch": 51.77,
1096
+ "eval_loss": 0.7150410413742065,
1097
+ "eval_micro_f1": 0.9504977871103566,
1098
+ "eval_runtime": 9.772,
1099
+ "eval_samples_per_second": 370.036,
1100
+ "step": 11700
1101
+ },
1102
+ {
1103
+ "epoch": 52.43,
1104
+ "learning_rate": 8.252212389380532e-06,
1105
+ "loss": 0.5995,
1106
+ "step": 11850
1107
+ },
1108
+ {
1109
+ "epoch": 52.43,
1110
+ "eval_loss": 0.7161645293235779,
1111
+ "eval_micro_f1": 0.952710176490887,
1112
+ "eval_runtime": 9.5008,
1113
+ "eval_samples_per_second": 380.601,
1114
+ "step": 11850
1115
+ },
1116
+ {
1117
+ "epoch": 53.1,
1118
+ "learning_rate": 8.230088495575221e-06,
1119
+ "loss": 0.6021,
1120
+ "step": 12000
1121
+ },
1122
+ {
1123
+ "epoch": 53.1,
1124
+ "eval_loss": 0.7195029258728027,
1125
+ "eval_micro_f1": 0.9361172561369092,
1126
+ "eval_runtime": 10.6413,
1127
+ "eval_samples_per_second": 339.807,
1128
+ "step": 12000
1129
+ },
1130
+ {
1131
+ "epoch": 53.76,
1132
+ "learning_rate": 8.207964601769912e-06,
1133
+ "loss": 0.6027,
1134
+ "step": 12150
1135
+ },
1136
+ {
1137
+ "epoch": 53.76,
1138
+ "eval_loss": 0.71631920337677,
1139
+ "eval_micro_f1": 0.954646017198851,
1140
+ "eval_runtime": 9.5886,
1141
+ "eval_samples_per_second": 377.113,
1142
+ "step": 12150
1143
+ }
1144
+ ],
1145
+ "max_steps": 67800,
1146
+ "num_train_epochs": 300,
1147
+ "total_flos": 42104269489406592,
1148
+ "trial_name": null,
1149
+ "trial_params": null
1150
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c1034fb299b54295696b5b0519ceb0a6e19a85b181abd8cf8409ef0077327cc7
3
+ size 2095
vocab.txt ADDED
The diff for this file is too large to render. See raw diff