andreas122001 committed on
Commit
ed33b00
1 Parent(s): be6123a

Upload 12 files

Browse files
config.json CHANGED
@@ -30,7 +30,7 @@
30
  "pad_token_id": 3,
31
  "pretraining_tp": 4,
32
  "problem_type": "single_label_classification",
33
- "seq_length": 2048,
34
  "skip_bias_add": true,
35
  "skip_bias_add_qkv": false,
36
  "slow_but_exact": false,
 
30
  "pad_token_id": 3,
31
  "pretraining_tp": 4,
32
  "problem_type": "single_label_classification",
33
+ "seq_length": 512,
34
  "skip_bias_add": true,
35
  "skip_bias_add_qkv": false,
36
  "slow_but_exact": false,
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3e25972de8180fc10cad9e151af9cf748eb5e70bce391ee42a5beee72d894aac
3
  size 24020817451
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f4ed7acd97147ae137798f194176b95c29dac68ebf8a78b7e0695d99e91b3e3
3
  size 24020817451
pytorch_model-00001-of-00002.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f9c8f97329473aa11ca607af7aadd2dde0f2e8670980f786a51cd176f2762840
3
  size 9912311464
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:726bae473dd2a0db8d69761af142a4c6313d6bd5f0500e94f321636e66fd32d1
3
  size 9912311464
pytorch_model-00002-of-00002.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a8d463e9eeaf58b97d6a48cb3dbcb3169370ab28f131ea12cc27655cd9c16021
3
  size 2098070557
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:35d56a47e44ff12d3b362896dcd05b334a231089d157a044a2bac708d014ca56
3
  size 2098070557
tokenizer_config.json CHANGED
@@ -2,7 +2,7 @@
2
  "add_prefix_space": false,
3
  "bos_token": "<s>",
4
  "eos_token": "</s>",
5
- "model_max_length": 1000000000000000019884624838656,
6
  "name_or_path": "bigscience/bloomz-3b",
7
  "pad_token": "<pad>",
8
  "special_tokens_map_file": null,
 
2
  "add_prefix_space": false,
3
  "bos_token": "<s>",
4
  "eos_token": "</s>",
5
+ "model_max_length": 512,
6
  "name_or_path": "bigscience/bloomz-3b",
7
  "pad_token": "<pad>",
8
  "special_tokens_map_file": null,
trainer_state.json CHANGED
@@ -10,907 +10,907 @@
10
  {
11
  "epoch": 0.0,
12
  "learning_rate": 4.9971428571428576e-05,
13
- "loss": 8.3826,
14
  "step": 1
15
  },
16
  {
17
  "epoch": 0.02,
18
  "learning_rate": 4.9e-05,
19
- "loss": 3.6284,
20
  "step": 35
21
  },
22
  {
23
  "epoch": 0.02,
24
- "eval_accuracy": 0.656,
25
- "eval_f1": 0.47560975609756095,
26
- "eval_loss": 1.8382066488265991,
27
- "eval_precision": 1.0,
28
- "eval_recall": 0.312,
29
- "eval_runtime": 462.8972,
30
- "eval_samples_per_second": 6.481,
31
- "eval_steps_per_second": 0.81,
32
  "step": 35
33
  },
34
  {
35
  "epoch": 0.04,
36
  "learning_rate": 4.8e-05,
37
- "loss": 0.8755,
38
  "step": 70
39
  },
40
  {
41
  "epoch": 0.04,
42
- "eval_accuracy": 0.891,
43
- "eval_f1": 0.8799118619170032,
44
- "eval_loss": 0.9740824103355408,
45
- "eval_precision": 0.9795584627964022,
46
- "eval_recall": 0.7986666666666666,
47
- "eval_runtime": 462.3762,
48
- "eval_samples_per_second": 6.488,
49
- "eval_steps_per_second": 0.811,
50
  "step": 70
51
  },
52
  {
53
  "epoch": 0.06,
54
  "learning_rate": 4.7e-05,
55
- "loss": 0.5529,
56
  "step": 105
57
  },
58
  {
59
  "epoch": 0.06,
60
- "eval_accuracy": 0.85,
61
- "eval_f1": 0.823943661971831,
62
- "eval_loss": 0.8984822630882263,
63
- "eval_precision": 0.9971590909090909,
64
- "eval_recall": 0.702,
65
- "eval_runtime": 463.7211,
66
- "eval_samples_per_second": 6.469,
67
  "eval_steps_per_second": 0.809,
68
  "step": 105
69
  },
70
  {
71
  "epoch": 0.08,
72
  "learning_rate": 4.600000000000001e-05,
73
- "loss": 0.4281,
74
  "step": 140
75
  },
76
  {
77
  "epoch": 0.08,
78
- "eval_accuracy": 0.948,
79
- "eval_f1": 0.948,
80
- "eval_loss": 0.28269827365875244,
81
- "eval_precision": 0.948,
82
- "eval_recall": 0.948,
83
- "eval_runtime": 462.5168,
84
- "eval_samples_per_second": 6.486,
85
- "eval_steps_per_second": 0.811,
86
  "step": 140
87
  },
88
  {
89
  "epoch": 0.1,
90
  "learning_rate": 4.5e-05,
91
- "loss": 0.4027,
92
  "step": 175
93
  },
94
  {
95
  "epoch": 0.1,
96
- "eval_accuracy": 0.8686666666666667,
97
- "eval_f1": 0.8490421455938698,
98
- "eval_loss": 1.2850149869918823,
99
- "eval_precision": 0.9981981981981982,
100
- "eval_recall": 0.7386666666666667,
101
- "eval_runtime": 462.9807,
102
- "eval_samples_per_second": 6.48,
103
- "eval_steps_per_second": 0.81,
104
  "step": 175
105
  },
106
  {
107
  "epoch": 0.12,
108
  "learning_rate": 4.4000000000000006e-05,
109
- "loss": 0.4487,
110
  "step": 210
111
  },
112
  {
113
  "epoch": 0.12,
114
- "eval_accuracy": 0.9423333333333334,
115
- "eval_f1": 0.9391059486096445,
116
- "eval_loss": 0.1546011120080948,
117
- "eval_precision": 0.9947800149142431,
118
- "eval_recall": 0.8893333333333333,
119
- "eval_runtime": 462.5947,
120
- "eval_samples_per_second": 6.485,
121
- "eval_steps_per_second": 0.811,
122
  "step": 210
123
  },
124
  {
125
  "epoch": 0.14,
126
  "learning_rate": 4.3e-05,
127
- "loss": 0.4095,
128
  "step": 245
129
  },
130
  {
131
  "epoch": 0.14,
132
- "eval_accuracy": 0.8943333333333333,
133
- "eval_f1": 0.8818486768542676,
134
- "eval_loss": 0.8063055276870728,
135
- "eval_precision": 1.0,
136
- "eval_recall": 0.7886666666666666,
137
- "eval_runtime": 463.8636,
138
- "eval_samples_per_second": 6.467,
139
- "eval_steps_per_second": 0.808,
140
  "step": 245
141
  },
142
  {
143
  "epoch": 0.16,
144
  "learning_rate": 4.2e-05,
145
- "loss": 0.2566,
146
  "step": 280
147
  },
148
  {
149
  "epoch": 0.16,
150
- "eval_accuracy": 0.9623333333333334,
151
- "eval_f1": 0.9620932572962093,
152
- "eval_loss": 0.37129560112953186,
153
- "eval_precision": 0.9682646860229575,
154
- "eval_recall": 0.956,
155
- "eval_runtime": 462.5562,
156
- "eval_samples_per_second": 6.486,
157
- "eval_steps_per_second": 0.811,
158
  "step": 280
159
  },
160
  {
161
  "epoch": 0.18,
162
  "learning_rate": 4.1e-05,
163
- "loss": 0.2851,
164
  "step": 315
165
  },
166
  {
167
  "epoch": 0.18,
168
- "eval_accuracy": 0.9386666666666666,
169
- "eval_f1": 0.9357990230286113,
170
- "eval_loss": 0.3722746670246124,
171
- "eval_precision": 0.9816983894582724,
172
- "eval_recall": 0.894,
173
- "eval_runtime": 462.4018,
174
- "eval_samples_per_second": 6.488,
175
- "eval_steps_per_second": 0.811,
176
  "step": 315
177
  },
178
  {
179
  "epoch": 0.2,
180
  "learning_rate": 4e-05,
181
- "loss": 0.4674,
182
  "step": 350
183
  },
184
  {
185
  "epoch": 0.2,
186
- "eval_accuracy": 0.8966666666666666,
187
- "eval_f1": 0.8849294729027467,
188
- "eval_loss": 0.9987091422080994,
189
- "eval_precision": 0.998324958123953,
190
- "eval_recall": 0.7946666666666666,
191
- "eval_runtime": 462.5055,
192
- "eval_samples_per_second": 6.486,
193
- "eval_steps_per_second": 0.811,
194
  "step": 350
195
  },
196
  {
197
  "epoch": 0.22,
198
  "learning_rate": 3.9000000000000006e-05,
199
- "loss": 0.1749,
200
  "step": 385
201
  },
202
  {
203
  "epoch": 0.22,
204
- "eval_accuracy": 0.9473333333333334,
205
- "eval_f1": 0.9446002805049089,
206
- "eval_loss": 0.4473351240158081,
207
- "eval_precision": 0.996301775147929,
208
- "eval_recall": 0.898,
209
- "eval_runtime": 462.5402,
210
- "eval_samples_per_second": 6.486,
211
- "eval_steps_per_second": 0.811,
212
  "step": 385
213
  },
214
  {
215
  "epoch": 0.24,
216
  "learning_rate": 3.8e-05,
217
- "loss": 0.3134,
218
  "step": 420
219
  },
220
  {
221
  "epoch": 0.24,
222
- "eval_accuracy": 0.9313333333333333,
223
- "eval_f1": 0.9263759828448893,
224
- "eval_loss": 0.8257947564125061,
225
- "eval_precision": 0.9984591679506933,
226
- "eval_recall": 0.864,
227
- "eval_runtime": 462.4771,
228
- "eval_samples_per_second": 6.487,
229
- "eval_steps_per_second": 0.811,
230
  "step": 420
231
  },
232
  {
233
  "epoch": 0.26,
234
  "learning_rate": 3.7e-05,
235
- "loss": 0.4379,
236
  "step": 455
237
  },
238
  {
239
  "epoch": 0.26,
240
- "eval_accuracy": 0.9496666666666667,
241
- "eval_f1": 0.9478050466643623,
242
- "eval_loss": 0.34615999460220337,
243
- "eval_precision": 0.9842067480258435,
244
- "eval_recall": 0.914,
245
- "eval_runtime": 462.467,
246
- "eval_samples_per_second": 6.487,
247
- "eval_steps_per_second": 0.811,
248
  "step": 455
249
  },
250
  {
251
  "epoch": 0.28,
252
  "learning_rate": 3.6e-05,
253
- "loss": 0.1028,
254
  "step": 490
255
  },
256
  {
257
  "epoch": 0.28,
258
- "eval_accuracy": 0.9523333333333334,
259
- "eval_f1": 0.9523174391463821,
260
- "eval_loss": 0.3040553629398346,
261
- "eval_precision": 0.95263509006004,
262
- "eval_recall": 0.952,
263
- "eval_runtime": 462.402,
264
- "eval_samples_per_second": 6.488,
265
- "eval_steps_per_second": 0.811,
266
  "step": 490
267
  },
268
  {
269
  "epoch": 0.3,
270
  "learning_rate": 3.5e-05,
271
- "loss": 0.1392,
272
  "step": 525
273
  },
274
  {
275
  "epoch": 0.3,
276
- "eval_accuracy": 0.9576666666666667,
277
- "eval_f1": 0.9566996249573816,
278
- "eval_loss": 0.3112805485725403,
279
- "eval_precision": 0.9790648988136776,
280
- "eval_recall": 0.9353333333333333,
281
- "eval_runtime": 463.4665,
282
- "eval_samples_per_second": 6.473,
283
  "eval_steps_per_second": 0.809,
284
  "step": 525
285
  },
286
  {
287
  "epoch": 0.32,
288
  "learning_rate": 3.4000000000000007e-05,
289
- "loss": 0.0944,
290
  "step": 560
291
  },
292
  {
293
  "epoch": 0.32,
294
- "eval_accuracy": 0.9196666666666666,
295
- "eval_f1": 0.9127759681505611,
296
- "eval_loss": 0.9487587213516235,
297
- "eval_precision": 0.9984164687252574,
298
- "eval_recall": 0.8406666666666667,
299
- "eval_runtime": 463.9336,
300
- "eval_samples_per_second": 6.466,
301
- "eval_steps_per_second": 0.808,
302
  "step": 560
303
  },
304
  {
305
  "epoch": 0.34,
306
  "learning_rate": 3.3e-05,
307
- "loss": 0.1758,
308
  "step": 595
309
  },
310
  {
311
  "epoch": 0.34,
312
- "eval_accuracy": 0.935,
313
- "eval_f1": 0.9306296691568836,
314
- "eval_loss": 0.4119901657104492,
315
- "eval_precision": 0.9977116704805492,
316
- "eval_recall": 0.872,
317
- "eval_runtime": 462.719,
318
- "eval_samples_per_second": 6.483,
319
- "eval_steps_per_second": 0.81,
320
  "step": 595
321
  },
322
  {
323
  "epoch": 0.36,
324
  "learning_rate": 3.2000000000000005e-05,
325
- "loss": 0.1123,
326
  "step": 630
327
  },
328
  {
329
  "epoch": 0.36,
330
- "eval_accuracy": 0.9673333333333334,
331
- "eval_f1": 0.9671361502347419,
332
- "eval_loss": 0.18079321086406708,
333
- "eval_precision": 0.9730094466936572,
334
- "eval_recall": 0.9613333333333334,
335
- "eval_runtime": 462.4961,
336
- "eval_samples_per_second": 6.487,
337
- "eval_steps_per_second": 0.811,
338
  "step": 630
339
  },
340
  {
341
  "epoch": 0.38,
342
  "learning_rate": 3.1e-05,
343
- "loss": 0.187,
344
  "step": 665
345
  },
346
  {
347
  "epoch": 0.38,
348
- "eval_accuracy": 0.9563333333333334,
349
- "eval_f1": 0.954936360509116,
350
- "eval_loss": 0.3595654368400574,
351
- "eval_precision": 0.9864960909737029,
352
- "eval_recall": 0.9253333333333333,
353
- "eval_runtime": 463.4737,
354
- "eval_samples_per_second": 6.473,
355
- "eval_steps_per_second": 0.809,
356
  "step": 665
357
  },
358
  {
359
  "epoch": 0.4,
360
  "learning_rate": 3e-05,
361
- "loss": 0.1876,
362
  "step": 700
363
  },
364
  {
365
  "epoch": 0.4,
366
- "eval_accuracy": 0.962,
367
- "eval_f1": 0.9621262458471761,
368
- "eval_loss": 0.26878979802131653,
369
- "eval_precision": 0.9589403973509933,
370
- "eval_recall": 0.9653333333333334,
371
- "eval_runtime": 462.4658,
372
- "eval_samples_per_second": 6.487,
373
- "eval_steps_per_second": 0.811,
374
  "step": 700
375
  },
376
  {
377
  "epoch": 0.42,
378
  "learning_rate": 2.9e-05,
379
- "loss": 0.1762,
380
  "step": 735
381
  },
382
  {
383
  "epoch": 0.42,
384
- "eval_accuracy": 0.9513333333333334,
385
- "eval_f1": 0.949375866851595,
386
- "eval_loss": 0.30938494205474854,
387
- "eval_precision": 0.9891618497109826,
388
- "eval_recall": 0.9126666666666666,
389
- "eval_runtime": 462.6352,
390
- "eval_samples_per_second": 6.485,
391
- "eval_steps_per_second": 0.811,
392
  "step": 735
393
  },
394
  {
395
  "epoch": 0.44,
396
  "learning_rate": 2.8000000000000003e-05,
397
- "loss": 0.0164,
398
  "step": 770
399
  },
400
  {
401
  "epoch": 0.44,
402
- "eval_accuracy": 0.9543333333333334,
403
- "eval_f1": 0.9526115530958146,
404
- "eval_loss": 0.3230161964893341,
405
- "eval_precision": 0.9899352983465133,
406
- "eval_recall": 0.918,
407
- "eval_runtime": 464.4305,
408
- "eval_samples_per_second": 6.46,
409
- "eval_steps_per_second": 0.807,
410
  "step": 770
411
  },
412
  {
413
  "epoch": 0.46,
414
  "learning_rate": 2.7000000000000002e-05,
415
- "loss": 0.0903,
416
  "step": 805
417
  },
418
  {
419
  "epoch": 0.46,
420
- "eval_accuracy": 0.9593333333333334,
421
- "eval_f1": 0.9585597826086957,
422
- "eval_loss": 0.33148592710494995,
423
- "eval_precision": 0.9771468144044322,
424
- "eval_recall": 0.9406666666666667,
425
- "eval_runtime": 462.7729,
426
- "eval_samples_per_second": 6.483,
427
- "eval_steps_per_second": 0.81,
428
  "step": 805
429
  },
430
  {
431
  "epoch": 0.48,
432
  "learning_rate": 2.6000000000000002e-05,
433
- "loss": 0.2401,
434
  "step": 840
435
  },
436
  {
437
  "epoch": 0.48,
438
- "eval_accuracy": 0.9326666666666666,
439
- "eval_f1": 0.9285208775654635,
440
- "eval_loss": 0.5261781215667725,
441
- "eval_precision": 0.9894419306184012,
442
- "eval_recall": 0.8746666666666667,
443
- "eval_runtime": 462.0723,
444
- "eval_samples_per_second": 6.492,
445
- "eval_steps_per_second": 0.812,
446
  "step": 840
447
  },
448
  {
449
  "epoch": 0.5,
450
  "learning_rate": 2.5e-05,
451
- "loss": 0.1308,
452
  "step": 875
453
  },
454
  {
455
  "epoch": 0.5,
456
- "eval_accuracy": 0.922,
457
- "eval_f1": 0.9155844155844155,
458
- "eval_loss": 0.5081947445869446,
459
- "eval_precision": 0.9976415094339622,
460
- "eval_recall": 0.846,
461
- "eval_runtime": 462.3765,
462
- "eval_samples_per_second": 6.488,
463
- "eval_steps_per_second": 0.811,
464
  "step": 875
465
  },
466
  {
467
  "epoch": 0.52,
468
  "learning_rate": 2.4e-05,
469
- "loss": 0.1004,
470
  "step": 910
471
  },
472
  {
473
  "epoch": 0.52,
474
- "eval_accuracy": 0.9353333333333333,
475
- "eval_f1": 0.9321203638908327,
476
- "eval_loss": 0.6189997792243958,
477
- "eval_precision": 0.9808541973490427,
478
- "eval_recall": 0.888,
479
- "eval_runtime": 462.3735,
480
- "eval_samples_per_second": 6.488,
481
- "eval_steps_per_second": 0.811,
482
  "step": 910
483
  },
484
  {
485
  "epoch": 0.54,
486
  "learning_rate": 2.3000000000000003e-05,
487
- "loss": 0.0982,
488
  "step": 945
489
  },
490
  {
491
  "epoch": 0.54,
492
- "eval_accuracy": 0.9496666666666667,
493
- "eval_f1": 0.9480921278789962,
494
- "eval_loss": 0.5868619680404663,
495
- "eval_precision": 0.978708303761533,
496
- "eval_recall": 0.9193333333333333,
497
- "eval_runtime": 462.6069,
498
- "eval_samples_per_second": 6.485,
499
- "eval_steps_per_second": 0.811,
500
  "step": 945
501
  },
502
  {
503
  "epoch": 0.56,
504
  "learning_rate": 2.2000000000000003e-05,
505
- "loss": 0.1627,
506
  "step": 980
507
  },
508
  {
509
  "epoch": 0.56,
510
- "eval_accuracy": 0.9503333333333334,
511
- "eval_f1": 0.9483177245924385,
512
- "eval_loss": 0.5452014803886414,
513
- "eval_precision": 0.9884309472161966,
514
- "eval_recall": 0.9113333333333333,
515
- "eval_runtime": 462.2788,
516
- "eval_samples_per_second": 6.49,
517
- "eval_steps_per_second": 0.811,
518
  "step": 980
519
  },
520
  {
521
  "epoch": 0.58,
522
  "learning_rate": 2.1e-05,
523
- "loss": 0.2264,
524
  "step": 1015
525
  },
526
  {
527
  "epoch": 0.58,
528
- "eval_accuracy": 0.962,
529
- "eval_f1": 0.9608785175017158,
530
- "eval_loss": 0.36828112602233887,
531
- "eval_precision": 0.9900990099009901,
532
- "eval_recall": 0.9333333333333333,
533
- "eval_runtime": 462.546,
534
- "eval_samples_per_second": 6.486,
535
- "eval_steps_per_second": 0.811,
536
  "step": 1015
537
  },
538
  {
539
  "epoch": 0.6,
540
  "learning_rate": 2e-05,
541
- "loss": 0.2683,
542
  "step": 1050
543
  },
544
  {
545
  "epoch": 0.6,
546
- "eval_accuracy": 0.959,
547
- "eval_f1": 0.9577464788732394,
548
- "eval_loss": 0.2027631402015686,
549
- "eval_precision": 0.9879518072289156,
550
- "eval_recall": 0.9293333333333333,
551
- "eval_runtime": 462.4537,
552
- "eval_samples_per_second": 6.487,
553
- "eval_steps_per_second": 0.811,
554
  "step": 1050
555
  },
556
  {
557
  "epoch": 0.62,
558
  "learning_rate": 1.9e-05,
559
- "loss": 0.088,
560
  "step": 1085
561
  },
562
  {
563
  "epoch": 0.62,
564
- "eval_accuracy": 0.937,
565
- "eval_f1": 0.9328596802841918,
566
- "eval_loss": 0.5130247473716736,
567
- "eval_precision": 0.9984790874524715,
568
- "eval_recall": 0.8753333333333333,
569
- "eval_runtime": 462.3506,
570
- "eval_samples_per_second": 6.489,
571
- "eval_steps_per_second": 0.811,
572
  "step": 1085
573
  },
574
  {
575
  "epoch": 0.64,
576
  "learning_rate": 1.8e-05,
577
- "loss": 0.214,
578
  "step": 1120
579
  },
580
  {
581
  "epoch": 0.64,
582
- "eval_accuracy": 0.9443333333333334,
583
- "eval_f1": 0.9413829413829414,
584
- "eval_loss": 0.38535651564598083,
585
- "eval_precision": 0.994069681245367,
586
- "eval_recall": 0.894,
587
- "eval_runtime": 462.2352,
588
- "eval_samples_per_second": 6.49,
589
- "eval_steps_per_second": 0.811,
590
  "step": 1120
591
  },
592
  {
593
  "epoch": 0.66,
594
  "learning_rate": 1.7000000000000003e-05,
595
- "loss": 0.0811,
596
  "step": 1155
597
  },
598
  {
599
  "epoch": 0.66,
600
- "eval_accuracy": 0.9563333333333334,
601
- "eval_f1": 0.954936360509116,
602
- "eval_loss": 0.2761794328689575,
603
- "eval_precision": 0.9864960909737029,
604
- "eval_recall": 0.9253333333333333,
605
- "eval_runtime": 463.2198,
606
- "eval_samples_per_second": 6.476,
607
- "eval_steps_per_second": 0.81,
608
  "step": 1155
609
  },
610
  {
611
  "epoch": 0.68,
612
  "learning_rate": 1.6000000000000003e-05,
613
- "loss": 0.0716,
614
  "step": 1190
615
  },
616
  {
617
  "epoch": 0.68,
618
- "eval_accuracy": 0.9603333333333334,
619
- "eval_f1": 0.9593717992488903,
620
- "eval_loss": 0.3206270635128021,
621
- "eval_precision": 0.9832050384884534,
622
- "eval_recall": 0.9366666666666666,
623
- "eval_runtime": 461.5741,
624
- "eval_samples_per_second": 6.499,
625
- "eval_steps_per_second": 0.812,
626
  "step": 1190
627
  },
628
  {
629
  "epoch": 0.7,
630
  "learning_rate": 1.5e-05,
631
- "loss": 0.0843,
632
  "step": 1225
633
  },
634
  {
635
  "epoch": 0.7,
636
- "eval_accuracy": 0.9546666666666667,
637
- "eval_f1": 0.953103448275862,
638
- "eval_loss": 0.38527336716651917,
639
- "eval_precision": 0.9871428571428571,
640
- "eval_recall": 0.9213333333333333,
641
- "eval_runtime": 462.2164,
642
- "eval_samples_per_second": 6.49,
643
- "eval_steps_per_second": 0.811,
644
  "step": 1225
645
  },
646
  {
647
  "epoch": 0.72,
648
  "learning_rate": 1.4000000000000001e-05,
649
- "loss": 0.0744,
650
  "step": 1260
651
  },
652
  {
653
  "epoch": 0.72,
654
- "eval_accuracy": 0.9556666666666667,
655
- "eval_f1": 0.9542168674698795,
656
- "eval_loss": 0.4054282307624817,
657
- "eval_precision": 0.9864768683274021,
658
- "eval_recall": 0.924,
659
- "eval_runtime": 461.3748,
660
- "eval_samples_per_second": 6.502,
661
- "eval_steps_per_second": 0.813,
662
  "step": 1260
663
  },
664
  {
665
  "epoch": 0.74,
666
  "learning_rate": 1.3000000000000001e-05,
667
- "loss": 0.0748,
668
  "step": 1295
669
  },
670
  {
671
  "epoch": 0.74,
672
- "eval_accuracy": 0.9476666666666667,
673
- "eval_f1": 0.9450472523626182,
674
- "eval_loss": 0.4932408928871155,
675
- "eval_precision": 0.9948415622697127,
676
- "eval_recall": 0.9,
677
- "eval_runtime": 462.1327,
678
- "eval_samples_per_second": 6.492,
679
- "eval_steps_per_second": 0.811,
680
  "step": 1295
681
  },
682
  {
683
  "epoch": 0.76,
684
  "learning_rate": 1.2e-05,
685
- "loss": 0.0181,
686
  "step": 1330
687
  },
688
  {
689
  "epoch": 0.76,
690
- "eval_accuracy": 0.95,
691
- "eval_f1": 0.9479889042995839,
692
- "eval_loss": 0.4586262106895447,
693
- "eval_precision": 0.9877167630057804,
694
- "eval_recall": 0.9113333333333333,
695
- "eval_runtime": 462.3672,
696
- "eval_samples_per_second": 6.488,
697
- "eval_steps_per_second": 0.811,
698
  "step": 1330
699
  },
700
  {
701
  "epoch": 0.78,
702
  "learning_rate": 1.1000000000000001e-05,
703
- "loss": 0.098,
704
  "step": 1365
705
  },
706
  {
707
  "epoch": 0.78,
708
- "eval_accuracy": 0.9503333333333334,
709
- "eval_f1": 0.9479566887879846,
710
- "eval_loss": 0.4024898409843445,
711
- "eval_precision": 0.9955979457079971,
712
- "eval_recall": 0.9046666666666666,
713
- "eval_runtime": 461.9999,
714
- "eval_samples_per_second": 6.494,
715
- "eval_steps_per_second": 0.812,
716
  "step": 1365
717
  },
718
  {
719
  "epoch": 0.8,
720
  "learning_rate": 1e-05,
721
- "loss": 0.0752,
722
  "step": 1400
723
  },
724
  {
725
  "epoch": 0.8,
726
- "eval_accuracy": 0.955,
727
- "eval_f1": 0.9534322180062091,
728
- "eval_loss": 0.2519637942314148,
729
- "eval_precision": 0.9878484631879915,
730
- "eval_recall": 0.9213333333333333,
731
- "eval_runtime": 462.3269,
732
- "eval_samples_per_second": 6.489,
733
- "eval_steps_per_second": 0.811,
734
  "step": 1400
735
  },
736
  {
737
  "epoch": 0.82,
738
  "learning_rate": 9e-06,
739
- "loss": 0.0575,
740
  "step": 1435
741
  },
742
  {
743
  "epoch": 0.82,
744
- "eval_accuracy": 0.959,
745
- "eval_f1": 0.958120531154239,
746
- "eval_loss": 0.24917536973953247,
747
- "eval_precision": 0.9791231732776617,
748
- "eval_recall": 0.938,
749
- "eval_runtime": 462.2475,
750
- "eval_samples_per_second": 6.49,
751
- "eval_steps_per_second": 0.811,
752
  "step": 1435
753
  },
754
  {
755
  "epoch": 0.84,
756
  "learning_rate": 8.000000000000001e-06,
757
- "loss": 0.0691,
758
  "step": 1470
759
  },
760
  {
761
  "epoch": 0.84,
762
- "eval_accuracy": 0.9576666666666667,
763
- "eval_f1": 0.9564322469982849,
764
- "eval_loss": 0.3299550414085388,
765
- "eval_precision": 0.9851590106007068,
766
- "eval_recall": 0.9293333333333333,
767
- "eval_runtime": 463.7914,
768
- "eval_samples_per_second": 6.468,
769
  "eval_steps_per_second": 0.809,
770
  "step": 1470
771
  },
772
  {
773
  "epoch": 0.86,
774
  "learning_rate": 7.000000000000001e-06,
775
- "loss": 0.0617,
776
  "step": 1505
777
  },
778
  {
779
  "epoch": 0.86,
780
- "eval_accuracy": 0.957,
781
- "eval_f1": 0.9556853315012023,
782
- "eval_loss": 0.33974531292915344,
783
- "eval_precision": 0.9858256555634302,
784
- "eval_recall": 0.9273333333333333,
785
- "eval_runtime": 462.1154,
786
- "eval_samples_per_second": 6.492,
787
- "eval_steps_per_second": 0.811,
788
  "step": 1505
789
  },
790
  {
791
  "epoch": 0.88,
792
  "learning_rate": 6e-06,
793
- "loss": 0.0392,
794
  "step": 1540
795
  },
796
  {
797
  "epoch": 0.88,
798
- "eval_accuracy": 0.9573333333333334,
799
- "eval_f1": 0.9561042524005487,
800
- "eval_loss": 0.3205489218235016,
801
- "eval_precision": 0.9844632768361582,
802
- "eval_recall": 0.9293333333333333,
803
- "eval_runtime": 463.3482,
804
- "eval_samples_per_second": 6.475,
805
- "eval_steps_per_second": 0.809,
806
  "step": 1540
807
  },
808
  {
809
  "epoch": 0.9,
810
  "learning_rate": 5e-06,
811
- "loss": 0.0386,
812
  "step": 1575
813
  },
814
  {
815
  "epoch": 0.9,
816
- "eval_accuracy": 0.958,
817
- "eval_f1": 0.9571428571428571,
818
- "eval_loss": 0.27690985798835754,
819
- "eval_precision": 0.9770833333333333,
820
- "eval_recall": 0.938,
821
- "eval_runtime": 462.2334,
822
- "eval_samples_per_second": 6.49,
823
- "eval_steps_per_second": 0.811,
824
  "step": 1575
825
  },
826
  {
827
  "epoch": 0.92,
828
  "learning_rate": 4.000000000000001e-06,
829
- "loss": 0.0334,
830
  "step": 1610
831
  },
832
  {
833
  "epoch": 0.92,
834
- "eval_accuracy": 0.9573333333333334,
835
- "eval_f1": 0.9562243502051984,
836
- "eval_loss": 0.30371883511543274,
837
- "eval_precision": 0.9817415730337079,
838
- "eval_recall": 0.932,
839
- "eval_runtime": 462.4404,
840
- "eval_samples_per_second": 6.487,
841
- "eval_steps_per_second": 0.811,
842
  "step": 1610
843
  },
844
  {
845
  "epoch": 0.94,
846
  "learning_rate": 3e-06,
847
- "loss": 0.1167,
848
  "step": 1645
849
  },
850
  {
851
  "epoch": 0.94,
852
- "eval_accuracy": 0.9593333333333334,
853
- "eval_f1": 0.9583333333333333,
854
- "eval_loss": 0.3056192100048065,
855
- "eval_precision": 0.9824929971988795,
856
- "eval_recall": 0.9353333333333333,
857
- "eval_runtime": 461.9141,
858
- "eval_samples_per_second": 6.495,
859
- "eval_steps_per_second": 0.812,
860
  "step": 1645
861
  },
862
  {
863
  "epoch": 0.96,
864
  "learning_rate": 2.0000000000000003e-06,
865
- "loss": 0.0505,
866
  "step": 1680
867
  },
868
  {
869
  "epoch": 0.96,
870
- "eval_accuracy": 0.953,
871
- "eval_f1": 0.9510586601874348,
872
- "eval_loss": 0.42186784744262695,
873
- "eval_precision": 0.9920347574221579,
874
- "eval_recall": 0.9133333333333333,
875
- "eval_runtime": 462.3061,
876
- "eval_samples_per_second": 6.489,
877
- "eval_steps_per_second": 0.811,
878
  "step": 1680
879
  },
880
  {
881
  "epoch": 0.98,
882
  "learning_rate": 1.0000000000000002e-06,
883
- "loss": 0.0468,
884
  "step": 1715
885
  },
886
  {
887
  "epoch": 0.98,
888
- "eval_accuracy": 0.9576666666666667,
889
- "eval_f1": 0.9562822719449227,
890
- "eval_loss": 0.3499450385570526,
891
- "eval_precision": 0.9886120996441281,
892
- "eval_recall": 0.926,
893
- "eval_runtime": 462.275,
894
- "eval_samples_per_second": 6.49,
895
- "eval_steps_per_second": 0.811,
896
  "step": 1715
897
  },
898
  {
899
  "epoch": 1.0,
900
  "learning_rate": 0.0,
901
- "loss": 0.0429,
902
  "step": 1750
903
  },
904
  {
905
  "epoch": 1.0,
906
- "eval_accuracy": 0.9576666666666667,
907
- "eval_f1": 0.9562822719449227,
908
- "eval_loss": 0.34879612922668457,
909
- "eval_precision": 0.9886120996441281,
910
- "eval_recall": 0.926,
911
- "eval_runtime": 462.1636,
912
- "eval_samples_per_second": 6.491,
913
- "eval_steps_per_second": 0.811,
914
  "step": 1750
915
  }
916
  ],
 
10
  {
11
  "epoch": 0.0,
12
  "learning_rate": 4.9971428571428576e-05,
13
+ "loss": 18.0539,
14
  "step": 1
15
  },
16
  {
17
  "epoch": 0.02,
18
  "learning_rate": 4.9e-05,
19
+ "loss": 4.1309,
20
  "step": 35
21
  },
22
  {
23
  "epoch": 0.02,
24
+ "eval_accuracy": 0.8826666666666667,
25
+ "eval_f1": 0.8778625954198473,
26
+ "eval_loss": 0.34147411584854126,
27
+ "eval_precision": 0.9153400868306801,
28
+ "eval_recall": 0.8433333333333334,
29
+ "eval_runtime": 463.3794,
30
+ "eval_samples_per_second": 6.474,
31
+ "eval_steps_per_second": 0.809,
32
  "step": 35
33
  },
34
  {
35
  "epoch": 0.04,
36
  "learning_rate": 4.8e-05,
37
+ "loss": 0.6366,
38
  "step": 70
39
  },
40
  {
41
  "epoch": 0.04,
42
+ "eval_accuracy": 0.9403333333333334,
43
+ "eval_f1": 0.9423881557772771,
44
+ "eval_loss": 0.139574334025383,
45
+ "eval_precision": 0.911014312383323,
46
+ "eval_recall": 0.976,
47
+ "eval_runtime": 463.7246,
48
+ "eval_samples_per_second": 6.469,
49
+ "eval_steps_per_second": 0.809,
50
  "step": 70
51
  },
52
  {
53
  "epoch": 0.06,
54
  "learning_rate": 4.7e-05,
55
+ "loss": 0.8166,
56
  "step": 105
57
  },
58
  {
59
  "epoch": 0.06,
60
+ "eval_accuracy": 0.8373333333333334,
61
+ "eval_f1": 0.8576429404900816,
62
+ "eval_loss": 0.8452138900756836,
63
+ "eval_precision": 0.7624481327800829,
64
+ "eval_recall": 0.98,
65
+ "eval_runtime": 463.2546,
66
+ "eval_samples_per_second": 6.476,
67
  "eval_steps_per_second": 0.809,
68
  "step": 105
69
  },
70
  {
71
  "epoch": 0.08,
72
  "learning_rate": 4.600000000000001e-05,
73
+ "loss": 1.8141,
74
  "step": 140
75
  },
76
  {
77
  "epoch": 0.08,
78
+ "eval_accuracy": 0.8703333333333333,
79
+ "eval_f1": 0.8838459241564646,
80
+ "eval_loss": 2.6052372455596924,
81
+ "eval_precision": 0.8004326663061114,
82
+ "eval_recall": 0.9866666666666667,
83
+ "eval_runtime": 463.8217,
84
+ "eval_samples_per_second": 6.468,
85
+ "eval_steps_per_second": 0.809,
86
  "step": 140
87
  },
88
  {
89
  "epoch": 0.1,
90
  "learning_rate": 4.5e-05,
91
+ "loss": 0.994,
92
  "step": 175
93
  },
94
  {
95
  "epoch": 0.1,
96
+ "eval_accuracy": 0.8933333333333333,
97
+ "eval_f1": 0.9029714978775015,
98
+ "eval_loss": 0.5267955660820007,
99
+ "eval_precision": 0.8281423804226918,
100
+ "eval_recall": 0.9926666666666667,
101
+ "eval_runtime": 463.9594,
102
+ "eval_samples_per_second": 6.466,
103
+ "eval_steps_per_second": 0.808,
104
  "step": 175
105
  },
106
  {
107
  "epoch": 0.12,
108
  "learning_rate": 4.4000000000000006e-05,
109
+ "loss": 0.3827,
110
  "step": 210
111
  },
112
  {
113
  "epoch": 0.12,
114
+ "eval_accuracy": 0.9343333333333333,
115
+ "eval_f1": 0.9345297441010303,
116
+ "eval_loss": 0.296316534280777,
117
+ "eval_precision": 0.9317428760768721,
118
+ "eval_recall": 0.9373333333333334,
119
+ "eval_runtime": 463.5088,
120
+ "eval_samples_per_second": 6.472,
121
+ "eval_steps_per_second": 0.809,
122
  "step": 210
123
  },
124
  {
125
  "epoch": 0.14,
126
  "learning_rate": 4.3e-05,
127
+ "loss": 0.5048,
128
  "step": 245
129
  },
130
  {
131
  "epoch": 0.14,
132
+ "eval_accuracy": 0.9526666666666667,
133
+ "eval_f1": 0.9523489932885906,
134
+ "eval_loss": 0.2187061905860901,
135
+ "eval_precision": 0.9587837837837838,
136
+ "eval_recall": 0.946,
137
+ "eval_runtime": 467.9247,
138
+ "eval_samples_per_second": 6.411,
139
+ "eval_steps_per_second": 0.801,
140
  "step": 245
141
  },
142
  {
143
  "epoch": 0.16,
144
  "learning_rate": 4.2e-05,
145
+ "loss": 0.2841,
146
  "step": 280
147
  },
148
  {
149
  "epoch": 0.16,
150
+ "eval_accuracy": 0.8916666666666667,
151
+ "eval_f1": 0.8787765759045133,
152
+ "eval_loss": 1.027541160583496,
153
+ "eval_precision": 0.9974597798475868,
154
+ "eval_recall": 0.7853333333333333,
155
+ "eval_runtime": 463.3965,
156
+ "eval_samples_per_second": 6.474,
157
+ "eval_steps_per_second": 0.809,
158
  "step": 280
159
  },
160
  {
161
  "epoch": 0.18,
162
  "learning_rate": 4.1e-05,
163
+ "loss": 0.3962,
164
  "step": 315
165
  },
166
  {
167
  "epoch": 0.18,
168
+ "eval_accuracy": 0.9496666666666667,
169
+ "eval_f1": 0.948656919415165,
170
+ "eval_loss": 0.3296962380409241,
171
+ "eval_precision": 0.9680777238029147,
172
+ "eval_recall": 0.93,
173
+ "eval_runtime": 463.326,
174
+ "eval_samples_per_second": 6.475,
175
+ "eval_steps_per_second": 0.809,
176
  "step": 315
177
  },
178
  {
179
  "epoch": 0.2,
180
  "learning_rate": 4e-05,
181
+ "loss": 0.488,
182
  "step": 350
183
  },
184
  {
185
  "epoch": 0.2,
186
+ "eval_accuracy": 0.9443333333333334,
187
+ "eval_f1": 0.9429839535677706,
188
+ "eval_loss": 0.4798208773136139,
189
+ "eval_precision": 0.966410076976907,
190
+ "eval_recall": 0.9206666666666666,
191
+ "eval_runtime": 463.3936,
192
+ "eval_samples_per_second": 6.474,
193
+ "eval_steps_per_second": 0.809,
194
  "step": 350
195
  },
196
  {
197
  "epoch": 0.22,
198
  "learning_rate": 3.9000000000000006e-05,
199
+ "loss": 0.4094,
200
  "step": 385
201
  },
202
  {
203
  "epoch": 0.22,
204
+ "eval_accuracy": 0.9096666666666666,
205
+ "eval_f1": 0.9166922840454964,
206
+ "eval_loss": 0.5374864339828491,
207
+ "eval_precision": 0.8505419281232174,
208
+ "eval_recall": 0.994,
209
+ "eval_runtime": 463.4888,
210
+ "eval_samples_per_second": 6.473,
211
+ "eval_steps_per_second": 0.809,
212
  "step": 385
213
  },
214
  {
215
  "epoch": 0.24,
216
  "learning_rate": 3.8e-05,
217
+ "loss": 0.2203,
218
  "step": 420
219
  },
220
  {
221
  "epoch": 0.24,
222
+ "eval_accuracy": 0.957,
223
+ "eval_f1": 0.958130477117819,
224
+ "eval_loss": 0.18050691485404968,
225
+ "eval_precision": 0.9335863377609108,
226
+ "eval_recall": 0.984,
227
+ "eval_runtime": 463.3488,
228
+ "eval_samples_per_second": 6.475,
229
+ "eval_steps_per_second": 0.809,
230
  "step": 420
231
  },
232
  {
233
  "epoch": 0.26,
234
  "learning_rate": 3.7e-05,
235
+ "loss": 0.2526,
236
  "step": 455
237
  },
238
  {
239
  "epoch": 0.26,
240
+ "eval_accuracy": 0.9566666666666667,
241
+ "eval_f1": 0.9579288025889968,
242
+ "eval_loss": 0.32805779576301575,
243
+ "eval_precision": 0.9308176100628931,
244
+ "eval_recall": 0.9866666666666667,
245
+ "eval_runtime": 464.3941,
246
+ "eval_samples_per_second": 6.46,
247
+ "eval_steps_per_second": 0.808,
248
  "step": 455
249
  },
250
  {
251
  "epoch": 0.28,
252
  "learning_rate": 3.6e-05,
253
+ "loss": 0.1888,
254
  "step": 490
255
  },
256
  {
257
  "epoch": 0.28,
258
+ "eval_accuracy": 0.972,
259
+ "eval_f1": 0.9723502304147466,
260
+ "eval_loss": 0.15383633971214294,
261
+ "eval_precision": 0.9603381014304291,
262
+ "eval_recall": 0.9846666666666667,
263
+ "eval_runtime": 463.4989,
264
+ "eval_samples_per_second": 6.473,
265
+ "eval_steps_per_second": 0.809,
266
  "step": 490
267
  },
268
  {
269
  "epoch": 0.3,
270
  "learning_rate": 3.5e-05,
271
+ "loss": 0.1859,
272
  "step": 525
273
  },
274
  {
275
  "epoch": 0.3,
276
+ "eval_accuracy": 0.9783333333333334,
277
+ "eval_f1": 0.9781659388646288,
278
+ "eval_loss": 0.11581222712993622,
279
+ "eval_precision": 0.985781990521327,
280
+ "eval_recall": 0.9706666666666667,
281
+ "eval_runtime": 463.5881,
282
+ "eval_samples_per_second": 6.471,
283
  "eval_steps_per_second": 0.809,
284
  "step": 525
285
  },
286
  {
287
  "epoch": 0.32,
288
  "learning_rate": 3.4000000000000007e-05,
289
+ "loss": 0.1007,
290
  "step": 560
291
  },
292
  {
293
  "epoch": 0.32,
294
+ "eval_accuracy": 0.9753333333333334,
295
+ "eval_f1": 0.9755129053606882,
296
+ "eval_loss": 0.14892521500587463,
297
+ "eval_precision": 0.9684625492772667,
298
+ "eval_recall": 0.9826666666666667,
299
+ "eval_runtime": 463.553,
300
+ "eval_samples_per_second": 6.472,
301
+ "eval_steps_per_second": 0.809,
302
  "step": 560
303
  },
304
  {
305
  "epoch": 0.34,
306
  "learning_rate": 3.3e-05,
307
+ "loss": 0.1065,
308
  "step": 595
309
  },
310
  {
311
  "epoch": 0.34,
312
+ "eval_accuracy": 0.9726666666666667,
313
+ "eval_f1": 0.9720327421555252,
314
+ "eval_loss": 0.15965215861797333,
315
+ "eval_precision": 0.9951117318435754,
316
+ "eval_recall": 0.95,
317
+ "eval_runtime": 463.6429,
318
+ "eval_samples_per_second": 6.47,
319
+ "eval_steps_per_second": 0.809,
320
  "step": 595
321
  },
322
  {
323
  "epoch": 0.36,
324
  "learning_rate": 3.2000000000000005e-05,
325
+ "loss": 0.2868,
326
  "step": 630
327
  },
328
  {
329
  "epoch": 0.36,
330
+ "eval_accuracy": 0.9806666666666667,
331
+ "eval_f1": 0.9807180851063829,
332
+ "eval_loss": 0.08715511113405228,
333
+ "eval_precision": 0.9781167108753316,
334
+ "eval_recall": 0.9833333333333333,
335
+ "eval_runtime": 463.4149,
336
+ "eval_samples_per_second": 6.474,
337
+ "eval_steps_per_second": 0.809,
338
  "step": 630
339
  },
340
  {
341
  "epoch": 0.38,
342
  "learning_rate": 3.1e-05,
343
+ "loss": 0.1818,
344
  "step": 665
345
  },
346
  {
347
  "epoch": 0.38,
348
+ "eval_accuracy": 0.9796666666666667,
349
+ "eval_f1": 0.9795781720790091,
350
+ "eval_loss": 0.12202201038599014,
351
+ "eval_precision": 0.9838601210490922,
352
+ "eval_recall": 0.9753333333333334,
353
+ "eval_runtime": 464.4798,
354
+ "eval_samples_per_second": 6.459,
355
+ "eval_steps_per_second": 0.807,
356
  "step": 665
357
  },
358
  {
359
  "epoch": 0.4,
360
  "learning_rate": 3e-05,
361
+ "loss": 0.3238,
362
  "step": 700
363
  },
364
  {
365
  "epoch": 0.4,
366
+ "eval_accuracy": 0.9786666666666667,
367
+ "eval_f1": 0.9784221173297369,
368
+ "eval_loss": 0.16317808628082275,
369
+ "eval_precision": 0.9897680763983628,
370
+ "eval_recall": 0.9673333333333334,
371
+ "eval_runtime": 463.2126,
372
+ "eval_samples_per_second": 6.477,
373
+ "eval_steps_per_second": 0.81,
374
  "step": 700
375
  },
376
  {
377
  "epoch": 0.42,
378
  "learning_rate": 2.9e-05,
379
+ "loss": 0.125,
380
  "step": 735
381
  },
382
  {
383
  "epoch": 0.42,
384
+ "eval_accuracy": 0.9796666666666667,
385
+ "eval_f1": 0.9796054831160146,
386
+ "eval_loss": 0.1086646318435669,
387
+ "eval_precision": 0.9825620389000671,
388
+ "eval_recall": 0.9766666666666667,
389
+ "eval_runtime": 464.4048,
390
+ "eval_samples_per_second": 6.46,
391
+ "eval_steps_per_second": 0.807,
392
  "step": 735
393
  },
394
  {
395
  "epoch": 0.44,
396
  "learning_rate": 2.8000000000000003e-05,
397
+ "loss": 0.1361,
398
  "step": 770
399
  },
400
  {
401
  "epoch": 0.44,
402
+ "eval_accuracy": 0.976,
403
+ "eval_f1": 0.9754935330156569,
404
+ "eval_loss": 0.15513776242733002,
405
+ "eval_precision": 0.9965229485396384,
406
+ "eval_recall": 0.9553333333333334,
407
+ "eval_runtime": 463.6268,
408
+ "eval_samples_per_second": 6.471,
409
+ "eval_steps_per_second": 0.809,
410
  "step": 770
411
  },
412
  {
413
  "epoch": 0.46,
414
  "learning_rate": 2.7000000000000002e-05,
415
+ "loss": 0.1276,
416
  "step": 805
417
  },
418
  {
419
  "epoch": 0.46,
420
+ "eval_accuracy": 0.9683333333333334,
421
+ "eval_f1": 0.9673875729488499,
422
+ "eval_loss": 0.20085427165031433,
423
+ "eval_precision": 0.997169143665959,
424
+ "eval_recall": 0.9393333333333334,
425
+ "eval_runtime": 463.7219,
426
+ "eval_samples_per_second": 6.469,
427
+ "eval_steps_per_second": 0.809,
428
  "step": 805
429
  },
430
  {
431
  "epoch": 0.48,
432
  "learning_rate": 2.6000000000000002e-05,
433
+ "loss": 0.1618,
434
  "step": 840
435
  },
436
  {
437
  "epoch": 0.48,
438
+ "eval_accuracy": 0.9766666666666667,
439
+ "eval_f1": 0.9762066621346024,
440
+ "eval_loss": 0.12355328351259232,
441
+ "eval_precision": 0.9958391123439667,
442
+ "eval_recall": 0.9573333333333334,
443
+ "eval_runtime": 464.5376,
444
+ "eval_samples_per_second": 6.458,
445
+ "eval_steps_per_second": 0.807,
446
  "step": 840
447
  },
448
  {
449
  "epoch": 0.5,
450
  "learning_rate": 2.5e-05,
451
+ "loss": 0.2574,
452
  "step": 875
453
  },
454
  {
455
  "epoch": 0.5,
456
+ "eval_accuracy": 0.9806666666666667,
457
+ "eval_f1": 0.9804054054054054,
458
+ "eval_loss": 0.126968115568161,
459
+ "eval_precision": 0.9938356164383562,
460
+ "eval_recall": 0.9673333333333334,
461
+ "eval_runtime": 465.883,
462
+ "eval_samples_per_second": 6.439,
463
+ "eval_steps_per_second": 0.805,
464
  "step": 875
465
  },
466
  {
467
  "epoch": 0.52,
468
  "learning_rate": 2.4e-05,
469
+ "loss": 0.1482,
470
  "step": 910
471
  },
472
  {
473
  "epoch": 0.52,
474
+ "eval_accuracy": 0.9576666666666667,
475
+ "eval_f1": 0.9558874609239321,
476
+ "eval_loss": 0.2774529755115509,
477
+ "eval_precision": 0.9978245105148659,
478
+ "eval_recall": 0.9173333333333333,
479
+ "eval_runtime": 465.4264,
480
+ "eval_samples_per_second": 6.446,
481
+ "eval_steps_per_second": 0.806,
482
  "step": 910
483
  },
484
  {
485
  "epoch": 0.54,
486
  "learning_rate": 2.3000000000000003e-05,
487
+ "loss": 0.1156,
488
  "step": 945
489
  },
490
  {
491
  "epoch": 0.54,
492
+ "eval_accuracy": 0.9856666666666667,
493
+ "eval_f1": 0.9855849815621857,
494
+ "eval_loss": 0.1439618021249771,
495
+ "eval_precision": 0.9912339851652057,
496
+ "eval_recall": 0.98,
497
+ "eval_runtime": 466.4591,
498
+ "eval_samples_per_second": 6.431,
499
+ "eval_steps_per_second": 0.804,
500
  "step": 945
501
  },
502
  {
503
  "epoch": 0.56,
504
  "learning_rate": 2.2000000000000003e-05,
505
+ "loss": 0.2393,
506
  "step": 980
507
  },
508
  {
509
  "epoch": 0.56,
510
+ "eval_accuracy": 0.9406666666666667,
511
+ "eval_f1": 0.9369688385269122,
512
+ "eval_loss": 0.37739551067352295,
513
+ "eval_precision": 0.9992447129909365,
514
+ "eval_recall": 0.882,
515
+ "eval_runtime": 465.7379,
516
+ "eval_samples_per_second": 6.441,
517
+ "eval_steps_per_second": 0.805,
518
  "step": 980
519
  },
520
  {
521
  "epoch": 0.58,
522
  "learning_rate": 2.1e-05,
523
+ "loss": 0.2364,
524
  "step": 1015
525
  },
526
  {
527
  "epoch": 0.58,
528
+ "eval_accuracy": 0.984,
529
+ "eval_f1": 0.9838601210490923,
530
+ "eval_loss": 0.0981753021478653,
531
+ "eval_precision": 0.9925373134328358,
532
+ "eval_recall": 0.9753333333333334,
533
+ "eval_runtime": 464.4225,
534
+ "eval_samples_per_second": 6.46,
535
+ "eval_steps_per_second": 0.807,
536
  "step": 1015
537
  },
538
  {
539
  "epoch": 0.6,
540
  "learning_rate": 2e-05,
541
+ "loss": 0.1246,
542
  "step": 1050
543
  },
544
  {
545
  "epoch": 0.6,
546
+ "eval_accuracy": 0.985,
547
+ "eval_f1": 0.9848637739656912,
548
+ "eval_loss": 0.08204901963472366,
549
+ "eval_precision": 0.9938900203665988,
550
+ "eval_recall": 0.976,
551
+ "eval_runtime": 466.5569,
552
+ "eval_samples_per_second": 6.43,
553
+ "eval_steps_per_second": 0.804,
554
  "step": 1050
555
  },
556
  {
557
  "epoch": 0.62,
558
  "learning_rate": 1.9e-05,
559
+ "loss": 0.1411,
560
  "step": 1085
561
  },
562
  {
563
  "epoch": 0.62,
564
+ "eval_accuracy": 0.9873333333333333,
565
+ "eval_f1": 0.9872824631860776,
566
+ "eval_loss": 0.05377618223428726,
567
+ "eval_precision": 0.991263440860215,
568
+ "eval_recall": 0.9833333333333333,
569
+ "eval_runtime": 464.6089,
570
+ "eval_samples_per_second": 6.457,
571
+ "eval_steps_per_second": 0.807,
572
  "step": 1085
573
  },
574
  {
575
  "epoch": 0.64,
576
  "learning_rate": 1.8e-05,
577
+ "loss": 0.2055,
578
  "step": 1120
579
  },
580
  {
581
  "epoch": 0.64,
582
+ "eval_accuracy": 0.9736666666666667,
583
+ "eval_f1": 0.9730099077553809,
584
+ "eval_loss": 0.15216030180454254,
585
+ "eval_precision": 0.9978976874562018,
586
+ "eval_recall": 0.9493333333333334,
587
+ "eval_runtime": 465.4747,
588
+ "eval_samples_per_second": 6.445,
589
+ "eval_steps_per_second": 0.806,
590
  "step": 1120
591
  },
592
  {
593
  "epoch": 0.66,
594
  "learning_rate": 1.7000000000000003e-05,
595
+ "loss": 0.3018,
596
  "step": 1155
597
  },
598
  {
599
  "epoch": 0.66,
600
+ "eval_accuracy": 0.9813333333333333,
601
+ "eval_f1": 0.9815059445178336,
602
+ "eval_loss": 0.07627255469560623,
603
+ "eval_precision": 0.9725130890052356,
604
+ "eval_recall": 0.9906666666666667,
605
+ "eval_runtime": 464.4389,
606
+ "eval_samples_per_second": 6.459,
607
+ "eval_steps_per_second": 0.807,
608
  "step": 1155
609
  },
610
  {
611
  "epoch": 0.68,
612
  "learning_rate": 1.6000000000000003e-05,
613
+ "loss": 0.1702,
614
  "step": 1190
615
  },
616
  {
617
  "epoch": 0.68,
618
+ "eval_accuracy": 0.9873333333333333,
619
+ "eval_f1": 0.9873586161011311,
620
+ "eval_loss": 0.07290682196617126,
621
+ "eval_precision": 0.9853917662682603,
622
+ "eval_recall": 0.9893333333333333,
623
+ "eval_runtime": 464.5835,
624
+ "eval_samples_per_second": 6.457,
625
+ "eval_steps_per_second": 0.807,
626
  "step": 1190
627
  },
628
  {
629
  "epoch": 0.7,
630
  "learning_rate": 1.5e-05,
631
+ "loss": 0.2085,
632
  "step": 1225
633
  },
634
  {
635
  "epoch": 0.7,
636
+ "eval_accuracy": 0.986,
637
+ "eval_f1": 0.9859060402684564,
638
+ "eval_loss": 0.08615541458129883,
639
+ "eval_precision": 0.9925675675675676,
640
+ "eval_recall": 0.9793333333333333,
641
+ "eval_runtime": 464.5251,
642
+ "eval_samples_per_second": 6.458,
643
+ "eval_steps_per_second": 0.807,
644
  "step": 1225
645
  },
646
  {
647
  "epoch": 0.72,
648
  "learning_rate": 1.4000000000000001e-05,
649
+ "loss": 0.0899,
650
  "step": 1260
651
  },
652
  {
653
  "epoch": 0.72,
654
+ "eval_accuracy": 0.987,
655
+ "eval_f1": 0.9869782971619365,
656
+ "eval_loss": 0.07593820989131927,
657
+ "eval_precision": 0.988628762541806,
658
+ "eval_recall": 0.9853333333333333,
659
+ "eval_runtime": 464.4867,
660
+ "eval_samples_per_second": 6.459,
661
+ "eval_steps_per_second": 0.807,
662
  "step": 1260
663
  },
664
  {
665
  "epoch": 0.74,
666
  "learning_rate": 1.3000000000000001e-05,
667
+ "loss": 0.212,
668
  "step": 1295
669
  },
670
  {
671
  "epoch": 0.74,
672
+ "eval_accuracy": 0.9846666666666667,
673
+ "eval_f1": 0.9847277556440903,
674
+ "eval_loss": 0.08726092427968979,
675
+ "eval_precision": 0.9808201058201058,
676
+ "eval_recall": 0.9886666666666667,
677
+ "eval_runtime": 463.8528,
678
+ "eval_samples_per_second": 6.468,
679
+ "eval_steps_per_second": 0.808,
680
  "step": 1295
681
  },
682
  {
683
  "epoch": 0.76,
684
  "learning_rate": 1.2e-05,
685
+ "loss": 0.0459,
686
  "step": 1330
687
  },
688
  {
689
  "epoch": 0.76,
690
+ "eval_accuracy": 0.981,
691
+ "eval_f1": 0.9807237064592493,
692
+ "eval_loss": 0.11619190126657486,
693
+ "eval_precision": 0.9951956074124915,
694
+ "eval_recall": 0.9666666666666667,
695
+ "eval_runtime": 463.7918,
696
+ "eval_samples_per_second": 6.468,
697
+ "eval_steps_per_second": 0.809,
698
  "step": 1330
699
  },
700
  {
701
  "epoch": 0.78,
702
  "learning_rate": 1.1000000000000001e-05,
703
+ "loss": 0.2035,
704
  "step": 1365
705
  },
706
  {
707
  "epoch": 0.78,
708
+ "eval_accuracy": 0.9876666666666667,
709
+ "eval_f1": 0.9875797247398456,
710
+ "eval_loss": 0.07956338673830032,
711
+ "eval_precision": 0.9945909398242055,
712
+ "eval_recall": 0.9806666666666667,
713
+ "eval_runtime": 464.4206,
714
+ "eval_samples_per_second": 6.46,
715
+ "eval_steps_per_second": 0.807,
716
  "step": 1365
717
  },
718
  {
719
  "epoch": 0.8,
720
  "learning_rate": 1e-05,
721
+ "loss": 0.0942,
722
  "step": 1400
723
  },
724
  {
725
  "epoch": 0.8,
726
+ "eval_accuracy": 0.979,
727
+ "eval_f1": 0.979269496544916,
728
+ "eval_loss": 0.09173166751861572,
729
+ "eval_precision": 0.9668615984405458,
730
+ "eval_recall": 0.992,
731
+ "eval_runtime": 464.6027,
732
+ "eval_samples_per_second": 6.457,
733
+ "eval_steps_per_second": 0.807,
734
  "step": 1400
735
  },
736
  {
737
  "epoch": 0.82,
738
  "learning_rate": 9e-06,
739
+ "loss": 0.161,
740
  "step": 1435
741
  },
742
  {
743
  "epoch": 0.82,
744
+ "eval_accuracy": 0.9873333333333333,
745
+ "eval_f1": 0.9872397582269979,
746
+ "eval_loss": 0.06276000291109085,
747
+ "eval_precision": 0.9945872801082544,
748
+ "eval_recall": 0.98,
749
+ "eval_runtime": 463.3979,
750
+ "eval_samples_per_second": 6.474,
751
+ "eval_steps_per_second": 0.809,
752
  "step": 1435
753
  },
754
  {
755
  "epoch": 0.84,
756
  "learning_rate": 8.000000000000001e-06,
757
+ "loss": 0.0365,
758
  "step": 1470
759
  },
760
  {
761
  "epoch": 0.84,
762
+ "eval_accuracy": 0.9843333333333333,
763
+ "eval_f1": 0.9841162554917202,
764
+ "eval_loss": 0.08324441313743591,
765
+ "eval_precision": 0.997943797121316,
766
+ "eval_recall": 0.9706666666666667,
767
+ "eval_runtime": 463.6189,
768
+ "eval_samples_per_second": 6.471,
769
  "eval_steps_per_second": 0.809,
770
  "step": 1470
771
  },
772
  {
773
  "epoch": 0.86,
774
  "learning_rate": 7.000000000000001e-06,
775
+ "loss": 0.0508,
776
  "step": 1505
777
  },
778
  {
779
  "epoch": 0.86,
780
+ "eval_accuracy": 0.9843333333333333,
781
+ "eval_f1": 0.9841162554917202,
782
+ "eval_loss": 0.09412873536348343,
783
+ "eval_precision": 0.997943797121316,
784
+ "eval_recall": 0.9706666666666667,
785
+ "eval_runtime": 466.9343,
786
+ "eval_samples_per_second": 6.425,
787
+ "eval_steps_per_second": 0.803,
788
  "step": 1505
789
  },
790
  {
791
  "epoch": 0.88,
792
  "learning_rate": 6e-06,
793
+ "loss": 0.0597,
794
  "step": 1540
795
  },
796
  {
797
  "epoch": 0.88,
798
+ "eval_accuracy": 0.9873333333333333,
799
+ "eval_f1": 0.9872994652406418,
800
+ "eval_loss": 0.05775593966245651,
801
+ "eval_precision": 0.989946380697051,
802
+ "eval_recall": 0.9846666666666667,
803
+ "eval_runtime": 464.2967,
804
+ "eval_samples_per_second": 6.461,
805
+ "eval_steps_per_second": 0.808,
806
  "step": 1540
807
  },
808
  {
809
  "epoch": 0.9,
810
  "learning_rate": 5e-06,
811
+ "loss": 0.0055,
812
  "step": 1575
813
  },
814
  {
815
  "epoch": 0.9,
816
+ "eval_accuracy": 0.9863333333333333,
817
+ "eval_f1": 0.9861813279406809,
818
+ "eval_loss": 0.07944045215845108,
819
+ "eval_precision": 0.9972733469665985,
820
+ "eval_recall": 0.9753333333333334,
821
+ "eval_runtime": 464.3973,
822
+ "eval_samples_per_second": 6.46,
823
+ "eval_steps_per_second": 0.807,
824
  "step": 1575
825
  },
826
  {
827
  "epoch": 0.92,
828
  "learning_rate": 4.000000000000001e-06,
829
+ "loss": 0.0681,
830
  "step": 1610
831
  },
832
  {
833
  "epoch": 0.92,
834
+ "eval_accuracy": 0.9873333333333333,
835
+ "eval_f1": 0.9872139973082099,
836
+ "eval_loss": 0.07313308119773865,
837
+ "eval_precision": 0.9966032608695652,
838
+ "eval_recall": 0.978,
839
+ "eval_runtime": 464.8711,
840
+ "eval_samples_per_second": 6.453,
841
+ "eval_steps_per_second": 0.807,
842
  "step": 1610
843
  },
844
  {
845
  "epoch": 0.94,
846
  "learning_rate": 3e-06,
847
+ "loss": 0.0978,
848
  "step": 1645
849
  },
850
  {
851
  "epoch": 0.94,
852
+ "eval_accuracy": 0.9883333333333333,
853
+ "eval_f1": 0.9883138564273791,
854
+ "eval_loss": 0.058573223650455475,
855
+ "eval_precision": 0.9899665551839465,
856
+ "eval_recall": 0.9866666666666667,
857
+ "eval_runtime": 465.0949,
858
+ "eval_samples_per_second": 6.45,
859
+ "eval_steps_per_second": 0.806,
860
  "step": 1645
861
  },
862
  {
863
  "epoch": 0.96,
864
  "learning_rate": 2.0000000000000003e-06,
865
+ "loss": 0.1392,
866
  "step": 1680
867
  },
868
  {
869
  "epoch": 0.96,
870
+ "eval_accuracy": 0.9883333333333333,
871
+ "eval_f1": 0.9883060474440362,
872
+ "eval_loss": 0.055864058434963226,
873
+ "eval_precision": 0.9906229068988613,
874
+ "eval_recall": 0.986,
875
+ "eval_runtime": 464.3364,
876
+ "eval_samples_per_second": 6.461,
877
+ "eval_steps_per_second": 0.808,
878
  "step": 1680
879
  },
880
  {
881
  "epoch": 0.98,
882
  "learning_rate": 1.0000000000000002e-06,
883
+ "loss": 0.0432,
884
  "step": 1715
885
  },
886
  {
887
  "epoch": 0.98,
888
+ "eval_accuracy": 0.9883333333333333,
889
+ "eval_f1": 0.9883060474440362,
890
+ "eval_loss": 0.055420782417058945,
891
+ "eval_precision": 0.9906229068988613,
892
+ "eval_recall": 0.986,
893
+ "eval_runtime": 464.6017,
894
+ "eval_samples_per_second": 6.457,
895
+ "eval_steps_per_second": 0.807,
896
  "step": 1715
897
  },
898
  {
899
  "epoch": 1.0,
900
  "learning_rate": 0.0,
901
+ "loss": 0.0006,
902
  "step": 1750
903
  },
904
  {
905
  "epoch": 1.0,
906
+ "eval_accuracy": 0.988,
907
+ "eval_f1": 0.9879679144385026,
908
+ "eval_loss": 0.05567142367362976,
909
+ "eval_precision": 0.9906166219839142,
910
+ "eval_recall": 0.9853333333333333,
911
+ "eval_runtime": 464.5424,
912
+ "eval_samples_per_second": 6.458,
913
+ "eval_steps_per_second": 0.807,
914
  "step": 1750
915
  }
916
  ],