mmomm25 commited on
Commit
e242497
1 Parent(s): 0bf750a

Training in progress, epoch 4

Browse files
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:060eaa533f9eb9f8cfeec8df48b0cbbc9a441a176c178abd9ff8dac3418c4a3d
3
  size 343230128
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a30970205203a66044e82b0f2d859f75263755860693688948ab78c4df61a7d
3
  size 343230128
tmp-checkpoint-5573/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c897248b05a595815815af90cc11a34420404e8373577ba38254cec3b3d8db70
3
  size 343230128
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a30970205203a66044e82b0f2d859f75263755860693688948ab78c4df61a7d
3
  size 343230128
tmp-checkpoint-5573/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fc38ead823ba555cd314999d2a7dea3eeb68b769d09390f1c3200dfc2379395b
3
  size 686581178
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5d48912e184047ff12f593595e16ddfc1c743eaca237d092a6563b5cd10d2047
3
  size 686581178
tmp-checkpoint-5573/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e552cb9eaa9f79b2e530d02d6ff7e293885a342dfcfe67bf8980446d6d9ceb12
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef1f82b7c27d56152be2ffc3305ebd61ca0dd521f605c40bdc495d74bde7812a
3
  size 14244
tmp-checkpoint-5573/trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 0.1309136003255844,
3
  "best_model_checkpoint": "vit-base-patch16-224-in21k-crack-detectorVITmain50epochs\\checkpoint-5573",
4
  "epoch": 4.999327203408836,
5
  "eval_steps": 500,
@@ -11,3443 +11,3443 @@
11
  {
12
  "epoch": 0.01,
13
  "learning_rate": 8.976660682226213e-08,
14
- "loss": 1.3944,
15
  "step": 10
16
  },
17
  {
18
  "epoch": 0.02,
19
  "learning_rate": 1.7953321364452426e-07,
20
- "loss": 1.3875,
21
  "step": 20
22
  },
23
  {
24
  "epoch": 0.03,
25
  "learning_rate": 2.692998204667864e-07,
26
- "loss": 1.3999,
27
  "step": 30
28
  },
29
  {
30
  "epoch": 0.04,
31
  "learning_rate": 3.590664272890485e-07,
32
- "loss": 1.3915,
33
  "step": 40
34
  },
35
  {
36
  "epoch": 0.04,
37
  "learning_rate": 4.4883303411131064e-07,
38
- "loss": 1.3856,
39
  "step": 50
40
  },
41
  {
42
  "epoch": 0.05,
43
  "learning_rate": 5.385996409335728e-07,
44
- "loss": 1.3912,
45
  "step": 60
46
  },
47
  {
48
  "epoch": 0.06,
49
  "learning_rate": 6.283662477558349e-07,
50
- "loss": 1.3902,
51
  "step": 70
52
  },
53
  {
54
  "epoch": 0.07,
55
  "learning_rate": 7.18132854578097e-07,
56
- "loss": 1.3851,
57
  "step": 80
58
  },
59
  {
60
  "epoch": 0.08,
61
  "learning_rate": 8.078994614003591e-07,
62
- "loss": 1.3755,
63
  "step": 90
64
  },
65
  {
66
  "epoch": 0.09,
67
  "learning_rate": 8.976660682226213e-07,
68
- "loss": 1.3793,
69
  "step": 100
70
  },
71
  {
72
  "epoch": 0.1,
73
  "learning_rate": 9.874326750448833e-07,
74
- "loss": 1.3737,
75
  "step": 110
76
  },
77
  {
78
  "epoch": 0.11,
79
  "learning_rate": 1.0771992818671456e-06,
80
- "loss": 1.3736,
81
  "step": 120
82
  },
83
  {
84
  "epoch": 0.12,
85
  "learning_rate": 1.1669658886894075e-06,
86
- "loss": 1.3678,
87
  "step": 130
88
  },
89
  {
90
  "epoch": 0.13,
91
  "learning_rate": 1.2567324955116697e-06,
92
- "loss": 1.3637,
93
  "step": 140
94
  },
95
  {
96
  "epoch": 0.13,
97
  "learning_rate": 1.3464991023339318e-06,
98
- "loss": 1.357,
99
  "step": 150
100
  },
101
  {
102
  "epoch": 0.14,
103
  "learning_rate": 1.436265709156194e-06,
104
- "loss": 1.3583,
105
  "step": 160
106
  },
107
  {
108
  "epoch": 0.15,
109
  "learning_rate": 1.5260323159784561e-06,
110
- "loss": 1.361,
111
  "step": 170
112
  },
113
  {
114
  "epoch": 0.16,
115
  "learning_rate": 1.6157989228007182e-06,
116
- "loss": 1.3517,
117
  "step": 180
118
  },
119
  {
120
  "epoch": 0.17,
121
  "learning_rate": 1.7055655296229805e-06,
122
- "loss": 1.3543,
123
  "step": 190
124
  },
125
  {
126
  "epoch": 0.18,
127
  "learning_rate": 1.7953321364452425e-06,
128
- "loss": 1.3497,
129
  "step": 200
130
  },
131
  {
132
  "epoch": 0.19,
133
  "learning_rate": 1.8850987432675046e-06,
134
- "loss": 1.3436,
135
  "step": 210
136
  },
137
  {
138
  "epoch": 0.2,
139
  "learning_rate": 1.9748653500897667e-06,
140
- "loss": 1.3304,
141
  "step": 220
142
  },
143
  {
144
  "epoch": 0.21,
145
  "learning_rate": 2.064631956912029e-06,
146
- "loss": 1.3262,
147
  "step": 230
148
  },
149
  {
150
  "epoch": 0.22,
151
  "learning_rate": 2.1543985637342912e-06,
152
- "loss": 1.3209,
153
  "step": 240
154
  },
155
  {
156
  "epoch": 0.22,
157
  "learning_rate": 2.244165170556553e-06,
158
- "loss": 1.3054,
159
  "step": 250
160
  },
161
  {
162
  "epoch": 0.23,
163
  "learning_rate": 2.333931777378815e-06,
164
- "loss": 1.3028,
165
  "step": 260
166
  },
167
  {
168
  "epoch": 0.24,
169
  "learning_rate": 2.423698384201077e-06,
170
- "loss": 1.2939,
171
  "step": 270
172
  },
173
  {
174
  "epoch": 0.25,
175
  "learning_rate": 2.5134649910233395e-06,
176
- "loss": 1.2894,
177
  "step": 280
178
  },
179
  {
180
  "epoch": 0.26,
181
  "learning_rate": 2.6032315978456015e-06,
182
- "loss": 1.2838,
183
  "step": 290
184
  },
185
  {
186
  "epoch": 0.27,
187
  "learning_rate": 2.6929982046678636e-06,
188
- "loss": 1.2625,
189
  "step": 300
190
  },
191
  {
192
  "epoch": 0.28,
193
  "learning_rate": 2.7827648114901257e-06,
194
- "loss": 1.2727,
195
  "step": 310
196
  },
197
  {
198
  "epoch": 0.29,
199
  "learning_rate": 2.872531418312388e-06,
200
- "loss": 1.2398,
201
  "step": 320
202
  },
203
  {
204
  "epoch": 0.3,
205
  "learning_rate": 2.9622980251346502e-06,
206
- "loss": 1.2267,
207
  "step": 330
208
  },
209
  {
210
  "epoch": 0.31,
211
  "learning_rate": 3.0520646319569123e-06,
212
- "loss": 1.2359,
213
  "step": 340
214
  },
215
  {
216
  "epoch": 0.31,
217
  "learning_rate": 3.1418312387791743e-06,
218
- "loss": 1.218,
219
  "step": 350
220
  },
221
  {
222
  "epoch": 0.32,
223
  "learning_rate": 3.2315978456014364e-06,
224
- "loss": 1.1877,
225
  "step": 360
226
  },
227
  {
228
  "epoch": 0.33,
229
  "learning_rate": 3.321364452423698e-06,
230
- "loss": 1.1764,
231
  "step": 370
232
  },
233
  {
234
  "epoch": 0.34,
235
  "learning_rate": 3.411131059245961e-06,
236
- "loss": 1.151,
237
  "step": 380
238
  },
239
  {
240
  "epoch": 0.35,
241
  "learning_rate": 3.500897666068223e-06,
242
- "loss": 1.1334,
243
  "step": 390
244
  },
245
  {
246
  "epoch": 0.36,
247
  "learning_rate": 3.590664272890485e-06,
248
- "loss": 1.1272,
249
  "step": 400
250
  },
251
  {
252
  "epoch": 0.37,
253
  "learning_rate": 3.680430879712747e-06,
254
- "loss": 1.1077,
255
  "step": 410
256
  },
257
  {
258
  "epoch": 0.38,
259
  "learning_rate": 3.770197486535009e-06,
260
- "loss": 1.1021,
261
  "step": 420
262
  },
263
  {
264
  "epoch": 0.39,
265
  "learning_rate": 3.859964093357271e-06,
266
- "loss": 1.0583,
267
  "step": 430
268
  },
269
  {
270
  "epoch": 0.39,
271
  "learning_rate": 3.949730700179533e-06,
272
- "loss": 1.0482,
273
  "step": 440
274
  },
275
  {
276
  "epoch": 0.4,
277
  "learning_rate": 4.039497307001795e-06,
278
- "loss": 1.0214,
279
  "step": 450
280
  },
281
  {
282
  "epoch": 0.41,
283
  "learning_rate": 4.129263913824058e-06,
284
- "loss": 1.0114,
285
  "step": 460
286
  },
287
  {
288
  "epoch": 0.42,
289
  "learning_rate": 4.21903052064632e-06,
290
- "loss": 1.0237,
291
  "step": 470
292
  },
293
  {
294
  "epoch": 0.43,
295
  "learning_rate": 4.3087971274685824e-06,
296
- "loss": 0.969,
297
  "step": 480
298
  },
299
  {
300
  "epoch": 0.44,
301
  "learning_rate": 4.3985637342908445e-06,
302
- "loss": 0.966,
303
  "step": 490
304
  },
305
  {
306
  "epoch": 0.45,
307
  "learning_rate": 4.488330341113106e-06,
308
- "loss": 0.9518,
309
  "step": 500
310
  },
311
  {
312
  "epoch": 0.46,
313
  "learning_rate": 4.578096947935368e-06,
314
- "loss": 0.9545,
315
  "step": 510
316
  },
317
  {
318
  "epoch": 0.47,
319
  "learning_rate": 4.66786355475763e-06,
320
- "loss": 0.9473,
321
  "step": 520
322
  },
323
  {
324
  "epoch": 0.48,
325
  "learning_rate": 4.757630161579892e-06,
326
- "loss": 0.9026,
327
  "step": 530
328
  },
329
  {
330
  "epoch": 0.48,
331
  "learning_rate": 4.847396768402154e-06,
332
- "loss": 0.8535,
333
  "step": 540
334
  },
335
  {
336
  "epoch": 0.49,
337
  "learning_rate": 4.937163375224417e-06,
338
- "loss": 0.8598,
339
  "step": 550
340
  },
341
  {
342
  "epoch": 0.5,
343
  "learning_rate": 5.026929982046679e-06,
344
- "loss": 0.8154,
345
  "step": 560
346
  },
347
  {
348
  "epoch": 0.51,
349
  "learning_rate": 5.116696588868941e-06,
350
- "loss": 0.8561,
351
  "step": 570
352
  },
353
  {
354
  "epoch": 0.52,
355
  "learning_rate": 5.206463195691203e-06,
356
- "loss": 0.7924,
357
  "step": 580
358
  },
359
  {
360
  "epoch": 0.53,
361
  "learning_rate": 5.296229802513465e-06,
362
- "loss": 0.7947,
363
  "step": 590
364
  },
365
  {
366
  "epoch": 0.54,
367
  "learning_rate": 5.385996409335727e-06,
368
- "loss": 0.7999,
369
  "step": 600
370
  },
371
  {
372
  "epoch": 0.55,
373
  "learning_rate": 5.475763016157989e-06,
374
- "loss": 0.7771,
375
  "step": 610
376
  },
377
  {
378
  "epoch": 0.56,
379
  "learning_rate": 5.565529622980251e-06,
380
- "loss": 0.7826,
381
  "step": 620
382
  },
383
  {
384
  "epoch": 0.57,
385
  "learning_rate": 5.655296229802514e-06,
386
- "loss": 0.7321,
387
  "step": 630
388
  },
389
  {
390
  "epoch": 0.57,
391
  "learning_rate": 5.745062836624776e-06,
392
- "loss": 0.7689,
393
  "step": 640
394
  },
395
  {
396
  "epoch": 0.58,
397
  "learning_rate": 5.834829443447038e-06,
398
- "loss": 0.7546,
399
  "step": 650
400
  },
401
  {
402
  "epoch": 0.59,
403
  "learning_rate": 5.9245960502693004e-06,
404
- "loss": 0.7336,
405
  "step": 660
406
  },
407
  {
408
  "epoch": 0.6,
409
  "learning_rate": 6.0143626570915625e-06,
410
- "loss": 0.6889,
411
  "step": 670
412
  },
413
  {
414
  "epoch": 0.61,
415
  "learning_rate": 6.1041292639138246e-06,
416
- "loss": 0.6996,
417
  "step": 680
418
  },
419
  {
420
  "epoch": 0.62,
421
  "learning_rate": 6.193895870736087e-06,
422
- "loss": 0.6994,
423
  "step": 690
424
  },
425
  {
426
  "epoch": 0.63,
427
  "learning_rate": 6.283662477558349e-06,
428
- "loss": 0.74,
429
  "step": 700
430
  },
431
  {
432
  "epoch": 0.64,
433
  "learning_rate": 6.373429084380611e-06,
434
- "loss": 0.711,
435
  "step": 710
436
  },
437
  {
438
  "epoch": 0.65,
439
  "learning_rate": 6.463195691202873e-06,
440
- "loss": 0.6765,
441
  "step": 720
442
  },
443
  {
444
  "epoch": 0.65,
445
  "learning_rate": 6.552962298025135e-06,
446
- "loss": 0.6717,
447
  "step": 730
448
  },
449
  {
450
  "epoch": 0.66,
451
  "learning_rate": 6.642728904847396e-06,
452
- "loss": 0.6776,
453
  "step": 740
454
  },
455
  {
456
  "epoch": 0.67,
457
  "learning_rate": 6.732495511669659e-06,
458
- "loss": 0.62,
459
  "step": 750
460
  },
461
  {
462
  "epoch": 0.68,
463
  "learning_rate": 6.822262118491922e-06,
464
- "loss": 0.6392,
465
  "step": 760
466
  },
467
  {
468
  "epoch": 0.69,
469
  "learning_rate": 6.912028725314183e-06,
470
- "loss": 0.62,
471
  "step": 770
472
  },
473
  {
474
  "epoch": 0.7,
475
  "learning_rate": 7.001795332136446e-06,
476
- "loss": 0.6103,
477
  "step": 780
478
  },
479
  {
480
  "epoch": 0.71,
481
  "learning_rate": 7.091561938958707e-06,
482
- "loss": 0.6504,
483
  "step": 790
484
  },
485
  {
486
  "epoch": 0.72,
487
  "learning_rate": 7.18132854578097e-06,
488
- "loss": 0.6126,
489
  "step": 800
490
  },
491
  {
492
  "epoch": 0.73,
493
  "learning_rate": 7.271095152603231e-06,
494
- "loss": 0.5707,
495
  "step": 810
496
  },
497
  {
498
  "epoch": 0.74,
499
  "learning_rate": 7.360861759425494e-06,
500
- "loss": 0.587,
501
  "step": 820
502
  },
503
  {
504
  "epoch": 0.74,
505
  "learning_rate": 7.4506283662477555e-06,
506
- "loss": 0.5684,
507
  "step": 830
508
  },
509
  {
510
  "epoch": 0.75,
511
  "learning_rate": 7.540394973070018e-06,
512
- "loss": 0.5586,
513
  "step": 840
514
  },
515
  {
516
  "epoch": 0.76,
517
  "learning_rate": 7.630161579892281e-06,
518
- "loss": 0.6074,
519
  "step": 850
520
  },
521
  {
522
  "epoch": 0.77,
523
  "learning_rate": 7.719928186714543e-06,
524
- "loss": 0.5638,
525
  "step": 860
526
  },
527
  {
528
  "epoch": 0.78,
529
  "learning_rate": 7.809694793536805e-06,
530
- "loss": 0.5952,
531
  "step": 870
532
  },
533
  {
534
  "epoch": 0.79,
535
  "learning_rate": 7.899461400359067e-06,
536
- "loss": 0.5647,
537
  "step": 880
538
  },
539
  {
540
  "epoch": 0.8,
541
  "learning_rate": 7.98922800718133e-06,
542
- "loss": 0.6049,
543
  "step": 890
544
  },
545
  {
546
  "epoch": 0.81,
547
  "learning_rate": 8.07899461400359e-06,
548
- "loss": 0.5525,
549
  "step": 900
550
  },
551
  {
552
  "epoch": 0.82,
553
  "learning_rate": 8.168761220825854e-06,
554
- "loss": 0.522,
555
  "step": 910
556
  },
557
  {
558
  "epoch": 0.83,
559
  "learning_rate": 8.258527827648117e-06,
560
- "loss": 0.5489,
561
  "step": 920
562
  },
563
  {
564
  "epoch": 0.83,
565
  "learning_rate": 8.348294434470378e-06,
566
- "loss": 0.5394,
567
  "step": 930
568
  },
569
  {
570
  "epoch": 0.84,
571
  "learning_rate": 8.43806104129264e-06,
572
- "loss": 0.5707,
573
  "step": 940
574
  },
575
  {
576
  "epoch": 0.85,
577
  "learning_rate": 8.527827648114902e-06,
578
- "loss": 0.5322,
579
  "step": 950
580
  },
581
  {
582
  "epoch": 0.86,
583
  "learning_rate": 8.617594254937165e-06,
584
- "loss": 0.5637,
585
  "step": 960
586
  },
587
  {
588
  "epoch": 0.87,
589
  "learning_rate": 8.707360861759426e-06,
590
- "loss": 0.529,
591
  "step": 970
592
  },
593
  {
594
  "epoch": 0.88,
595
  "learning_rate": 8.797127468581689e-06,
596
- "loss": 0.51,
597
  "step": 980
598
  },
599
  {
600
  "epoch": 0.89,
601
  "learning_rate": 8.88689407540395e-06,
602
- "loss": 0.4844,
603
  "step": 990
604
  },
605
  {
606
  "epoch": 0.9,
607
  "learning_rate": 8.976660682226211e-06,
608
- "loss": 0.5349,
609
  "step": 1000
610
  },
611
  {
612
  "epoch": 0.91,
613
  "learning_rate": 9.066427289048474e-06,
614
- "loss": 0.5377,
615
  "step": 1010
616
  },
617
  {
618
  "epoch": 0.92,
619
  "learning_rate": 9.156193895870736e-06,
620
- "loss": 0.4985,
621
  "step": 1020
622
  },
623
  {
624
  "epoch": 0.92,
625
  "learning_rate": 9.245960502692998e-06,
626
- "loss": 0.5533,
627
  "step": 1030
628
  },
629
  {
630
  "epoch": 0.93,
631
  "learning_rate": 9.33572710951526e-06,
632
- "loss": 0.4955,
633
  "step": 1040
634
  },
635
  {
636
  "epoch": 0.94,
637
  "learning_rate": 9.425493716337523e-06,
638
- "loss": 0.541,
639
  "step": 1050
640
  },
641
  {
642
  "epoch": 0.95,
643
  "learning_rate": 9.515260323159784e-06,
644
- "loss": 0.4757,
645
  "step": 1060
646
  },
647
  {
648
  "epoch": 0.96,
649
  "learning_rate": 9.605026929982047e-06,
650
- "loss": 0.5478,
651
  "step": 1070
652
  },
653
  {
654
  "epoch": 0.97,
655
  "learning_rate": 9.694793536804308e-06,
656
- "loss": 0.5281,
657
  "step": 1080
658
  },
659
  {
660
  "epoch": 0.98,
661
  "learning_rate": 9.784560143626571e-06,
662
- "loss": 0.5187,
663
  "step": 1090
664
  },
665
  {
666
  "epoch": 0.99,
667
  "learning_rate": 9.874326750448834e-06,
668
- "loss": 0.4591,
669
  "step": 1100
670
  },
671
  {
672
  "epoch": 1.0,
673
  "learning_rate": 9.964093357271095e-06,
674
- "loss": 0.5344,
675
  "step": 1110
676
  },
677
  {
678
  "epoch": 1.0,
679
  "eval_accuracy": {
680
- "accuracy": 0.8744113029827315
681
  },
682
  "eval_f1": {
683
- "f1": 0.8645201833830978
684
  },
685
- "eval_loss": 0.40481075644493103,
686
  "eval_precision": {
687
- "precision": 0.8668175457367389
688
  },
689
  "eval_recall": {
690
- "recall": 0.8631211355155497
691
  },
692
- "eval_runtime": 243.6123,
693
- "eval_samples_per_second": 73.215,
694
- "eval_steps_per_second": 4.577,
695
  "step": 1114
696
  },
697
  {
698
  "epoch": 1.0,
699
  "learning_rate": 1.0053859964093358e-05,
700
- "loss": 0.5092,
701
  "step": 1120
702
  },
703
  {
704
  "epoch": 1.01,
705
  "learning_rate": 1.0143626570915619e-05,
706
- "loss": 0.4834,
707
  "step": 1130
708
  },
709
  {
710
  "epoch": 1.02,
711
  "learning_rate": 1.0233393177737882e-05,
712
- "loss": 0.5403,
713
  "step": 1140
714
  },
715
  {
716
  "epoch": 1.03,
717
  "learning_rate": 1.0323159784560143e-05,
718
- "loss": 0.5109,
719
  "step": 1150
720
  },
721
  {
722
  "epoch": 1.04,
723
  "learning_rate": 1.0412926391382406e-05,
724
- "loss": 0.5002,
725
  "step": 1160
726
  },
727
  {
728
  "epoch": 1.05,
729
  "learning_rate": 1.0502692998204669e-05,
730
- "loss": 0.5052,
731
  "step": 1170
732
  },
733
  {
734
  "epoch": 1.06,
735
  "learning_rate": 1.059245960502693e-05,
736
- "loss": 0.4642,
737
  "step": 1180
738
  },
739
  {
740
  "epoch": 1.07,
741
  "learning_rate": 1.0682226211849193e-05,
742
- "loss": 0.4479,
743
  "step": 1190
744
  },
745
  {
746
  "epoch": 1.08,
747
  "learning_rate": 1.0771992818671454e-05,
748
- "loss": 0.4944,
749
  "step": 1200
750
  },
751
  {
752
  "epoch": 1.09,
753
  "learning_rate": 1.0861759425493717e-05,
754
- "loss": 0.4583,
755
  "step": 1210
756
  },
757
  {
758
  "epoch": 1.09,
759
  "learning_rate": 1.0951526032315979e-05,
760
- "loss": 0.4779,
761
  "step": 1220
762
  },
763
  {
764
  "epoch": 1.1,
765
  "learning_rate": 1.1041292639138241e-05,
766
- "loss": 0.5038,
767
  "step": 1230
768
  },
769
  {
770
  "epoch": 1.11,
771
  "learning_rate": 1.1131059245960503e-05,
772
- "loss": 0.4831,
773
  "step": 1240
774
  },
775
  {
776
  "epoch": 1.12,
777
  "learning_rate": 1.1220825852782766e-05,
778
- "loss": 0.4999,
779
  "step": 1250
780
  },
781
  {
782
  "epoch": 1.13,
783
  "learning_rate": 1.1310592459605028e-05,
784
- "loss": 0.4611,
785
  "step": 1260
786
  },
787
  {
788
  "epoch": 1.14,
789
  "learning_rate": 1.140035906642729e-05,
790
- "loss": 0.4775,
791
  "step": 1270
792
  },
793
  {
794
  "epoch": 1.15,
795
  "learning_rate": 1.1490125673249553e-05,
796
- "loss": 0.4388,
797
  "step": 1280
798
  },
799
  {
800
  "epoch": 1.16,
801
  "learning_rate": 1.1579892280071814e-05,
802
- "loss": 0.4374,
803
  "step": 1290
804
  },
805
  {
806
  "epoch": 1.17,
807
  "learning_rate": 1.1669658886894077e-05,
808
- "loss": 0.4608,
809
  "step": 1300
810
  },
811
  {
812
  "epoch": 1.18,
813
  "learning_rate": 1.1759425493716338e-05,
814
- "loss": 0.4973,
815
  "step": 1310
816
  },
817
  {
818
  "epoch": 1.18,
819
  "learning_rate": 1.1849192100538601e-05,
820
- "loss": 0.4387,
821
  "step": 1320
822
  },
823
  {
824
  "epoch": 1.19,
825
  "learning_rate": 1.1938958707360862e-05,
826
- "loss": 0.4617,
827
  "step": 1330
828
  },
829
  {
830
  "epoch": 1.2,
831
  "learning_rate": 1.2028725314183125e-05,
832
- "loss": 0.4631,
833
  "step": 1340
834
  },
835
  {
836
  "epoch": 1.21,
837
  "learning_rate": 1.2118491921005388e-05,
838
- "loss": 0.4869,
839
  "step": 1350
840
  },
841
  {
842
  "epoch": 1.22,
843
  "learning_rate": 1.2208258527827649e-05,
844
- "loss": 0.4699,
845
  "step": 1360
846
  },
847
  {
848
  "epoch": 1.23,
849
  "learning_rate": 1.2298025134649912e-05,
850
- "loss": 0.482,
851
  "step": 1370
852
  },
853
  {
854
  "epoch": 1.24,
855
  "learning_rate": 1.2387791741472173e-05,
856
- "loss": 0.4067,
857
  "step": 1380
858
  },
859
  {
860
  "epoch": 1.25,
861
  "learning_rate": 1.2477558348294434e-05,
862
- "loss": 0.4218,
863
  "step": 1390
864
  },
865
  {
866
  "epoch": 1.26,
867
  "learning_rate": 1.2567324955116697e-05,
868
- "loss": 0.4643,
869
  "step": 1400
870
  },
871
  {
872
  "epoch": 1.26,
873
  "learning_rate": 1.2657091561938959e-05,
874
- "loss": 0.4317,
875
  "step": 1410
876
  },
877
  {
878
  "epoch": 1.27,
879
  "learning_rate": 1.2746858168761221e-05,
880
- "loss": 0.4303,
881
  "step": 1420
882
  },
883
  {
884
  "epoch": 1.28,
885
  "learning_rate": 1.2836624775583483e-05,
886
- "loss": 0.4247,
887
  "step": 1430
888
  },
889
  {
890
  "epoch": 1.29,
891
  "learning_rate": 1.2926391382405746e-05,
892
- "loss": 0.4553,
893
  "step": 1440
894
  },
895
  {
896
  "epoch": 1.3,
897
  "learning_rate": 1.3016157989228009e-05,
898
- "loss": 0.4527,
899
  "step": 1450
900
  },
901
  {
902
  "epoch": 1.31,
903
  "learning_rate": 1.310592459605027e-05,
904
- "loss": 0.4216,
905
  "step": 1460
906
  },
907
  {
908
  "epoch": 1.32,
909
  "learning_rate": 1.3195691202872531e-05,
910
- "loss": 0.4481,
911
  "step": 1470
912
  },
913
  {
914
  "epoch": 1.33,
915
  "learning_rate": 1.3285457809694792e-05,
916
- "loss": 0.3926,
917
  "step": 1480
918
  },
919
  {
920
  "epoch": 1.34,
921
  "learning_rate": 1.3375224416517057e-05,
922
- "loss": 0.4644,
923
  "step": 1490
924
  },
925
  {
926
  "epoch": 1.35,
927
  "learning_rate": 1.3464991023339318e-05,
928
- "loss": 0.4156,
929
  "step": 1500
930
  },
931
  {
932
  "epoch": 1.35,
933
  "learning_rate": 1.355475763016158e-05,
934
- "loss": 0.4165,
935
  "step": 1510
936
  },
937
  {
938
  "epoch": 1.36,
939
  "learning_rate": 1.3644524236983844e-05,
940
- "loss": 0.427,
941
  "step": 1520
942
  },
943
  {
944
  "epoch": 1.37,
945
  "learning_rate": 1.3734290843806105e-05,
946
- "loss": 0.4229,
947
  "step": 1530
948
  },
949
  {
950
  "epoch": 1.38,
951
  "learning_rate": 1.3824057450628366e-05,
952
- "loss": 0.3961,
953
  "step": 1540
954
  },
955
  {
956
  "epoch": 1.39,
957
  "learning_rate": 1.3913824057450627e-05,
958
- "loss": 0.4438,
959
  "step": 1550
960
  },
961
  {
962
  "epoch": 1.4,
963
  "learning_rate": 1.4003590664272892e-05,
964
- "loss": 0.4037,
965
  "step": 1560
966
  },
967
  {
968
  "epoch": 1.41,
969
  "learning_rate": 1.4093357271095153e-05,
970
- "loss": 0.4341,
971
  "step": 1570
972
  },
973
  {
974
  "epoch": 1.42,
975
  "learning_rate": 1.4183123877917415e-05,
976
- "loss": 0.3715,
977
  "step": 1580
978
  },
979
  {
980
  "epoch": 1.43,
981
  "learning_rate": 1.4272890484739679e-05,
982
- "loss": 0.4292,
983
  "step": 1590
984
  },
985
  {
986
  "epoch": 1.44,
987
  "learning_rate": 1.436265709156194e-05,
988
- "loss": 0.4539,
989
  "step": 1600
990
  },
991
  {
992
  "epoch": 1.44,
993
  "learning_rate": 1.4452423698384202e-05,
994
- "loss": 0.409,
995
  "step": 1610
996
  },
997
  {
998
  "epoch": 1.45,
999
  "learning_rate": 1.4542190305206463e-05,
1000
- "loss": 0.4473,
1001
  "step": 1620
1002
  },
1003
  {
1004
  "epoch": 1.46,
1005
  "learning_rate": 1.4631956912028727e-05,
1006
- "loss": 0.4202,
1007
  "step": 1630
1008
  },
1009
  {
1010
  "epoch": 1.47,
1011
  "learning_rate": 1.4721723518850989e-05,
1012
- "loss": 0.3701,
1013
  "step": 1640
1014
  },
1015
  {
1016
  "epoch": 1.48,
1017
  "learning_rate": 1.481149012567325e-05,
1018
- "loss": 0.4451,
1019
  "step": 1650
1020
  },
1021
  {
1022
  "epoch": 1.49,
1023
  "learning_rate": 1.4901256732495511e-05,
1024
- "loss": 0.4216,
1025
  "step": 1660
1026
  },
1027
  {
1028
  "epoch": 1.5,
1029
  "learning_rate": 1.4991023339317776e-05,
1030
- "loss": 0.4116,
1031
  "step": 1670
1032
  },
1033
  {
1034
  "epoch": 1.51,
1035
  "learning_rate": 1.5080789946140037e-05,
1036
- "loss": 0.4006,
1037
  "step": 1680
1038
  },
1039
  {
1040
  "epoch": 1.52,
1041
  "learning_rate": 1.5170556552962298e-05,
1042
- "loss": 0.4303,
1043
  "step": 1690
1044
  },
1045
  {
1046
  "epoch": 1.53,
1047
  "learning_rate": 1.5260323159784563e-05,
1048
- "loss": 0.441,
1049
  "step": 1700
1050
  },
1051
  {
1052
  "epoch": 1.53,
1053
  "learning_rate": 1.5350089766606824e-05,
1054
- "loss": 0.4248,
1055
  "step": 1710
1056
  },
1057
  {
1058
  "epoch": 1.54,
1059
  "learning_rate": 1.5439856373429085e-05,
1060
- "loss": 0.3771,
1061
  "step": 1720
1062
  },
1063
  {
1064
  "epoch": 1.55,
1065
  "learning_rate": 1.5529622980251346e-05,
1066
- "loss": 0.4158,
1067
  "step": 1730
1068
  },
1069
  {
1070
  "epoch": 1.56,
1071
  "learning_rate": 1.561938958707361e-05,
1072
- "loss": 0.3815,
1073
  "step": 1740
1074
  },
1075
  {
1076
  "epoch": 1.57,
1077
  "learning_rate": 1.5709156193895872e-05,
1078
- "loss": 0.4204,
1079
  "step": 1750
1080
  },
1081
  {
1082
  "epoch": 1.58,
1083
  "learning_rate": 1.5798922800718133e-05,
1084
- "loss": 0.4886,
1085
  "step": 1760
1086
  },
1087
  {
1088
  "epoch": 1.59,
1089
  "learning_rate": 1.5888689407540398e-05,
1090
- "loss": 0.4382,
1091
  "step": 1770
1092
  },
1093
  {
1094
  "epoch": 1.6,
1095
  "learning_rate": 1.597845601436266e-05,
1096
- "loss": 0.4502,
1097
  "step": 1780
1098
  },
1099
  {
1100
  "epoch": 1.61,
1101
  "learning_rate": 1.606822262118492e-05,
1102
- "loss": 0.4155,
1103
  "step": 1790
1104
  },
1105
  {
1106
  "epoch": 1.61,
1107
  "learning_rate": 1.615798922800718e-05,
1108
- "loss": 0.3801,
1109
  "step": 1800
1110
  },
1111
  {
1112
  "epoch": 1.62,
1113
  "learning_rate": 1.6247755834829446e-05,
1114
- "loss": 0.3968,
1115
  "step": 1810
1116
  },
1117
  {
1118
  "epoch": 1.63,
1119
  "learning_rate": 1.6337522441651707e-05,
1120
- "loss": 0.365,
1121
  "step": 1820
1122
  },
1123
  {
1124
  "epoch": 1.64,
1125
  "learning_rate": 1.642728904847397e-05,
1126
- "loss": 0.3638,
1127
  "step": 1830
1128
  },
1129
  {
1130
  "epoch": 1.65,
1131
  "learning_rate": 1.6517055655296233e-05,
1132
- "loss": 0.3935,
1133
  "step": 1840
1134
  },
1135
  {
1136
  "epoch": 1.66,
1137
  "learning_rate": 1.6606822262118494e-05,
1138
- "loss": 0.4092,
1139
  "step": 1850
1140
  },
1141
  {
1142
  "epoch": 1.67,
1143
  "learning_rate": 1.6696588868940756e-05,
1144
- "loss": 0.3617,
1145
  "step": 1860
1146
  },
1147
  {
1148
  "epoch": 1.68,
1149
  "learning_rate": 1.6786355475763017e-05,
1150
- "loss": 0.389,
1151
  "step": 1870
1152
  },
1153
  {
1154
  "epoch": 1.69,
1155
  "learning_rate": 1.687612208258528e-05,
1156
- "loss": 0.3357,
1157
  "step": 1880
1158
  },
1159
  {
1160
  "epoch": 1.7,
1161
  "learning_rate": 1.6965888689407543e-05,
1162
- "loss": 0.3925,
1163
  "step": 1890
1164
  },
1165
  {
1166
  "epoch": 1.7,
1167
  "learning_rate": 1.7055655296229804e-05,
1168
- "loss": 0.4186,
1169
  "step": 1900
1170
  },
1171
  {
1172
  "epoch": 1.71,
1173
  "learning_rate": 1.7145421903052065e-05,
1174
- "loss": 0.3913,
1175
  "step": 1910
1176
  },
1177
  {
1178
  "epoch": 1.72,
1179
  "learning_rate": 1.723518850987433e-05,
1180
- "loss": 0.3679,
1181
  "step": 1920
1182
  },
1183
  {
1184
  "epoch": 1.73,
1185
  "learning_rate": 1.732495511669659e-05,
1186
- "loss": 0.4476,
1187
  "step": 1930
1188
  },
1189
  {
1190
  "epoch": 1.74,
1191
  "learning_rate": 1.7414721723518852e-05,
1192
- "loss": 0.4065,
1193
  "step": 1940
1194
  },
1195
  {
1196
  "epoch": 1.75,
1197
  "learning_rate": 1.7504488330341113e-05,
1198
- "loss": 0.4079,
1199
  "step": 1950
1200
  },
1201
  {
1202
  "epoch": 1.76,
1203
  "learning_rate": 1.7594254937163378e-05,
1204
- "loss": 0.3831,
1205
  "step": 1960
1206
  },
1207
  {
1208
  "epoch": 1.77,
1209
  "learning_rate": 1.768402154398564e-05,
1210
- "loss": 0.3985,
1211
  "step": 1970
1212
  },
1213
  {
1214
  "epoch": 1.78,
1215
  "learning_rate": 1.77737881508079e-05,
1216
- "loss": 0.3705,
1217
  "step": 1980
1218
  },
1219
  {
1220
  "epoch": 1.79,
1221
  "learning_rate": 1.786355475763016e-05,
1222
- "loss": 0.3742,
1223
  "step": 1990
1224
  },
1225
  {
1226
  "epoch": 1.79,
1227
  "learning_rate": 1.7953321364452423e-05,
1228
- "loss": 0.3517,
1229
  "step": 2000
1230
  },
1231
  {
1232
  "epoch": 1.8,
1233
  "learning_rate": 1.8043087971274687e-05,
1234
- "loss": 0.3871,
1235
  "step": 2010
1236
  },
1237
  {
1238
  "epoch": 1.81,
1239
  "learning_rate": 1.813285457809695e-05,
1240
- "loss": 0.3416,
1241
  "step": 2020
1242
  },
1243
  {
1244
  "epoch": 1.82,
1245
  "learning_rate": 1.822262118491921e-05,
1246
- "loss": 0.3856,
1247
  "step": 2030
1248
  },
1249
  {
1250
  "epoch": 1.83,
1251
  "learning_rate": 1.831238779174147e-05,
1252
- "loss": 0.4245,
1253
  "step": 2040
1254
  },
1255
  {
1256
  "epoch": 1.84,
1257
  "learning_rate": 1.8402154398563732e-05,
1258
- "loss": 0.3645,
1259
  "step": 2050
1260
  },
1261
  {
1262
  "epoch": 1.85,
1263
  "learning_rate": 1.8491921005385997e-05,
1264
- "loss": 0.3505,
1265
  "step": 2060
1266
  },
1267
  {
1268
  "epoch": 1.86,
1269
  "learning_rate": 1.8581687612208258e-05,
1270
- "loss": 0.3746,
1271
  "step": 2070
1272
  },
1273
  {
1274
  "epoch": 1.87,
1275
  "learning_rate": 1.867145421903052e-05,
1276
- "loss": 0.352,
1277
  "step": 2080
1278
  },
1279
  {
1280
  "epoch": 1.87,
1281
  "learning_rate": 1.8761220825852784e-05,
1282
- "loss": 0.4295,
1283
  "step": 2090
1284
  },
1285
  {
1286
  "epoch": 1.88,
1287
  "learning_rate": 1.8850987432675045e-05,
1288
- "loss": 0.387,
1289
  "step": 2100
1290
  },
1291
  {
1292
  "epoch": 1.89,
1293
  "learning_rate": 1.8940754039497306e-05,
1294
- "loss": 0.4056,
1295
  "step": 2110
1296
  },
1297
  {
1298
  "epoch": 1.9,
1299
  "learning_rate": 1.9030520646319568e-05,
1300
- "loss": 0.3913,
1301
  "step": 2120
1302
  },
1303
  {
1304
  "epoch": 1.91,
1305
  "learning_rate": 1.9120287253141832e-05,
1306
- "loss": 0.3789,
1307
  "step": 2130
1308
  },
1309
  {
1310
  "epoch": 1.92,
1311
  "learning_rate": 1.9210053859964093e-05,
1312
- "loss": 0.3688,
1313
  "step": 2140
1314
  },
1315
  {
1316
  "epoch": 1.93,
1317
  "learning_rate": 1.9299820466786355e-05,
1318
- "loss": 0.3499,
1319
  "step": 2150
1320
  },
1321
  {
1322
  "epoch": 1.94,
1323
  "learning_rate": 1.9389587073608616e-05,
1324
- "loss": 0.3153,
1325
  "step": 2160
1326
  },
1327
  {
1328
  "epoch": 1.95,
1329
  "learning_rate": 1.947935368043088e-05,
1330
- "loss": 0.3558,
1331
  "step": 2170
1332
  },
1333
  {
1334
  "epoch": 1.96,
1335
  "learning_rate": 1.9569120287253142e-05,
1336
- "loss": 0.3001,
1337
  "step": 2180
1338
  },
1339
  {
1340
  "epoch": 1.96,
1341
  "learning_rate": 1.9658886894075403e-05,
1342
- "loss": 0.3109,
1343
  "step": 2190
1344
  },
1345
  {
1346
  "epoch": 1.97,
1347
  "learning_rate": 1.9748653500897668e-05,
1348
- "loss": 0.3377,
1349
  "step": 2200
1350
  },
1351
  {
1352
  "epoch": 1.98,
1353
  "learning_rate": 1.983842010771993e-05,
1354
- "loss": 0.3888,
1355
  "step": 2210
1356
  },
1357
  {
1358
  "epoch": 1.99,
1359
  "learning_rate": 1.992818671454219e-05,
1360
- "loss": 0.3536,
1361
  "step": 2220
1362
  },
1363
  {
1364
  "epoch": 2.0,
1365
  "eval_accuracy": {
1366
- "accuracy": 0.9219555954249832
1367
  },
1368
  "eval_f1": {
1369
- "f1": 0.9164276462698699
1370
  },
1371
- "eval_loss": 0.2440667599439621,
1372
  "eval_precision": {
1373
- "precision": 0.9196643633917082
1374
  },
1375
  "eval_recall": {
1376
- "recall": 0.913748377704386
1377
  },
1378
- "eval_runtime": 164.5475,
1379
- "eval_samples_per_second": 108.394,
1380
- "eval_steps_per_second": 6.776,
1381
  "step": 2229
1382
  },
1383
  {
1384
  "epoch": 2.0,
1385
  "learning_rate": 2.001795332136445e-05,
1386
- "loss": 0.3716,
1387
  "step": 2230
1388
  },
1389
  {
1390
  "epoch": 2.01,
1391
  "learning_rate": 2.0107719928186716e-05,
1392
- "loss": 0.3336,
1393
  "step": 2240
1394
  },
1395
  {
1396
  "epoch": 2.02,
1397
  "learning_rate": 2.0197486535008977e-05,
1398
- "loss": 0.3309,
1399
  "step": 2250
1400
  },
1401
  {
1402
  "epoch": 2.03,
1403
  "learning_rate": 2.0287253141831238e-05,
1404
- "loss": 0.4064,
1405
  "step": 2260
1406
  },
1407
  {
1408
  "epoch": 2.04,
1409
  "learning_rate": 2.0377019748653503e-05,
1410
- "loss": 0.33,
1411
  "step": 2270
1412
  },
1413
  {
1414
  "epoch": 2.05,
1415
  "learning_rate": 2.0466786355475764e-05,
1416
- "loss": 0.3911,
1417
  "step": 2280
1418
  },
1419
  {
1420
  "epoch": 2.05,
1421
  "learning_rate": 2.0556552962298025e-05,
1422
- "loss": 0.3405,
1423
  "step": 2290
1424
  },
1425
  {
1426
  "epoch": 2.06,
1427
  "learning_rate": 2.0646319569120286e-05,
1428
- "loss": 0.317,
1429
  "step": 2300
1430
  },
1431
  {
1432
  "epoch": 2.07,
1433
  "learning_rate": 2.073608617594255e-05,
1434
- "loss": 0.3305,
1435
  "step": 2310
1436
  },
1437
  {
1438
  "epoch": 2.08,
1439
  "learning_rate": 2.0825852782764812e-05,
1440
- "loss": 0.3201,
1441
  "step": 2320
1442
  },
1443
  {
1444
  "epoch": 2.09,
1445
  "learning_rate": 2.0915619389587073e-05,
1446
- "loss": 0.3107,
1447
  "step": 2330
1448
  },
1449
  {
1450
  "epoch": 2.1,
1451
  "learning_rate": 2.1005385996409338e-05,
1452
- "loss": 0.351,
1453
  "step": 2340
1454
  },
1455
  {
1456
  "epoch": 2.11,
1457
  "learning_rate": 2.10951526032316e-05,
1458
- "loss": 0.3181,
1459
  "step": 2350
1460
  },
1461
  {
1462
  "epoch": 2.12,
1463
  "learning_rate": 2.118491921005386e-05,
1464
- "loss": 0.3494,
1465
  "step": 2360
1466
  },
1467
  {
1468
  "epoch": 2.13,
1469
  "learning_rate": 2.1274685816876122e-05,
1470
- "loss": 0.3718,
1471
  "step": 2370
1472
  },
1473
  {
1474
  "epoch": 2.14,
1475
  "learning_rate": 2.1364452423698386e-05,
1476
- "loss": 0.34,
1477
  "step": 2380
1478
  },
1479
  {
1480
  "epoch": 2.14,
1481
  "learning_rate": 2.1454219030520648e-05,
1482
- "loss": 0.306,
1483
  "step": 2390
1484
  },
1485
  {
1486
  "epoch": 2.15,
1487
  "learning_rate": 2.154398563734291e-05,
1488
- "loss": 0.3147,
1489
  "step": 2400
1490
  },
1491
  {
1492
  "epoch": 2.16,
1493
  "learning_rate": 2.163375224416517e-05,
1494
- "loss": 0.2842,
1495
  "step": 2410
1496
  },
1497
  {
1498
  "epoch": 2.17,
1499
  "learning_rate": 2.1723518850987435e-05,
1500
- "loss": 0.3461,
1501
  "step": 2420
1502
  },
1503
  {
1504
  "epoch": 2.18,
1505
  "learning_rate": 2.1813285457809696e-05,
1506
- "loss": 0.3161,
1507
  "step": 2430
1508
  },
1509
  {
1510
  "epoch": 2.19,
1511
  "learning_rate": 2.1903052064631957e-05,
1512
- "loss": 0.3152,
1513
  "step": 2440
1514
  },
1515
  {
1516
  "epoch": 2.2,
1517
  "learning_rate": 2.199281867145422e-05,
1518
- "loss": 0.3104,
1519
  "step": 2450
1520
  },
1521
  {
1522
  "epoch": 2.21,
1523
  "learning_rate": 2.2082585278276483e-05,
1524
- "loss": 0.3485,
1525
  "step": 2460
1526
  },
1527
  {
1528
  "epoch": 2.22,
1529
  "learning_rate": 2.2172351885098744e-05,
1530
- "loss": 0.3682,
1531
  "step": 2470
1532
  },
1533
  {
1534
  "epoch": 2.22,
1535
  "learning_rate": 2.2262118491921005e-05,
1536
- "loss": 0.2831,
1537
  "step": 2480
1538
  },
1539
  {
1540
  "epoch": 2.23,
1541
  "learning_rate": 2.235188509874327e-05,
1542
- "loss": 0.3097,
1543
  "step": 2490
1544
  },
1545
  {
1546
  "epoch": 2.24,
1547
  "learning_rate": 2.244165170556553e-05,
1548
- "loss": 0.3433,
1549
  "step": 2500
1550
  },
1551
  {
1552
  "epoch": 2.25,
1553
  "learning_rate": 2.2531418312387792e-05,
1554
- "loss": 0.3319,
1555
  "step": 2510
1556
  },
1557
  {
1558
  "epoch": 2.26,
1559
  "learning_rate": 2.2621184919210057e-05,
1560
- "loss": 0.3236,
1561
  "step": 2520
1562
  },
1563
  {
1564
  "epoch": 2.27,
1565
  "learning_rate": 2.2710951526032318e-05,
1566
- "loss": 0.3531,
1567
  "step": 2530
1568
  },
1569
  {
1570
  "epoch": 2.28,
1571
  "learning_rate": 2.280071813285458e-05,
1572
- "loss": 0.2737,
1573
  "step": 2540
1574
  },
1575
  {
1576
  "epoch": 2.29,
1577
  "learning_rate": 2.289048473967684e-05,
1578
- "loss": 0.3774,
1579
  "step": 2550
1580
  },
1581
  {
1582
  "epoch": 2.3,
1583
  "learning_rate": 2.2980251346499105e-05,
1584
- "loss": 0.2936,
1585
  "step": 2560
1586
  },
1587
  {
1588
  "epoch": 2.31,
1589
  "learning_rate": 2.3070017953321366e-05,
1590
- "loss": 0.3015,
1591
  "step": 2570
1592
  },
1593
  {
1594
  "epoch": 2.31,
1595
  "learning_rate": 2.3159784560143628e-05,
1596
- "loss": 0.2933,
1597
  "step": 2580
1598
  },
1599
  {
1600
  "epoch": 2.32,
1601
  "learning_rate": 2.3249551166965892e-05,
1602
- "loss": 0.3348,
1603
  "step": 2590
1604
  },
1605
  {
1606
  "epoch": 2.33,
1607
  "learning_rate": 2.3339317773788153e-05,
1608
- "loss": 0.3249,
1609
  "step": 2600
1610
  },
1611
  {
1612
  "epoch": 2.34,
1613
  "learning_rate": 2.3429084380610415e-05,
1614
- "loss": 0.3619,
1615
  "step": 2610
1616
  },
1617
  {
1618
  "epoch": 2.35,
1619
  "learning_rate": 2.3518850987432676e-05,
1620
- "loss": 0.3428,
1621
  "step": 2620
1622
  },
1623
  {
1624
  "epoch": 2.36,
1625
  "learning_rate": 2.360861759425494e-05,
1626
- "loss": 0.3425,
1627
  "step": 2630
1628
  },
1629
  {
1630
  "epoch": 2.37,
1631
  "learning_rate": 2.3698384201077202e-05,
1632
- "loss": 0.3027,
1633
  "step": 2640
1634
  },
1635
  {
1636
  "epoch": 2.38,
1637
  "learning_rate": 2.3788150807899463e-05,
1638
- "loss": 0.3723,
1639
  "step": 2650
1640
  },
1641
  {
1642
  "epoch": 2.39,
1643
  "learning_rate": 2.3877917414721724e-05,
1644
- "loss": 0.3813,
1645
  "step": 2660
1646
  },
1647
  {
1648
  "epoch": 2.4,
1649
  "learning_rate": 2.396768402154399e-05,
1650
- "loss": 0.3423,
1651
  "step": 2670
1652
  },
1653
  {
1654
  "epoch": 2.4,
1655
  "learning_rate": 2.405745062836625e-05,
1656
- "loss": 0.355,
1657
  "step": 2680
1658
  },
1659
  {
1660
  "epoch": 2.41,
1661
  "learning_rate": 2.414721723518851e-05,
1662
- "loss": 0.3047,
1663
  "step": 2690
1664
  },
1665
  {
1666
  "epoch": 2.42,
1667
  "learning_rate": 2.4236983842010776e-05,
1668
- "loss": 0.3049,
1669
  "step": 2700
1670
  },
1671
  {
1672
  "epoch": 2.43,
1673
  "learning_rate": 2.4326750448833037e-05,
1674
- "loss": 0.2876,
1675
  "step": 2710
1676
  },
1677
  {
1678
  "epoch": 2.44,
1679
  "learning_rate": 2.4416517055655298e-05,
1680
- "loss": 0.3166,
1681
  "step": 2720
1682
  },
1683
  {
1684
  "epoch": 2.45,
1685
  "learning_rate": 2.450628366247756e-05,
1686
- "loss": 0.3468,
1687
  "step": 2730
1688
  },
1689
  {
1690
  "epoch": 2.46,
1691
  "learning_rate": 2.4596050269299824e-05,
1692
- "loss": 0.3173,
1693
  "step": 2740
1694
  },
1695
  {
1696
  "epoch": 2.47,
1697
  "learning_rate": 2.4685816876122085e-05,
1698
- "loss": 0.3189,
1699
  "step": 2750
1700
  },
1701
  {
1702
  "epoch": 2.48,
1703
  "learning_rate": 2.4775583482944346e-05,
1704
- "loss": 0.3321,
1705
  "step": 2760
1706
  },
1707
  {
1708
  "epoch": 2.48,
1709
  "learning_rate": 2.4865350089766608e-05,
1710
- "loss": 0.3314,
1711
  "step": 2770
1712
  },
1713
  {
1714
  "epoch": 2.49,
1715
  "learning_rate": 2.495511669658887e-05,
1716
- "loss": 0.3338,
1717
  "step": 2780
1718
  },
1719
  {
1720
  "epoch": 2.5,
1721
  "learning_rate": 2.5044883303411134e-05,
1722
- "loss": 0.3212,
1723
  "step": 2790
1724
  },
1725
  {
1726
  "epoch": 2.51,
1727
  "learning_rate": 2.5134649910233395e-05,
1728
- "loss": 0.3814,
1729
  "step": 2800
1730
  },
1731
  {
1732
  "epoch": 2.52,
1733
  "learning_rate": 2.5224416517055656e-05,
1734
- "loss": 0.3548,
1735
  "step": 2810
1736
  },
1737
  {
1738
  "epoch": 2.53,
1739
  "learning_rate": 2.5314183123877917e-05,
1740
- "loss": 0.3111,
1741
  "step": 2820
1742
  },
1743
  {
1744
  "epoch": 2.54,
1745
  "learning_rate": 2.5403949730700182e-05,
1746
- "loss": 0.2813,
1747
  "step": 2830
1748
  },
1749
  {
1750
  "epoch": 2.55,
1751
  "learning_rate": 2.5493716337522443e-05,
1752
- "loss": 0.2877,
1753
  "step": 2840
1754
  },
1755
  {
1756
  "epoch": 2.56,
1757
  "learning_rate": 2.5583482944344704e-05,
1758
- "loss": 0.3169,
1759
  "step": 2850
1760
  },
1761
  {
1762
  "epoch": 2.57,
1763
  "learning_rate": 2.5673249551166965e-05,
1764
- "loss": 0.2842,
1765
  "step": 2860
1766
  },
1767
  {
1768
  "epoch": 2.57,
1769
  "learning_rate": 2.5763016157989227e-05,
1770
- "loss": 0.3174,
1771
  "step": 2870
1772
  },
1773
  {
1774
  "epoch": 2.58,
1775
  "learning_rate": 2.585278276481149e-05,
1776
- "loss": 0.3094,
1777
  "step": 2880
1778
  },
1779
  {
1780
  "epoch": 2.59,
1781
  "learning_rate": 2.5942549371633752e-05,
1782
- "loss": 0.3405,
1783
  "step": 2890
1784
  },
1785
  {
1786
  "epoch": 2.6,
1787
  "learning_rate": 2.6032315978456017e-05,
1788
- "loss": 0.3013,
1789
  "step": 2900
1790
  },
1791
  {
1792
  "epoch": 2.61,
1793
  "learning_rate": 2.6122082585278278e-05,
1794
- "loss": 0.322,
1795
  "step": 2910
1796
  },
1797
  {
1798
  "epoch": 2.62,
1799
  "learning_rate": 2.621184919210054e-05,
1800
- "loss": 0.2959,
1801
  "step": 2920
1802
  },
1803
  {
1804
  "epoch": 2.63,
1805
  "learning_rate": 2.63016157989228e-05,
1806
- "loss": 0.3343,
1807
  "step": 2930
1808
  },
1809
  {
1810
  "epoch": 2.64,
1811
  "learning_rate": 2.6391382405745062e-05,
1812
- "loss": 0.3079,
1813
  "step": 2940
1814
  },
1815
  {
1816
  "epoch": 2.65,
1817
  "learning_rate": 2.6481149012567323e-05,
1818
- "loss": 0.2994,
1819
  "step": 2950
1820
  },
1821
  {
1822
  "epoch": 2.66,
1823
  "learning_rate": 2.6570915619389584e-05,
1824
- "loss": 0.3091,
1825
  "step": 2960
1826
  },
1827
  {
1828
  "epoch": 2.66,
1829
  "learning_rate": 2.6660682226211852e-05,
1830
- "loss": 0.3317,
1831
  "step": 2970
1832
  },
1833
  {
1834
  "epoch": 2.67,
1835
  "learning_rate": 2.6750448833034114e-05,
1836
- "loss": 0.2687,
1837
  "step": 2980
1838
  },
1839
  {
1840
  "epoch": 2.68,
1841
  "learning_rate": 2.6840215439856375e-05,
1842
- "loss": 0.2859,
1843
  "step": 2990
1844
  },
1845
  {
1846
  "epoch": 2.69,
1847
  "learning_rate": 2.6929982046678636e-05,
1848
- "loss": 0.2997,
1849
  "step": 3000
1850
  },
1851
  {
1852
  "epoch": 2.7,
1853
  "learning_rate": 2.7019748653500897e-05,
1854
- "loss": 0.2603,
1855
  "step": 3010
1856
  },
1857
  {
1858
  "epoch": 2.71,
1859
  "learning_rate": 2.710951526032316e-05,
1860
- "loss": 0.3452,
1861
  "step": 3020
1862
  },
1863
  {
1864
  "epoch": 2.72,
1865
  "learning_rate": 2.719928186714542e-05,
1866
- "loss": 0.3475,
1867
  "step": 3030
1868
  },
1869
  {
1870
  "epoch": 2.73,
1871
  "learning_rate": 2.7289048473967688e-05,
1872
- "loss": 0.317,
1873
  "step": 3040
1874
  },
1875
  {
1876
  "epoch": 2.74,
1877
  "learning_rate": 2.737881508078995e-05,
1878
- "loss": 0.3266,
1879
  "step": 3050
1880
  },
1881
  {
1882
  "epoch": 2.75,
1883
  "learning_rate": 2.746858168761221e-05,
1884
- "loss": 0.291,
1885
  "step": 3060
1886
  },
1887
  {
1888
  "epoch": 2.75,
1889
  "learning_rate": 2.755834829443447e-05,
1890
- "loss": 0.3323,
1891
  "step": 3070
1892
  },
1893
  {
1894
  "epoch": 2.76,
1895
  "learning_rate": 2.7648114901256732e-05,
1896
- "loss": 0.32,
1897
  "step": 3080
1898
  },
1899
  {
1900
  "epoch": 2.77,
1901
  "learning_rate": 2.7737881508078994e-05,
1902
- "loss": 0.2792,
1903
  "step": 3090
1904
  },
1905
  {
1906
  "epoch": 2.78,
1907
  "learning_rate": 2.7827648114901255e-05,
1908
- "loss": 0.2858,
1909
  "step": 3100
1910
  },
1911
  {
1912
  "epoch": 2.79,
1913
  "learning_rate": 2.7917414721723523e-05,
1914
- "loss": 0.2447,
1915
  "step": 3110
1916
  },
1917
  {
1918
  "epoch": 2.8,
1919
  "learning_rate": 2.8007181328545784e-05,
1920
- "loss": 0.3039,
1921
  "step": 3120
1922
  },
1923
  {
1924
  "epoch": 2.81,
1925
  "learning_rate": 2.8096947935368045e-05,
1926
- "loss": 0.323,
1927
  "step": 3130
1928
  },
1929
  {
1930
  "epoch": 2.82,
1931
  "learning_rate": 2.8186714542190307e-05,
1932
- "loss": 0.3041,
1933
  "step": 3140
1934
  },
1935
  {
1936
  "epoch": 2.83,
1937
  "learning_rate": 2.8276481149012568e-05,
1938
- "loss": 0.2496,
1939
  "step": 3150
1940
  },
1941
  {
1942
  "epoch": 2.83,
1943
  "learning_rate": 2.836624775583483e-05,
1944
- "loss": 0.3205,
1945
  "step": 3160
1946
  },
1947
  {
1948
  "epoch": 2.84,
1949
  "learning_rate": 2.845601436265709e-05,
1950
- "loss": 0.3079,
1951
  "step": 3170
1952
  },
1953
  {
1954
  "epoch": 2.85,
1955
  "learning_rate": 2.8545780969479358e-05,
1956
- "loss": 0.3112,
1957
  "step": 3180
1958
  },
1959
  {
1960
  "epoch": 2.86,
1961
  "learning_rate": 2.863554757630162e-05,
1962
- "loss": 0.2674,
1963
  "step": 3190
1964
  },
1965
  {
1966
  "epoch": 2.87,
1967
  "learning_rate": 2.872531418312388e-05,
1968
- "loss": 0.3094,
1969
  "step": 3200
1970
  },
1971
  {
1972
  "epoch": 2.88,
1973
  "learning_rate": 2.8815080789946142e-05,
1974
- "loss": 0.2895,
1975
  "step": 3210
1976
  },
1977
  {
1978
  "epoch": 2.89,
1979
  "learning_rate": 2.8904847396768403e-05,
1980
- "loss": 0.3456,
1981
  "step": 3220
1982
  },
1983
  {
1984
  "epoch": 2.9,
1985
  "learning_rate": 2.8994614003590664e-05,
1986
- "loss": 0.2602,
1987
  "step": 3230
1988
  },
1989
  {
1990
  "epoch": 2.91,
1991
  "learning_rate": 2.9084380610412926e-05,
1992
- "loss": 0.3198,
1993
  "step": 3240
1994
  },
1995
  {
1996
  "epoch": 2.92,
1997
  "learning_rate": 2.9174147217235194e-05,
1998
- "loss": 0.3463,
1999
  "step": 3250
2000
  },
2001
  {
2002
  "epoch": 2.92,
2003
  "learning_rate": 2.9263913824057455e-05,
2004
- "loss": 0.3456,
2005
  "step": 3260
2006
  },
2007
  {
2008
  "epoch": 2.93,
2009
  "learning_rate": 2.9353680430879716e-05,
2010
- "loss": 0.2803,
2011
  "step": 3270
2012
  },
2013
  {
2014
  "epoch": 2.94,
2015
  "learning_rate": 2.9443447037701977e-05,
2016
- "loss": 0.302,
2017
  "step": 3280
2018
  },
2019
  {
2020
  "epoch": 2.95,
2021
  "learning_rate": 2.953321364452424e-05,
2022
- "loss": 0.2851,
2023
  "step": 3290
2024
  },
2025
  {
2026
  "epoch": 2.96,
2027
  "learning_rate": 2.96229802513465e-05,
2028
- "loss": 0.2752,
2029
  "step": 3300
2030
  },
2031
  {
2032
  "epoch": 2.97,
2033
  "learning_rate": 2.971274685816876e-05,
2034
- "loss": 0.2655,
2035
  "step": 3310
2036
  },
2037
  {
2038
  "epoch": 2.98,
2039
  "learning_rate": 2.9802513464991022e-05,
2040
- "loss": 0.2716,
2041
  "step": 3320
2042
  },
2043
  {
2044
  "epoch": 2.99,
2045
  "learning_rate": 2.989228007181329e-05,
2046
- "loss": 0.2591,
2047
  "step": 3330
2048
  },
2049
  {
2050
  "epoch": 3.0,
2051
  "learning_rate": 2.998204667863555e-05,
2052
- "loss": 0.2794,
2053
  "step": 3340
2054
  },
2055
  {
2056
  "epoch": 3.0,
2057
  "eval_accuracy": {
2058
- "accuracy": 0.9377663153173357
2059
  },
2060
  "eval_f1": {
2061
- "f1": 0.9340905709341878
2062
  },
2063
- "eval_loss": 0.18664708733558655,
2064
  "eval_precision": {
2065
- "precision": 0.9374045110718889
2066
  },
2067
  "eval_recall": {
2068
- "recall": 0.9314497447723507
2069
  },
2070
- "eval_runtime": 164.7744,
2071
- "eval_samples_per_second": 108.245,
2072
- "eval_steps_per_second": 6.767,
2073
  "step": 3344
2074
  },
2075
  {
2076
  "epoch": 3.01,
2077
  "learning_rate": 3.0071813285457812e-05,
2078
- "loss": 0.3214,
2079
  "step": 3350
2080
  },
2081
  {
2082
  "epoch": 3.01,
2083
  "learning_rate": 3.0161579892280074e-05,
2084
- "loss": 0.3234,
2085
  "step": 3360
2086
  },
2087
  {
2088
  "epoch": 3.02,
2089
  "learning_rate": 3.0251346499102335e-05,
2090
- "loss": 0.2471,
2091
  "step": 3370
2092
  },
2093
  {
2094
  "epoch": 3.03,
2095
  "learning_rate": 3.0341113105924596e-05,
2096
- "loss": 0.2891,
2097
  "step": 3380
2098
  },
2099
  {
2100
  "epoch": 3.04,
2101
  "learning_rate": 3.0430879712746857e-05,
2102
- "loss": 0.2794,
2103
  "step": 3390
2104
  },
2105
  {
2106
  "epoch": 3.05,
2107
  "learning_rate": 3.0520646319569125e-05,
2108
- "loss": 0.2889,
2109
  "step": 3400
2110
  },
2111
  {
2112
  "epoch": 3.06,
2113
  "learning_rate": 3.061041292639138e-05,
2114
- "loss": 0.3119,
2115
  "step": 3410
2116
  },
2117
  {
2118
  "epoch": 3.07,
2119
  "learning_rate": 3.070017953321365e-05,
2120
- "loss": 0.2721,
2121
  "step": 3420
2122
  },
2123
  {
2124
  "epoch": 3.08,
2125
  "learning_rate": 3.0789946140035906e-05,
2126
- "loss": 0.319,
2127
  "step": 3430
2128
  },
2129
  {
2130
  "epoch": 3.09,
2131
  "learning_rate": 3.087971274685817e-05,
2132
- "loss": 0.2623,
2133
  "step": 3440
2134
  },
2135
  {
2136
  "epoch": 3.09,
2137
  "learning_rate": 3.096947935368043e-05,
2138
- "loss": 0.2689,
2139
  "step": 3450
2140
  },
2141
  {
2142
  "epoch": 3.1,
2143
  "learning_rate": 3.105924596050269e-05,
2144
- "loss": 0.2319,
2145
  "step": 3460
2146
  },
2147
  {
2148
  "epoch": 3.11,
2149
  "learning_rate": 3.114901256732496e-05,
2150
- "loss": 0.2781,
2151
  "step": 3470
2152
  },
2153
  {
2154
  "epoch": 3.12,
2155
  "learning_rate": 3.123877917414722e-05,
2156
- "loss": 0.3058,
2157
  "step": 3480
2158
  },
2159
  {
2160
  "epoch": 3.13,
2161
  "learning_rate": 3.132854578096948e-05,
2162
- "loss": 0.2508,
2163
  "step": 3490
2164
  },
2165
  {
2166
  "epoch": 3.14,
2167
  "learning_rate": 3.1418312387791744e-05,
2168
- "loss": 0.2737,
2169
  "step": 3500
2170
  },
2171
  {
2172
  "epoch": 3.15,
2173
  "learning_rate": 3.1508078994614e-05,
2174
- "loss": 0.2592,
2175
  "step": 3510
2176
  },
2177
  {
2178
  "epoch": 3.16,
2179
  "learning_rate": 3.159784560143627e-05,
2180
- "loss": 0.2965,
2181
  "step": 3520
2182
  },
2183
  {
2184
  "epoch": 3.17,
2185
  "learning_rate": 3.1687612208258525e-05,
2186
- "loss": 0.2821,
2187
  "step": 3530
2188
  },
2189
  {
2190
  "epoch": 3.18,
2191
  "learning_rate": 3.1777378815080796e-05,
2192
- "loss": 0.2622,
2193
  "step": 3540
2194
  },
2195
  {
2196
  "epoch": 3.18,
2197
  "learning_rate": 3.1867145421903054e-05,
2198
- "loss": 0.3081,
2199
  "step": 3550
2200
  },
2201
  {
2202
  "epoch": 3.19,
2203
  "learning_rate": 3.195691202872532e-05,
2204
- "loss": 0.3187,
2205
  "step": 3560
2206
  },
2207
  {
2208
  "epoch": 3.2,
2209
  "learning_rate": 3.2046678635547576e-05,
2210
- "loss": 0.2525,
2211
  "step": 3570
2212
  },
2213
  {
2214
  "epoch": 3.21,
2215
  "learning_rate": 3.213644524236984e-05,
2216
- "loss": 0.2896,
2217
  "step": 3580
2218
  },
2219
  {
2220
  "epoch": 3.22,
2221
  "learning_rate": 3.22262118491921e-05,
2222
- "loss": 0.2649,
2223
  "step": 3590
2224
  },
2225
  {
2226
  "epoch": 3.23,
2227
  "learning_rate": 3.231597845601436e-05,
2228
- "loss": 0.3238,
2229
  "step": 3600
2230
  },
2231
  {
2232
  "epoch": 3.24,
2233
  "learning_rate": 3.240574506283663e-05,
2234
- "loss": 0.298,
2235
  "step": 3610
2236
  },
2237
  {
2238
  "epoch": 3.25,
2239
  "learning_rate": 3.249551166965889e-05,
2240
- "loss": 0.3019,
2241
  "step": 3620
2242
  },
2243
  {
2244
  "epoch": 3.26,
2245
  "learning_rate": 3.258527827648115e-05,
2246
- "loss": 0.2835,
2247
  "step": 3630
2248
  },
2249
  {
2250
  "epoch": 3.27,
2251
  "learning_rate": 3.2675044883303415e-05,
2252
- "loss": 0.2717,
2253
  "step": 3640
2254
  },
2255
  {
2256
  "epoch": 3.27,
2257
  "learning_rate": 3.276481149012567e-05,
2258
- "loss": 0.2582,
2259
  "step": 3650
2260
  },
2261
  {
2262
  "epoch": 3.28,
2263
  "learning_rate": 3.285457809694794e-05,
2264
- "loss": 0.2473,
2265
  "step": 3660
2266
  },
2267
  {
2268
  "epoch": 3.29,
2269
  "learning_rate": 3.2944344703770195e-05,
2270
- "loss": 0.3248,
2271
  "step": 3670
2272
  },
2273
  {
2274
  "epoch": 3.3,
2275
  "learning_rate": 3.3034111310592466e-05,
2276
- "loss": 0.2731,
2277
  "step": 3680
2278
  },
2279
  {
2280
  "epoch": 3.31,
2281
  "learning_rate": 3.3123877917414724e-05,
2282
- "loss": 0.2927,
2283
  "step": 3690
2284
  },
2285
  {
2286
  "epoch": 3.32,
2287
  "learning_rate": 3.321364452423699e-05,
2288
- "loss": 0.2665,
2289
  "step": 3700
2290
  },
2291
  {
2292
  "epoch": 3.33,
2293
  "learning_rate": 3.330341113105925e-05,
2294
- "loss": 0.2269,
2295
  "step": 3710
2296
  },
2297
  {
2298
  "epoch": 3.34,
2299
  "learning_rate": 3.339317773788151e-05,
2300
- "loss": 0.279,
2301
  "step": 3720
2302
  },
2303
  {
2304
  "epoch": 3.35,
2305
  "learning_rate": 3.348294434470377e-05,
2306
- "loss": 0.2727,
2307
  "step": 3730
2308
  },
2309
  {
2310
  "epoch": 3.36,
2311
  "learning_rate": 3.3572710951526034e-05,
2312
- "loss": 0.2918,
2313
  "step": 3740
2314
  },
2315
  {
2316
  "epoch": 3.36,
2317
  "learning_rate": 3.36624775583483e-05,
2318
- "loss": 0.2857,
2319
  "step": 3750
2320
  },
2321
  {
2322
  "epoch": 3.37,
2323
  "learning_rate": 3.375224416517056e-05,
2324
- "loss": 0.2641,
2325
  "step": 3760
2326
  },
2327
  {
2328
  "epoch": 3.38,
2329
  "learning_rate": 3.384201077199282e-05,
2330
- "loss": 0.2134,
2331
  "step": 3770
2332
  },
2333
  {
2334
  "epoch": 3.39,
2335
  "learning_rate": 3.3931777378815085e-05,
2336
- "loss": 0.2589,
2337
  "step": 3780
2338
  },
2339
  {
2340
  "epoch": 3.4,
2341
  "learning_rate": 3.402154398563734e-05,
2342
- "loss": 0.2597,
2343
  "step": 3790
2344
  },
2345
  {
2346
  "epoch": 3.41,
2347
  "learning_rate": 3.411131059245961e-05,
2348
- "loss": 0.2538,
2349
  "step": 3800
2350
  },
2351
  {
2352
  "epoch": 3.42,
2353
  "learning_rate": 3.4201077199281866e-05,
2354
- "loss": 0.271,
2355
  "step": 3810
2356
  },
2357
  {
2358
  "epoch": 3.43,
2359
  "learning_rate": 3.429084380610413e-05,
2360
- "loss": 0.2326,
2361
  "step": 3820
2362
  },
2363
  {
2364
  "epoch": 3.44,
2365
  "learning_rate": 3.4380610412926395e-05,
2366
- "loss": 0.26,
2367
  "step": 3830
2368
  },
2369
  {
2370
  "epoch": 3.44,
2371
  "learning_rate": 3.447037701974866e-05,
2372
- "loss": 0.2703,
2373
  "step": 3840
2374
  },
2375
  {
2376
  "epoch": 3.45,
2377
  "learning_rate": 3.456014362657092e-05,
2378
- "loss": 0.2996,
2379
  "step": 3850
2380
  },
2381
  {
2382
  "epoch": 3.46,
2383
  "learning_rate": 3.464991023339318e-05,
2384
- "loss": 0.2231,
2385
  "step": 3860
2386
  },
2387
  {
2388
  "epoch": 3.47,
2389
  "learning_rate": 3.473967684021544e-05,
2390
- "loss": 0.287,
2391
  "step": 3870
2392
  },
2393
  {
2394
  "epoch": 3.48,
2395
  "learning_rate": 3.4829443447037704e-05,
2396
- "loss": 0.2877,
2397
  "step": 3880
2398
  },
2399
  {
2400
  "epoch": 3.49,
2401
  "learning_rate": 3.491921005385996e-05,
2402
- "loss": 0.2941,
2403
  "step": 3890
2404
  },
2405
  {
2406
  "epoch": 3.5,
2407
  "learning_rate": 3.500897666068223e-05,
2408
- "loss": 0.283,
2409
  "step": 3900
2410
  },
2411
  {
2412
  "epoch": 3.51,
2413
  "learning_rate": 3.509874326750449e-05,
2414
- "loss": 0.2899,
2415
  "step": 3910
2416
  },
2417
  {
2418
  "epoch": 3.52,
2419
  "learning_rate": 3.5188509874326756e-05,
2420
- "loss": 0.2669,
2421
  "step": 3920
2422
  },
2423
  {
2424
  "epoch": 3.53,
2425
  "learning_rate": 3.5278276481149014e-05,
2426
- "loss": 0.2673,
2427
  "step": 3930
2428
  },
2429
  {
2430
  "epoch": 3.53,
2431
  "learning_rate": 3.536804308797128e-05,
2432
- "loss": 0.2427,
2433
  "step": 3940
2434
  },
2435
  {
2436
  "epoch": 3.54,
2437
  "learning_rate": 3.5457809694793536e-05,
2438
- "loss": 0.3008,
2439
  "step": 3950
2440
  },
2441
  {
2442
  "epoch": 3.55,
2443
  "learning_rate": 3.55475763016158e-05,
2444
- "loss": 0.2333,
2445
  "step": 3960
2446
  },
2447
  {
2448
  "epoch": 3.56,
2449
  "learning_rate": 3.5637342908438065e-05,
2450
- "loss": 0.2415,
2451
  "step": 3970
2452
  },
2453
  {
2454
  "epoch": 3.57,
2455
  "learning_rate": 3.572710951526032e-05,
2456
- "loss": 0.2642,
2457
  "step": 3980
2458
  },
2459
  {
2460
  "epoch": 3.58,
2461
  "learning_rate": 3.581687612208259e-05,
2462
- "loss": 0.2814,
2463
  "step": 3990
2464
  },
2465
  {
2466
  "epoch": 3.59,
2467
  "learning_rate": 3.5906642728904846e-05,
2468
- "loss": 0.2607,
2469
  "step": 4000
2470
  },
2471
  {
2472
  "epoch": 3.6,
2473
  "learning_rate": 3.599640933572711e-05,
2474
- "loss": 0.2062,
2475
  "step": 4010
2476
  },
2477
  {
2478
  "epoch": 3.61,
2479
  "learning_rate": 3.6086175942549375e-05,
2480
- "loss": 0.2388,
2481
  "step": 4020
2482
  },
2483
  {
2484
  "epoch": 3.62,
2485
  "learning_rate": 3.617594254937163e-05,
2486
- "loss": 0.2364,
2487
  "step": 4030
2488
  },
2489
  {
2490
  "epoch": 3.62,
2491
  "learning_rate": 3.62657091561939e-05,
2492
- "loss": 0.2803,
2493
  "step": 4040
2494
  },
2495
  {
2496
  "epoch": 3.63,
2497
  "learning_rate": 3.635547576301616e-05,
2498
- "loss": 0.236,
2499
  "step": 4050
2500
  },
2501
  {
2502
  "epoch": 3.64,
2503
  "learning_rate": 3.644524236983842e-05,
2504
- "loss": 0.2795,
2505
  "step": 4060
2506
  },
2507
  {
2508
  "epoch": 3.65,
2509
  "learning_rate": 3.6535008976660684e-05,
2510
- "loss": 0.2728,
2511
  "step": 4070
2512
  },
2513
  {
2514
  "epoch": 3.66,
2515
  "learning_rate": 3.662477558348294e-05,
2516
- "loss": 0.2897,
2517
  "step": 4080
2518
  },
2519
  {
2520
  "epoch": 3.67,
2521
  "learning_rate": 3.671454219030521e-05,
2522
- "loss": 0.257,
2523
  "step": 4090
2524
  },
2525
  {
2526
  "epoch": 3.68,
2527
  "learning_rate": 3.6804308797127465e-05,
2528
- "loss": 0.2913,
2529
  "step": 4100
2530
  },
2531
  {
2532
  "epoch": 3.69,
2533
  "learning_rate": 3.6894075403949736e-05,
2534
- "loss": 0.212,
2535
  "step": 4110
2536
  },
2537
  {
2538
  "epoch": 3.7,
2539
  "learning_rate": 3.6983842010771994e-05,
2540
- "loss": 0.2672,
2541
  "step": 4120
2542
  },
2543
  {
2544
  "epoch": 3.7,
2545
  "learning_rate": 3.707360861759426e-05,
2546
- "loss": 0.2852,
2547
  "step": 4130
2548
  },
2549
  {
2550
  "epoch": 3.71,
2551
  "learning_rate": 3.7163375224416516e-05,
2552
- "loss": 0.2238,
2553
  "step": 4140
2554
  },
2555
  {
2556
  "epoch": 3.72,
2557
  "learning_rate": 3.725314183123878e-05,
2558
- "loss": 0.2344,
2559
  "step": 4150
2560
  },
2561
  {
2562
  "epoch": 3.73,
2563
  "learning_rate": 3.734290843806104e-05,
2564
- "loss": 0.2695,
2565
  "step": 4160
2566
  },
2567
  {
2568
  "epoch": 3.74,
2569
  "learning_rate": 3.74326750448833e-05,
2570
- "loss": 0.2729,
2571
  "step": 4170
2572
  },
2573
  {
2574
  "epoch": 3.75,
2575
  "learning_rate": 3.752244165170557e-05,
2576
- "loss": 0.2692,
2577
  "step": 4180
2578
  },
2579
  {
2580
  "epoch": 3.76,
2581
  "learning_rate": 3.761220825852783e-05,
2582
- "loss": 0.2845,
2583
  "step": 4190
2584
  },
2585
  {
2586
  "epoch": 3.77,
2587
  "learning_rate": 3.770197486535009e-05,
2588
- "loss": 0.244,
2589
  "step": 4200
2590
  },
2591
  {
2592
  "epoch": 3.78,
2593
  "learning_rate": 3.7791741472172355e-05,
2594
- "loss": 0.2621,
2595
  "step": 4210
2596
  },
2597
  {
2598
  "epoch": 3.79,
2599
  "learning_rate": 3.788150807899461e-05,
2600
- "loss": 0.2581,
2601
  "step": 4220
2602
  },
2603
  {
2604
  "epoch": 3.79,
2605
  "learning_rate": 3.797127468581688e-05,
2606
- "loss": 0.2201,
2607
  "step": 4230
2608
  },
2609
  {
2610
  "epoch": 3.8,
2611
  "learning_rate": 3.8061041292639135e-05,
2612
- "loss": 0.2731,
2613
  "step": 4240
2614
  },
2615
  {
2616
  "epoch": 3.81,
2617
  "learning_rate": 3.815080789946141e-05,
2618
- "loss": 0.3085,
2619
  "step": 4250
2620
  },
2621
  {
2622
  "epoch": 3.82,
2623
  "learning_rate": 3.8240574506283664e-05,
2624
- "loss": 0.2729,
2625
  "step": 4260
2626
  },
2627
  {
2628
  "epoch": 3.83,
2629
  "learning_rate": 3.833034111310593e-05,
2630
- "loss": 0.2282,
2631
  "step": 4270
2632
  },
2633
  {
2634
  "epoch": 3.84,
2635
  "learning_rate": 3.842010771992819e-05,
2636
- "loss": 0.1955,
2637
  "step": 4280
2638
  },
2639
  {
2640
  "epoch": 3.85,
2641
  "learning_rate": 3.850987432675045e-05,
2642
- "loss": 0.2213,
2643
  "step": 4290
2644
  },
2645
  {
2646
  "epoch": 3.86,
2647
  "learning_rate": 3.859964093357271e-05,
2648
- "loss": 0.2719,
2649
  "step": 4300
2650
  },
2651
  {
2652
  "epoch": 3.87,
2653
  "learning_rate": 3.8689407540394974e-05,
2654
- "loss": 0.2045,
2655
  "step": 4310
2656
  },
2657
  {
2658
  "epoch": 3.88,
2659
  "learning_rate": 3.877917414721723e-05,
2660
- "loss": 0.2468,
2661
  "step": 4320
2662
  },
2663
  {
2664
  "epoch": 3.88,
2665
  "learning_rate": 3.88689407540395e-05,
2666
- "loss": 0.259,
2667
  "step": 4330
2668
  },
2669
  {
2670
  "epoch": 3.89,
2671
  "learning_rate": 3.895870736086176e-05,
2672
- "loss": 0.2462,
2673
  "step": 4340
2674
  },
2675
  {
2676
  "epoch": 3.9,
2677
  "learning_rate": 3.9048473967684026e-05,
2678
- "loss": 0.2278,
2679
  "step": 4350
2680
  },
2681
  {
2682
  "epoch": 3.91,
2683
  "learning_rate": 3.9138240574506283e-05,
2684
- "loss": 0.2701,
2685
  "step": 4360
2686
  },
2687
  {
2688
  "epoch": 3.92,
2689
  "learning_rate": 3.922800718132855e-05,
2690
- "loss": 0.269,
2691
  "step": 4370
2692
  },
2693
  {
2694
  "epoch": 3.93,
2695
  "learning_rate": 3.9317773788150806e-05,
2696
- "loss": 0.2938,
2697
  "step": 4380
2698
  },
2699
  {
2700
  "epoch": 3.94,
2701
  "learning_rate": 3.940754039497307e-05,
2702
- "loss": 0.2669,
2703
  "step": 4390
2704
  },
2705
  {
2706
  "epoch": 3.95,
2707
  "learning_rate": 3.9497307001795335e-05,
2708
- "loss": 0.2557,
2709
  "step": 4400
2710
  },
2711
  {
2712
  "epoch": 3.96,
2713
  "learning_rate": 3.95870736086176e-05,
2714
- "loss": 0.2629,
2715
  "step": 4410
2716
  },
2717
  {
2718
  "epoch": 3.97,
2719
  "learning_rate": 3.967684021543986e-05,
2720
- "loss": 0.2168,
2721
  "step": 4420
2722
  },
2723
  {
2724
  "epoch": 3.97,
2725
  "learning_rate": 3.976660682226212e-05,
2726
- "loss": 0.2734,
2727
  "step": 4430
2728
  },
2729
  {
2730
  "epoch": 3.98,
2731
  "learning_rate": 3.985637342908438e-05,
2732
- "loss": 0.2119,
2733
  "step": 4440
2734
  },
2735
  {
2736
  "epoch": 3.99,
2737
  "learning_rate": 3.9946140035906645e-05,
2738
- "loss": 0.2177,
2739
  "step": 4450
2740
  },
2741
  {
2742
  "epoch": 4.0,
2743
  "eval_accuracy": {
2744
- "accuracy": 0.9484189280107648
2745
  },
2746
  "eval_f1": {
2747
- "f1": 0.9455383407746016
2748
  },
2749
- "eval_loss": 0.1543792188167572,
2750
  "eval_precision": {
2751
- "precision": 0.9499526075822722
2752
  },
2753
  "eval_recall": {
2754
- "recall": 0.9425902511351058
2755
  },
2756
- "eval_runtime": 165.5499,
2757
- "eval_samples_per_second": 107.738,
2758
- "eval_steps_per_second": 6.735,
2759
  "step": 4459
2760
  },
2761
  {
2762
  "epoch": 4.0,
2763
  "learning_rate": 4.00359066427289e-05,
2764
- "loss": 0.252,
2765
  "step": 4460
2766
  },
2767
  {
2768
  "epoch": 4.01,
2769
  "learning_rate": 4.0125673249551174e-05,
2770
- "loss": 0.1988,
2771
  "step": 4470
2772
  },
2773
  {
2774
  "epoch": 4.02,
2775
  "learning_rate": 4.021543985637343e-05,
2776
- "loss": 0.2268,
2777
  "step": 4480
2778
  },
2779
  {
2780
  "epoch": 4.03,
2781
  "learning_rate": 4.0305206463195696e-05,
2782
- "loss": 0.2013,
2783
  "step": 4490
2784
  },
2785
  {
2786
  "epoch": 4.04,
2787
  "learning_rate": 4.0394973070017954e-05,
2788
- "loss": 0.2409,
2789
  "step": 4500
2790
  },
2791
  {
2792
  "epoch": 4.05,
2793
  "learning_rate": 4.048473967684022e-05,
2794
- "loss": 0.2122,
2795
  "step": 4510
2796
  },
2797
  {
2798
  "epoch": 4.05,
2799
  "learning_rate": 4.0574506283662476e-05,
2800
- "loss": 0.2246,
2801
  "step": 4520
2802
  },
2803
  {
2804
  "epoch": 4.06,
2805
  "learning_rate": 4.066427289048474e-05,
2806
- "loss": 0.251,
2807
  "step": 4530
2808
  },
2809
  {
2810
  "epoch": 4.07,
2811
  "learning_rate": 4.0754039497307006e-05,
2812
- "loss": 0.2115,
2813
  "step": 4540
2814
  },
2815
  {
2816
  "epoch": 4.08,
2817
  "learning_rate": 4.084380610412927e-05,
2818
- "loss": 0.2536,
2819
  "step": 4550
2820
  },
2821
  {
2822
  "epoch": 4.09,
2823
  "learning_rate": 4.093357271095153e-05,
2824
- "loss": 0.2526,
2825
  "step": 4560
2826
  },
2827
  {
2828
  "epoch": 4.1,
2829
  "learning_rate": 4.102333931777379e-05,
2830
- "loss": 0.2337,
2831
  "step": 4570
2832
  },
2833
  {
2834
  "epoch": 4.11,
2835
  "learning_rate": 4.111310592459605e-05,
2836
- "loss": 0.2455,
2837
  "step": 4580
2838
  },
2839
  {
2840
  "epoch": 4.12,
2841
  "learning_rate": 4.1202872531418315e-05,
2842
- "loss": 0.2178,
2843
  "step": 4590
2844
  },
2845
  {
2846
  "epoch": 4.13,
2847
  "learning_rate": 4.129263913824057e-05,
2848
- "loss": 0.2402,
2849
  "step": 4600
2850
  },
2851
  {
2852
  "epoch": 4.14,
2853
  "learning_rate": 4.1382405745062844e-05,
2854
- "loss": 0.2868,
2855
  "step": 4610
2856
  },
2857
  {
2858
  "epoch": 4.14,
2859
  "learning_rate": 4.14721723518851e-05,
2860
- "loss": 0.1957,
2861
  "step": 4620
2862
  },
2863
  {
2864
  "epoch": 4.15,
2865
  "learning_rate": 4.156193895870737e-05,
2866
- "loss": 0.2701,
2867
  "step": 4630
2868
  },
2869
  {
2870
  "epoch": 4.16,
2871
  "learning_rate": 4.1651705565529625e-05,
2872
- "loss": 0.2711,
2873
  "step": 4640
2874
  },
2875
  {
2876
  "epoch": 4.17,
2877
  "learning_rate": 4.174147217235189e-05,
2878
- "loss": 0.2992,
2879
  "step": 4650
2880
  },
2881
  {
2882
  "epoch": 4.18,
2883
  "learning_rate": 4.183123877917415e-05,
2884
- "loss": 0.2284,
2885
  "step": 4660
2886
  },
2887
  {
2888
  "epoch": 4.19,
2889
  "learning_rate": 4.192100538599641e-05,
2890
- "loss": 0.2522,
2891
  "step": 4670
2892
  },
2893
  {
2894
  "epoch": 4.2,
2895
  "learning_rate": 4.2010771992818676e-05,
2896
- "loss": 0.2039,
2897
  "step": 4680
2898
  },
2899
  {
2900
  "epoch": 4.21,
2901
  "learning_rate": 4.2100538599640934e-05,
2902
- "loss": 0.2517,
2903
  "step": 4690
2904
  },
2905
  {
2906
  "epoch": 4.22,
2907
  "learning_rate": 4.21903052064632e-05,
2908
- "loss": 0.2475,
2909
  "step": 4700
2910
  },
2911
  {
2912
  "epoch": 4.23,
2913
  "learning_rate": 4.228007181328546e-05,
2914
- "loss": 0.2224,
2915
  "step": 4710
2916
  },
2917
  {
2918
  "epoch": 4.23,
2919
  "learning_rate": 4.236983842010772e-05,
2920
- "loss": 0.2761,
2921
  "step": 4720
2922
  },
2923
  {
2924
  "epoch": 4.24,
2925
  "learning_rate": 4.2459605026929986e-05,
2926
- "loss": 0.2125,
2927
  "step": 4730
2928
  },
2929
  {
2930
  "epoch": 4.25,
2931
  "learning_rate": 4.2549371633752244e-05,
2932
- "loss": 0.2067,
2933
  "step": 4740
2934
  },
2935
  {
2936
  "epoch": 4.26,
2937
  "learning_rate": 4.263913824057451e-05,
2938
- "loss": 0.2237,
2939
  "step": 4750
2940
  },
2941
  {
2942
  "epoch": 4.27,
2943
  "learning_rate": 4.272890484739677e-05,
2944
- "loss": 0.2223,
2945
  "step": 4760
2946
  },
2947
  {
2948
  "epoch": 4.28,
2949
  "learning_rate": 4.281867145421903e-05,
2950
- "loss": 0.1756,
2951
  "step": 4770
2952
  },
2953
  {
2954
  "epoch": 4.29,
2955
  "learning_rate": 4.2908438061041295e-05,
2956
- "loss": 0.2134,
2957
  "step": 4780
2958
  },
2959
  {
2960
  "epoch": 4.3,
2961
  "learning_rate": 4.299820466786356e-05,
2962
- "loss": 0.2791,
2963
  "step": 4790
2964
  },
2965
  {
2966
  "epoch": 4.31,
2967
  "learning_rate": 4.308797127468582e-05,
2968
- "loss": 0.2446,
2969
  "step": 4800
2970
  },
2971
  {
2972
  "epoch": 4.31,
2973
  "learning_rate": 4.317773788150808e-05,
2974
- "loss": 0.2578,
2975
  "step": 4810
2976
  },
2977
  {
2978
  "epoch": 4.32,
2979
  "learning_rate": 4.326750448833034e-05,
2980
- "loss": 0.2233,
2981
  "step": 4820
2982
  },
2983
  {
2984
  "epoch": 4.33,
2985
  "learning_rate": 4.3357271095152605e-05,
2986
- "loss": 0.2427,
2987
  "step": 4830
2988
  },
2989
  {
2990
  "epoch": 4.34,
2991
  "learning_rate": 4.344703770197487e-05,
2992
- "loss": 0.2652,
2993
  "step": 4840
2994
  },
2995
  {
2996
  "epoch": 4.35,
2997
  "learning_rate": 4.353680430879713e-05,
2998
- "loss": 0.252,
2999
  "step": 4850
3000
  },
3001
  {
3002
  "epoch": 4.36,
3003
  "learning_rate": 4.362657091561939e-05,
3004
- "loss": 0.2174,
3005
  "step": 4860
3006
  },
3007
  {
3008
  "epoch": 4.37,
3009
  "learning_rate": 4.371633752244165e-05,
3010
- "loss": 0.2391,
3011
  "step": 4870
3012
  },
3013
  {
3014
  "epoch": 4.38,
3015
  "learning_rate": 4.3806104129263914e-05,
3016
- "loss": 0.2156,
3017
  "step": 4880
3018
  },
3019
  {
3020
  "epoch": 4.39,
3021
  "learning_rate": 4.389587073608618e-05,
3022
- "loss": 0.2291,
3023
  "step": 4890
3024
  },
3025
  {
3026
  "epoch": 4.4,
3027
  "learning_rate": 4.398563734290844e-05,
3028
- "loss": 0.2048,
3029
  "step": 4900
3030
  },
3031
  {
3032
  "epoch": 4.4,
3033
  "learning_rate": 4.40754039497307e-05,
3034
- "loss": 0.2036,
3035
  "step": 4910
3036
  },
3037
  {
3038
  "epoch": 4.41,
3039
  "learning_rate": 4.4165170556552966e-05,
3040
- "loss": 0.2258,
3041
  "step": 4920
3042
  },
3043
  {
3044
  "epoch": 4.42,
3045
  "learning_rate": 4.4254937163375224e-05,
3046
- "loss": 0.2229,
3047
  "step": 4930
3048
  },
3049
  {
3050
  "epoch": 4.43,
3051
  "learning_rate": 4.434470377019749e-05,
3052
- "loss": 0.2562,
3053
  "step": 4940
3054
  },
3055
  {
3056
  "epoch": 4.44,
3057
  "learning_rate": 4.4434470377019746e-05,
3058
- "loss": 0.2375,
3059
  "step": 4950
3060
  },
3061
  {
3062
  "epoch": 4.45,
3063
  "learning_rate": 4.452423698384201e-05,
3064
- "loss": 0.2262,
3065
  "step": 4960
3066
  },
3067
  {
3068
  "epoch": 4.46,
3069
  "learning_rate": 4.4614003590664275e-05,
3070
- "loss": 0.2465,
3071
  "step": 4970
3072
  },
3073
  {
3074
  "epoch": 4.47,
3075
  "learning_rate": 4.470377019748654e-05,
3076
- "loss": 0.2164,
3077
  "step": 4980
3078
  },
3079
  {
3080
  "epoch": 4.48,
3081
  "learning_rate": 4.47935368043088e-05,
3082
- "loss": 0.194,
3083
  "step": 4990
3084
  },
3085
  {
3086
  "epoch": 4.49,
3087
  "learning_rate": 4.488330341113106e-05,
3088
- "loss": 0.2824,
3089
  "step": 5000
3090
  },
3091
  {
3092
  "epoch": 4.49,
3093
  "learning_rate": 4.497307001795332e-05,
3094
- "loss": 0.2037,
3095
  "step": 5010
3096
  },
3097
  {
3098
  "epoch": 4.5,
3099
  "learning_rate": 4.5062836624775585e-05,
3100
- "loss": 0.2506,
3101
  "step": 5020
3102
  },
3103
  {
3104
  "epoch": 4.51,
3105
  "learning_rate": 4.515260323159784e-05,
3106
- "loss": 0.1948,
3107
  "step": 5030
3108
  },
3109
  {
3110
  "epoch": 4.52,
3111
  "learning_rate": 4.5242369838420114e-05,
3112
- "loss": 0.2214,
3113
  "step": 5040
3114
  },
3115
  {
3116
  "epoch": 4.53,
3117
  "learning_rate": 4.533213644524237e-05,
3118
- "loss": 0.247,
3119
  "step": 5050
3120
  },
3121
  {
3122
  "epoch": 4.54,
3123
  "learning_rate": 4.5421903052064636e-05,
3124
- "loss": 0.2344,
3125
  "step": 5060
3126
  },
3127
  {
3128
  "epoch": 4.55,
3129
  "learning_rate": 4.5511669658886894e-05,
3130
- "loss": 0.1798,
3131
  "step": 5070
3132
  },
3133
  {
3134
  "epoch": 4.56,
3135
  "learning_rate": 4.560143626570916e-05,
3136
- "loss": 0.1861,
3137
  "step": 5080
3138
  },
3139
  {
3140
  "epoch": 4.57,
3141
  "learning_rate": 4.5691202872531417e-05,
3142
- "loss": 0.3008,
3143
  "step": 5090
3144
  },
3145
  {
3146
  "epoch": 4.58,
3147
  "learning_rate": 4.578096947935368e-05,
3148
- "loss": 0.2465,
3149
  "step": 5100
3150
  },
3151
  {
3152
  "epoch": 4.58,
3153
  "learning_rate": 4.5870736086175946e-05,
3154
- "loss": 0.2362,
3155
  "step": 5110
3156
  },
3157
  {
3158
  "epoch": 4.59,
3159
  "learning_rate": 4.596050269299821e-05,
3160
- "loss": 0.2791,
3161
  "step": 5120
3162
  },
3163
  {
3164
  "epoch": 4.6,
3165
  "learning_rate": 4.605026929982047e-05,
3166
- "loss": 0.2231,
3167
  "step": 5130
3168
  },
3169
  {
3170
  "epoch": 4.61,
3171
  "learning_rate": 4.614003590664273e-05,
3172
- "loss": 0.2162,
3173
  "step": 5140
3174
  },
3175
  {
3176
  "epoch": 4.62,
3177
  "learning_rate": 4.622980251346499e-05,
3178
- "loss": 0.2517,
3179
  "step": 5150
3180
  },
3181
  {
3182
  "epoch": 4.63,
3183
  "learning_rate": 4.6319569120287255e-05,
3184
- "loss": 0.238,
3185
  "step": 5160
3186
  },
3187
  {
3188
  "epoch": 4.64,
3189
  "learning_rate": 4.640933572710951e-05,
3190
- "loss": 0.246,
3191
  "step": 5170
3192
  },
3193
  {
3194
  "epoch": 4.65,
3195
  "learning_rate": 4.6499102333931784e-05,
3196
- "loss": 0.2644,
3197
  "step": 5180
3198
  },
3199
  {
3200
  "epoch": 4.66,
3201
  "learning_rate": 4.658886894075404e-05,
3202
- "loss": 0.2377,
3203
  "step": 5190
3204
  },
3205
  {
3206
  "epoch": 4.66,
3207
  "learning_rate": 4.667863554757631e-05,
3208
- "loss": 0.2456,
3209
  "step": 5200
3210
  },
3211
  {
3212
  "epoch": 4.67,
3213
  "learning_rate": 4.6768402154398565e-05,
3214
- "loss": 0.2361,
3215
  "step": 5210
3216
  },
3217
  {
3218
  "epoch": 4.68,
3219
  "learning_rate": 4.685816876122083e-05,
3220
- "loss": 0.213,
3221
  "step": 5220
3222
  },
3223
  {
3224
  "epoch": 4.69,
3225
  "learning_rate": 4.694793536804309e-05,
3226
- "loss": 0.198,
3227
  "step": 5230
3228
  },
3229
  {
3230
  "epoch": 4.7,
3231
  "learning_rate": 4.703770197486535e-05,
3232
- "loss": 0.1916,
3233
  "step": 5240
3234
  },
3235
  {
3236
  "epoch": 4.71,
3237
  "learning_rate": 4.7127468581687616e-05,
3238
- "loss": 0.2777,
3239
  "step": 5250
3240
  },
3241
  {
3242
  "epoch": 4.72,
3243
  "learning_rate": 4.721723518850988e-05,
3244
- "loss": 0.223,
3245
  "step": 5260
3246
  },
3247
  {
3248
  "epoch": 4.73,
3249
  "learning_rate": 4.730700179533214e-05,
3250
- "loss": 0.2366,
3251
  "step": 5270
3252
  },
3253
  {
3254
  "epoch": 4.74,
3255
  "learning_rate": 4.7396768402154403e-05,
3256
- "loss": 0.2375,
3257
  "step": 5280
3258
  },
3259
  {
3260
  "epoch": 4.75,
3261
  "learning_rate": 4.748653500897666e-05,
3262
- "loss": 0.1849,
3263
  "step": 5290
3264
  },
3265
  {
3266
  "epoch": 4.75,
3267
  "learning_rate": 4.7576301615798926e-05,
3268
- "loss": 0.1591,
3269
  "step": 5300
3270
  },
3271
  {
3272
  "epoch": 4.76,
3273
  "learning_rate": 4.7666068222621184e-05,
3274
- "loss": 0.1891,
3275
  "step": 5310
3276
  },
3277
  {
3278
  "epoch": 4.77,
3279
  "learning_rate": 4.775583482944345e-05,
3280
- "loss": 0.2269,
3281
  "step": 5320
3282
  },
3283
  {
3284
  "epoch": 4.78,
3285
  "learning_rate": 4.784560143626571e-05,
3286
- "loss": 0.2275,
3287
  "step": 5330
3288
  },
3289
  {
3290
  "epoch": 4.79,
3291
  "learning_rate": 4.793536804308798e-05,
3292
- "loss": 0.1934,
3293
  "step": 5340
3294
  },
3295
  {
3296
  "epoch": 4.8,
3297
  "learning_rate": 4.8025134649910235e-05,
3298
- "loss": 0.1878,
3299
  "step": 5350
3300
  },
3301
  {
3302
  "epoch": 4.81,
3303
  "learning_rate": 4.81149012567325e-05,
3304
- "loss": 0.2362,
3305
  "step": 5360
3306
  },
3307
  {
3308
  "epoch": 4.82,
3309
  "learning_rate": 4.820466786355476e-05,
3310
- "loss": 0.1989,
3311
  "step": 5370
3312
  },
3313
  {
3314
  "epoch": 4.83,
3315
  "learning_rate": 4.829443447037702e-05,
3316
- "loss": 0.2118,
3317
  "step": 5380
3318
  },
3319
  {
3320
  "epoch": 4.84,
3321
  "learning_rate": 4.838420107719928e-05,
3322
- "loss": 0.2637,
3323
  "step": 5390
3324
  },
3325
  {
3326
  "epoch": 4.84,
3327
  "learning_rate": 4.847396768402155e-05,
3328
- "loss": 0.2251,
3329
  "step": 5400
3330
  },
3331
  {
3332
  "epoch": 4.85,
3333
  "learning_rate": 4.856373429084381e-05,
3334
- "loss": 0.2176,
3335
  "step": 5410
3336
  },
3337
  {
3338
  "epoch": 4.86,
3339
  "learning_rate": 4.8653500897666074e-05,
3340
- "loss": 0.2177,
3341
  "step": 5420
3342
  },
3343
  {
3344
  "epoch": 4.87,
3345
  "learning_rate": 4.874326750448833e-05,
3346
- "loss": 0.1898,
3347
  "step": 5430
3348
  },
3349
  {
3350
  "epoch": 4.88,
3351
  "learning_rate": 4.8833034111310596e-05,
3352
- "loss": 0.2589,
3353
  "step": 5440
3354
  },
3355
  {
3356
  "epoch": 4.89,
3357
  "learning_rate": 4.8922800718132854e-05,
3358
- "loss": 0.2425,
3359
  "step": 5450
3360
  },
3361
  {
3362
  "epoch": 4.9,
3363
  "learning_rate": 4.901256732495512e-05,
3364
- "loss": 0.2131,
3365
  "step": 5460
3366
  },
3367
  {
3368
  "epoch": 4.91,
3369
  "learning_rate": 4.9102333931777383e-05,
3370
- "loss": 0.2607,
3371
  "step": 5470
3372
  },
3373
  {
3374
  "epoch": 4.92,
3375
  "learning_rate": 4.919210053859965e-05,
3376
- "loss": 0.2929,
3377
  "step": 5480
3378
  },
3379
  {
3380
  "epoch": 4.92,
3381
  "learning_rate": 4.9281867145421906e-05,
3382
- "loss": 0.207,
3383
  "step": 5490
3384
  },
3385
  {
3386
  "epoch": 4.93,
3387
  "learning_rate": 4.937163375224417e-05,
3388
- "loss": 0.3048,
3389
  "step": 5500
3390
  },
3391
  {
3392
  "epoch": 4.94,
3393
  "learning_rate": 4.946140035906643e-05,
3394
- "loss": 0.2231,
3395
  "step": 5510
3396
  },
3397
  {
3398
  "epoch": 4.95,
3399
  "learning_rate": 4.955116696588869e-05,
3400
- "loss": 0.1971,
3401
  "step": 5520
3402
  },
3403
  {
3404
  "epoch": 4.96,
3405
  "learning_rate": 4.964093357271095e-05,
3406
- "loss": 0.2188,
3407
  "step": 5530
3408
  },
3409
  {
3410
  "epoch": 4.97,
3411
  "learning_rate": 4.9730700179533215e-05,
3412
- "loss": 0.162,
3413
  "step": 5540
3414
  },
3415
  {
3416
  "epoch": 4.98,
3417
  "learning_rate": 4.982046678635548e-05,
3418
- "loss": 0.1836,
3419
  "step": 5550
3420
  },
3421
  {
3422
  "epoch": 4.99,
3423
  "learning_rate": 4.991023339317774e-05,
3424
- "loss": 0.2438,
3425
  "step": 5560
3426
  },
3427
  {
3428
  "epoch": 5.0,
3429
  "learning_rate": 5e-05,
3430
- "loss": 0.2397,
3431
  "step": 5570
3432
  },
3433
  {
3434
  "epoch": 5.0,
3435
  "eval_accuracy": {
3436
- "accuracy": 0.9569410181655079
3437
  },
3438
  "eval_f1": {
3439
- "f1": 0.9546849040800426
3440
  },
3441
- "eval_loss": 0.1309136003255844,
3442
  "eval_precision": {
3443
- "precision": 0.9547567778919024
3444
  },
3445
  "eval_recall": {
3446
- "recall": 0.9546610577932907
3447
  },
3448
- "eval_runtime": 167.576,
3449
- "eval_samples_per_second": 106.435,
3450
- "eval_steps_per_second": 6.654,
3451
  "step": 5573
3452
  }
3453
  ],
 
1
  {
2
+ "best_metric": 0.1283472627401352,
3
  "best_model_checkpoint": "vit-base-patch16-224-in21k-crack-detectorVITmain50epochs\\checkpoint-5573",
4
  "epoch": 4.999327203408836,
5
  "eval_steps": 500,
 
11
  {
12
  "epoch": 0.01,
13
  "learning_rate": 8.976660682226213e-08,
14
+ "loss": 1.3961,
15
  "step": 10
16
  },
17
  {
18
  "epoch": 0.02,
19
  "learning_rate": 1.7953321364452426e-07,
20
+ "loss": 1.395,
21
  "step": 20
22
  },
23
  {
24
  "epoch": 0.03,
25
  "learning_rate": 2.692998204667864e-07,
26
+ "loss": 1.3978,
27
  "step": 30
28
  },
29
  {
30
  "epoch": 0.04,
31
  "learning_rate": 3.590664272890485e-07,
32
+ "loss": 1.3963,
33
  "step": 40
34
  },
35
  {
36
  "epoch": 0.04,
37
  "learning_rate": 4.4883303411131064e-07,
38
+ "loss": 1.3907,
39
  "step": 50
40
  },
41
  {
42
  "epoch": 0.05,
43
  "learning_rate": 5.385996409335728e-07,
44
+ "loss": 1.396,
45
  "step": 60
46
  },
47
  {
48
  "epoch": 0.06,
49
  "learning_rate": 6.283662477558349e-07,
50
+ "loss": 1.3885,
51
  "step": 70
52
  },
53
  {
54
  "epoch": 0.07,
55
  "learning_rate": 7.18132854578097e-07,
56
+ "loss": 1.387,
57
  "step": 80
58
  },
59
  {
60
  "epoch": 0.08,
61
  "learning_rate": 8.078994614003591e-07,
62
+ "loss": 1.3894,
63
  "step": 90
64
  },
65
  {
66
  "epoch": 0.09,
67
  "learning_rate": 8.976660682226213e-07,
68
+ "loss": 1.3806,
69
  "step": 100
70
  },
71
  {
72
  "epoch": 0.1,
73
  "learning_rate": 9.874326750448833e-07,
74
+ "loss": 1.383,
75
  "step": 110
76
  },
77
  {
78
  "epoch": 0.11,
79
  "learning_rate": 1.0771992818671456e-06,
80
+ "loss": 1.3764,
81
  "step": 120
82
  },
83
  {
84
  "epoch": 0.12,
85
  "learning_rate": 1.1669658886894075e-06,
86
+ "loss": 1.3688,
87
  "step": 130
88
  },
89
  {
90
  "epoch": 0.13,
91
  "learning_rate": 1.2567324955116697e-06,
92
+ "loss": 1.3708,
93
  "step": 140
94
  },
95
  {
96
  "epoch": 0.13,
97
  "learning_rate": 1.3464991023339318e-06,
98
+ "loss": 1.366,
99
  "step": 150
100
  },
101
  {
102
  "epoch": 0.14,
103
  "learning_rate": 1.436265709156194e-06,
104
+ "loss": 1.371,
105
  "step": 160
106
  },
107
  {
108
  "epoch": 0.15,
109
  "learning_rate": 1.5260323159784561e-06,
110
+ "loss": 1.3558,
111
  "step": 170
112
  },
113
  {
114
  "epoch": 0.16,
115
  "learning_rate": 1.6157989228007182e-06,
116
+ "loss": 1.3584,
117
  "step": 180
118
  },
119
  {
120
  "epoch": 0.17,
121
  "learning_rate": 1.7055655296229805e-06,
122
+ "loss": 1.3409,
123
  "step": 190
124
  },
125
  {
126
  "epoch": 0.18,
127
  "learning_rate": 1.7953321364452425e-06,
128
+ "loss": 1.346,
129
  "step": 200
130
  },
131
  {
132
  "epoch": 0.19,
133
  "learning_rate": 1.8850987432675046e-06,
134
+ "loss": 1.3355,
135
  "step": 210
136
  },
137
  {
138
  "epoch": 0.2,
139
  "learning_rate": 1.9748653500897667e-06,
140
+ "loss": 1.3323,
141
  "step": 220
142
  },
143
  {
144
  "epoch": 0.21,
145
  "learning_rate": 2.064631956912029e-06,
146
+ "loss": 1.3231,
147
  "step": 230
148
  },
149
  {
150
  "epoch": 0.22,
151
  "learning_rate": 2.1543985637342912e-06,
152
+ "loss": 1.3129,
153
  "step": 240
154
  },
155
  {
156
  "epoch": 0.22,
157
  "learning_rate": 2.244165170556553e-06,
158
+ "loss": 1.3178,
159
  "step": 250
160
  },
161
  {
162
  "epoch": 0.23,
163
  "learning_rate": 2.333931777378815e-06,
164
+ "loss": 1.2953,
165
  "step": 260
166
  },
167
  {
168
  "epoch": 0.24,
169
  "learning_rate": 2.423698384201077e-06,
170
+ "loss": 1.2938,
171
  "step": 270
172
  },
173
  {
174
  "epoch": 0.25,
175
  "learning_rate": 2.5134649910233395e-06,
176
+ "loss": 1.2849,
177
  "step": 280
178
  },
179
  {
180
  "epoch": 0.26,
181
  "learning_rate": 2.6032315978456015e-06,
182
+ "loss": 1.2813,
183
  "step": 290
184
  },
185
  {
186
  "epoch": 0.27,
187
  "learning_rate": 2.6929982046678636e-06,
188
+ "loss": 1.2811,
189
  "step": 300
190
  },
191
  {
192
  "epoch": 0.28,
193
  "learning_rate": 2.7827648114901257e-06,
194
+ "loss": 1.2526,
195
  "step": 310
196
  },
197
  {
198
  "epoch": 0.29,
199
  "learning_rate": 2.872531418312388e-06,
200
+ "loss": 1.2438,
201
  "step": 320
202
  },
203
  {
204
  "epoch": 0.3,
205
  "learning_rate": 2.9622980251346502e-06,
206
+ "loss": 1.2145,
207
  "step": 330
208
  },
209
  {
210
  "epoch": 0.31,
211
  "learning_rate": 3.0520646319569123e-06,
212
+ "loss": 1.2146,
213
  "step": 340
214
  },
215
  {
216
  "epoch": 0.31,
217
  "learning_rate": 3.1418312387791743e-06,
218
+ "loss": 1.1883,
219
  "step": 350
220
  },
221
  {
222
  "epoch": 0.32,
223
  "learning_rate": 3.2315978456014364e-06,
224
+ "loss": 1.1891,
225
  "step": 360
226
  },
227
  {
228
  "epoch": 0.33,
229
  "learning_rate": 3.321364452423698e-06,
230
+ "loss": 1.1723,
231
  "step": 370
232
  },
233
  {
234
  "epoch": 0.34,
235
  "learning_rate": 3.411131059245961e-06,
236
+ "loss": 1.1417,
237
  "step": 380
238
  },
239
  {
240
  "epoch": 0.35,
241
  "learning_rate": 3.500897666068223e-06,
242
+ "loss": 1.1338,
243
  "step": 390
244
  },
245
  {
246
  "epoch": 0.36,
247
  "learning_rate": 3.590664272890485e-06,
248
+ "loss": 1.1004,
249
  "step": 400
250
  },
251
  {
252
  "epoch": 0.37,
253
  "learning_rate": 3.680430879712747e-06,
254
+ "loss": 1.0873,
255
  "step": 410
256
  },
257
  {
258
  "epoch": 0.38,
259
  "learning_rate": 3.770197486535009e-06,
260
+ "loss": 1.0358,
261
  "step": 420
262
  },
263
  {
264
  "epoch": 0.39,
265
  "learning_rate": 3.859964093357271e-06,
266
+ "loss": 1.0517,
267
  "step": 430
268
  },
269
  {
270
  "epoch": 0.39,
271
  "learning_rate": 3.949730700179533e-06,
272
+ "loss": 1.0139,
273
  "step": 440
274
  },
275
  {
276
  "epoch": 0.4,
277
  "learning_rate": 4.039497307001795e-06,
278
+ "loss": 1.0294,
279
  "step": 450
280
  },
281
  {
282
  "epoch": 0.41,
283
  "learning_rate": 4.129263913824058e-06,
284
+ "loss": 1.0171,
285
  "step": 460
286
  },
287
  {
288
  "epoch": 0.42,
289
  "learning_rate": 4.21903052064632e-06,
290
+ "loss": 0.9827,
291
  "step": 470
292
  },
293
  {
294
  "epoch": 0.43,
295
  "learning_rate": 4.3087971274685824e-06,
296
+ "loss": 0.9382,
297
  "step": 480
298
  },
299
  {
300
  "epoch": 0.44,
301
  "learning_rate": 4.3985637342908445e-06,
302
+ "loss": 0.9145,
303
  "step": 490
304
  },
305
  {
306
  "epoch": 0.45,
307
  "learning_rate": 4.488330341113106e-06,
308
+ "loss": 0.9276,
309
  "step": 500
310
  },
311
  {
312
  "epoch": 0.46,
313
  "learning_rate": 4.578096947935368e-06,
314
+ "loss": 0.8801,
315
  "step": 510
316
  },
317
  {
318
  "epoch": 0.47,
319
  "learning_rate": 4.66786355475763e-06,
320
+ "loss": 0.8906,
321
  "step": 520
322
  },
323
  {
324
  "epoch": 0.48,
325
  "learning_rate": 4.757630161579892e-06,
326
+ "loss": 0.9025,
327
  "step": 530
328
  },
329
  {
330
  "epoch": 0.48,
331
  "learning_rate": 4.847396768402154e-06,
332
+ "loss": 0.8621,
333
  "step": 540
334
  },
335
  {
336
  "epoch": 0.49,
337
  "learning_rate": 4.937163375224417e-06,
338
+ "loss": 0.8653,
339
  "step": 550
340
  },
341
  {
342
  "epoch": 0.5,
343
  "learning_rate": 5.026929982046679e-06,
344
+ "loss": 0.8292,
345
  "step": 560
346
  },
347
  {
348
  "epoch": 0.51,
349
  "learning_rate": 5.116696588868941e-06,
350
+ "loss": 0.8195,
351
  "step": 570
352
  },
353
  {
354
  "epoch": 0.52,
355
  "learning_rate": 5.206463195691203e-06,
356
+ "loss": 0.8026,
357
  "step": 580
358
  },
359
  {
360
  "epoch": 0.53,
361
  "learning_rate": 5.296229802513465e-06,
362
+ "loss": 0.841,
363
  "step": 590
364
  },
365
  {
366
  "epoch": 0.54,
367
  "learning_rate": 5.385996409335727e-06,
368
+ "loss": 0.778,
369
  "step": 600
370
  },
371
  {
372
  "epoch": 0.55,
373
  "learning_rate": 5.475763016157989e-06,
374
+ "loss": 0.7706,
375
  "step": 610
376
  },
377
  {
378
  "epoch": 0.56,
379
  "learning_rate": 5.565529622980251e-06,
380
+ "loss": 0.733,
381
  "step": 620
382
  },
383
  {
384
  "epoch": 0.57,
385
  "learning_rate": 5.655296229802514e-06,
386
+ "loss": 0.7437,
387
  "step": 630
388
  },
389
  {
390
  "epoch": 0.57,
391
  "learning_rate": 5.745062836624776e-06,
392
+ "loss": 0.7644,
393
  "step": 640
394
  },
395
  {
396
  "epoch": 0.58,
397
  "learning_rate": 5.834829443447038e-06,
398
+ "loss": 0.7125,
399
  "step": 650
400
  },
401
  {
402
  "epoch": 0.59,
403
  "learning_rate": 5.9245960502693004e-06,
404
+ "loss": 0.7146,
405
  "step": 660
406
  },
407
  {
408
  "epoch": 0.6,
409
  "learning_rate": 6.0143626570915625e-06,
410
+ "loss": 0.7201,
411
  "step": 670
412
  },
413
  {
414
  "epoch": 0.61,
415
  "learning_rate": 6.1041292639138246e-06,
416
+ "loss": 0.6851,
417
  "step": 680
418
  },
419
  {
420
  "epoch": 0.62,
421
  "learning_rate": 6.193895870736087e-06,
422
+ "loss": 0.6417,
423
  "step": 690
424
  },
425
  {
426
  "epoch": 0.63,
427
  "learning_rate": 6.283662477558349e-06,
428
+ "loss": 0.6656,
429
  "step": 700
430
  },
431
  {
432
  "epoch": 0.64,
433
  "learning_rate": 6.373429084380611e-06,
434
+ "loss": 0.6122,
435
  "step": 710
436
  },
437
  {
438
  "epoch": 0.65,
439
  "learning_rate": 6.463195691202873e-06,
440
+ "loss": 0.6536,
441
  "step": 720
442
  },
443
  {
444
  "epoch": 0.65,
445
  "learning_rate": 6.552962298025135e-06,
446
+ "loss": 0.6315,
447
  "step": 730
448
  },
449
  {
450
  "epoch": 0.66,
451
  "learning_rate": 6.642728904847396e-06,
452
+ "loss": 0.6618,
453
  "step": 740
454
  },
455
  {
456
  "epoch": 0.67,
457
  "learning_rate": 6.732495511669659e-06,
458
+ "loss": 0.6346,
459
  "step": 750
460
  },
461
  {
462
  "epoch": 0.68,
463
  "learning_rate": 6.822262118491922e-06,
464
+ "loss": 0.6866,
465
  "step": 760
466
  },
467
  {
468
  "epoch": 0.69,
469
  "learning_rate": 6.912028725314183e-06,
470
+ "loss": 0.6093,
471
  "step": 770
472
  },
473
  {
474
  "epoch": 0.7,
475
  "learning_rate": 7.001795332136446e-06,
476
+ "loss": 0.6116,
477
  "step": 780
478
  },
479
  {
480
  "epoch": 0.71,
481
  "learning_rate": 7.091561938958707e-06,
482
+ "loss": 0.602,
483
  "step": 790
484
  },
485
  {
486
  "epoch": 0.72,
487
  "learning_rate": 7.18132854578097e-06,
488
+ "loss": 0.6266,
489
  "step": 800
490
  },
491
  {
492
  "epoch": 0.73,
493
  "learning_rate": 7.271095152603231e-06,
494
+ "loss": 0.6082,
495
  "step": 810
496
  },
497
  {
498
  "epoch": 0.74,
499
  "learning_rate": 7.360861759425494e-06,
500
+ "loss": 0.6194,
501
  "step": 820
502
  },
503
  {
504
  "epoch": 0.74,
505
  "learning_rate": 7.4506283662477555e-06,
506
+ "loss": 0.6277,
507
  "step": 830
508
  },
509
  {
510
  "epoch": 0.75,
511
  "learning_rate": 7.540394973070018e-06,
512
+ "loss": 0.609,
513
  "step": 840
514
  },
515
  {
516
  "epoch": 0.76,
517
  "learning_rate": 7.630161579892281e-06,
518
+ "loss": 0.6252,
519
  "step": 850
520
  },
521
  {
522
  "epoch": 0.77,
523
  "learning_rate": 7.719928186714543e-06,
524
+ "loss": 0.5813,
525
  "step": 860
526
  },
527
  {
528
  "epoch": 0.78,
529
  "learning_rate": 7.809694793536805e-06,
530
+ "loss": 0.5799,
531
  "step": 870
532
  },
533
  {
534
  "epoch": 0.79,
535
  "learning_rate": 7.899461400359067e-06,
536
+ "loss": 0.6398,
537
  "step": 880
538
  },
539
  {
540
  "epoch": 0.8,
541
  "learning_rate": 7.98922800718133e-06,
542
+ "loss": 0.5549,
543
  "step": 890
544
  },
545
  {
546
  "epoch": 0.81,
547
  "learning_rate": 8.07899461400359e-06,
548
+ "loss": 0.6023,
549
  "step": 900
550
  },
551
  {
552
  "epoch": 0.82,
553
  "learning_rate": 8.168761220825854e-06,
554
+ "loss": 0.582,
555
  "step": 910
556
  },
557
  {
558
  "epoch": 0.83,
559
  "learning_rate": 8.258527827648117e-06,
560
+ "loss": 0.544,
561
  "step": 920
562
  },
563
  {
564
  "epoch": 0.83,
565
  "learning_rate": 8.348294434470378e-06,
566
+ "loss": 0.5912,
567
  "step": 930
568
  },
569
  {
570
  "epoch": 0.84,
571
  "learning_rate": 8.43806104129264e-06,
572
+ "loss": 0.5461,
573
  "step": 940
574
  },
575
  {
576
  "epoch": 0.85,
577
  "learning_rate": 8.527827648114902e-06,
578
+ "loss": 0.5238,
579
  "step": 950
580
  },
581
  {
582
  "epoch": 0.86,
583
  "learning_rate": 8.617594254937165e-06,
584
+ "loss": 0.5558,
585
  "step": 960
586
  },
587
  {
588
  "epoch": 0.87,
589
  "learning_rate": 8.707360861759426e-06,
590
+ "loss": 0.5371,
591
  "step": 970
592
  },
593
  {
594
  "epoch": 0.88,
595
  "learning_rate": 8.797127468581689e-06,
596
+ "loss": 0.5529,
597
  "step": 980
598
  },
599
  {
600
  "epoch": 0.89,
601
  "learning_rate": 8.88689407540395e-06,
602
+ "loss": 0.5691,
603
  "step": 990
604
  },
605
  {
606
  "epoch": 0.9,
607
  "learning_rate": 8.976660682226211e-06,
608
+ "loss": 0.5567,
609
  "step": 1000
610
  },
611
  {
612
  "epoch": 0.91,
613
  "learning_rate": 9.066427289048474e-06,
614
+ "loss": 0.5135,
615
  "step": 1010
616
  },
617
  {
618
  "epoch": 0.92,
619
  "learning_rate": 9.156193895870736e-06,
620
+ "loss": 0.5099,
621
  "step": 1020
622
  },
623
  {
624
  "epoch": 0.92,
625
  "learning_rate": 9.245960502692998e-06,
626
+ "loss": 0.5422,
627
  "step": 1030
628
  },
629
  {
630
  "epoch": 0.93,
631
  "learning_rate": 9.33572710951526e-06,
632
+ "loss": 0.5106,
633
  "step": 1040
634
  },
635
  {
636
  "epoch": 0.94,
637
  "learning_rate": 9.425493716337523e-06,
638
+ "loss": 0.4892,
639
  "step": 1050
640
  },
641
  {
642
  "epoch": 0.95,
643
  "learning_rate": 9.515260323159784e-06,
644
+ "loss": 0.4541,
645
  "step": 1060
646
  },
647
  {
648
  "epoch": 0.96,
649
  "learning_rate": 9.605026929982047e-06,
650
+ "loss": 0.4675,
651
  "step": 1070
652
  },
653
  {
654
  "epoch": 0.97,
655
  "learning_rate": 9.694793536804308e-06,
656
+ "loss": 0.5123,
657
  "step": 1080
658
  },
659
  {
660
  "epoch": 0.98,
661
  "learning_rate": 9.784560143626571e-06,
662
+ "loss": 0.5089,
663
  "step": 1090
664
  },
665
  {
666
  "epoch": 0.99,
667
  "learning_rate": 9.874326750448834e-06,
668
+ "loss": 0.5233,
669
  "step": 1100
670
  },
671
  {
672
  "epoch": 1.0,
673
  "learning_rate": 9.964093357271095e-06,
674
+ "loss": 0.5622,
675
  "step": 1110
676
  },
677
  {
678
  "epoch": 1.0,
679
  "eval_accuracy": {
680
+ "accuracy": 0.8736263736263736
681
  },
682
  "eval_f1": {
683
+ "f1": 0.8653360894930927
684
  },
685
+ "eval_loss": 0.4108576774597168,
686
  "eval_precision": {
687
+ "precision": 0.8669392826942903
688
  },
689
  "eval_recall": {
690
+ "recall": 0.8641606609285752
691
  },
692
+ "eval_runtime": 236.9507,
693
+ "eval_samples_per_second": 75.273,
694
+ "eval_steps_per_second": 4.706,
695
  "step": 1114
696
  },
697
  {
698
  "epoch": 1.0,
699
  "learning_rate": 1.0053859964093358e-05,
700
+ "loss": 0.4957,
701
  "step": 1120
702
  },
703
  {
704
  "epoch": 1.01,
705
  "learning_rate": 1.0143626570915619e-05,
706
+ "loss": 0.5729,
707
  "step": 1130
708
  },
709
  {
710
  "epoch": 1.02,
711
  "learning_rate": 1.0233393177737882e-05,
712
+ "loss": 0.4884,
713
  "step": 1140
714
  },
715
  {
716
  "epoch": 1.03,
717
  "learning_rate": 1.0323159784560143e-05,
718
+ "loss": 0.4744,
719
  "step": 1150
720
  },
721
  {
722
  "epoch": 1.04,
723
  "learning_rate": 1.0412926391382406e-05,
724
+ "loss": 0.4683,
725
  "step": 1160
726
  },
727
  {
728
  "epoch": 1.05,
729
  "learning_rate": 1.0502692998204669e-05,
730
+ "loss": 0.5,
731
  "step": 1170
732
  },
733
  {
734
  "epoch": 1.06,
735
  "learning_rate": 1.059245960502693e-05,
736
+ "loss": 0.4649,
737
  "step": 1180
738
  },
739
  {
740
  "epoch": 1.07,
741
  "learning_rate": 1.0682226211849193e-05,
742
+ "loss": 0.508,
743
  "step": 1190
744
  },
745
  {
746
  "epoch": 1.08,
747
  "learning_rate": 1.0771992818671454e-05,
748
+ "loss": 0.4898,
749
  "step": 1200
750
  },
751
  {
752
  "epoch": 1.09,
753
  "learning_rate": 1.0861759425493717e-05,
754
+ "loss": 0.4722,
755
  "step": 1210
756
  },
757
  {
758
  "epoch": 1.09,
759
  "learning_rate": 1.0951526032315979e-05,
760
+ "loss": 0.4814,
761
  "step": 1220
762
  },
763
  {
764
  "epoch": 1.1,
765
  "learning_rate": 1.1041292639138241e-05,
766
+ "loss": 0.4505,
767
  "step": 1230
768
  },
769
  {
770
  "epoch": 1.11,
771
  "learning_rate": 1.1131059245960503e-05,
772
+ "loss": 0.4505,
773
  "step": 1240
774
  },
775
  {
776
  "epoch": 1.12,
777
  "learning_rate": 1.1220825852782766e-05,
778
+ "loss": 0.4187,
779
  "step": 1250
780
  },
781
  {
782
  "epoch": 1.13,
783
  "learning_rate": 1.1310592459605028e-05,
784
+ "loss": 0.4603,
785
  "step": 1260
786
  },
787
  {
788
  "epoch": 1.14,
789
  "learning_rate": 1.140035906642729e-05,
790
+ "loss": 0.4721,
791
  "step": 1270
792
  },
793
  {
794
  "epoch": 1.15,
795
  "learning_rate": 1.1490125673249553e-05,
796
+ "loss": 0.4368,
797
  "step": 1280
798
  },
799
  {
800
  "epoch": 1.16,
801
  "learning_rate": 1.1579892280071814e-05,
802
+ "loss": 0.4722,
803
  "step": 1290
804
  },
805
  {
806
  "epoch": 1.17,
807
  "learning_rate": 1.1669658886894077e-05,
808
+ "loss": 0.4718,
809
  "step": 1300
810
  },
811
  {
812
  "epoch": 1.18,
813
  "learning_rate": 1.1759425493716338e-05,
814
+ "loss": 0.4899,
815
  "step": 1310
816
  },
817
  {
818
  "epoch": 1.18,
819
  "learning_rate": 1.1849192100538601e-05,
820
+ "loss": 0.4758,
821
  "step": 1320
822
  },
823
  {
824
  "epoch": 1.19,
825
  "learning_rate": 1.1938958707360862e-05,
826
+ "loss": 0.4523,
827
  "step": 1330
828
  },
829
  {
830
  "epoch": 1.2,
831
  "learning_rate": 1.2028725314183125e-05,
832
+ "loss": 0.485,
833
  "step": 1340
834
  },
835
  {
836
  "epoch": 1.21,
837
  "learning_rate": 1.2118491921005388e-05,
838
+ "loss": 0.4977,
839
  "step": 1350
840
  },
841
  {
842
  "epoch": 1.22,
843
  "learning_rate": 1.2208258527827649e-05,
844
+ "loss": 0.4305,
845
  "step": 1360
846
  },
847
  {
848
  "epoch": 1.23,
849
  "learning_rate": 1.2298025134649912e-05,
850
+ "loss": 0.4221,
851
  "step": 1370
852
  },
853
  {
854
  "epoch": 1.24,
855
  "learning_rate": 1.2387791741472173e-05,
856
+ "loss": 0.4975,
857
  "step": 1380
858
  },
859
  {
860
  "epoch": 1.25,
861
  "learning_rate": 1.2477558348294434e-05,
862
+ "loss": 0.4506,
863
  "step": 1390
864
  },
865
  {
866
  "epoch": 1.26,
867
  "learning_rate": 1.2567324955116697e-05,
868
+ "loss": 0.4326,
869
  "step": 1400
870
  },
871
  {
872
  "epoch": 1.26,
873
  "learning_rate": 1.2657091561938959e-05,
874
+ "loss": 0.4479,
875
  "step": 1410
876
  },
877
  {
878
  "epoch": 1.27,
879
  "learning_rate": 1.2746858168761221e-05,
880
+ "loss": 0.4468,
881
  "step": 1420
882
  },
883
  {
884
  "epoch": 1.28,
885
  "learning_rate": 1.2836624775583483e-05,
886
+ "loss": 0.402,
887
  "step": 1430
888
  },
889
  {
890
  "epoch": 1.29,
891
  "learning_rate": 1.2926391382405746e-05,
892
+ "loss": 0.4711,
893
  "step": 1440
894
  },
895
  {
896
  "epoch": 1.3,
897
  "learning_rate": 1.3016157989228009e-05,
898
+ "loss": 0.4686,
899
  "step": 1450
900
  },
901
  {
902
  "epoch": 1.31,
903
  "learning_rate": 1.310592459605027e-05,
904
+ "loss": 0.4639,
905
  "step": 1460
906
  },
907
  {
908
  "epoch": 1.32,
909
  "learning_rate": 1.3195691202872531e-05,
910
+ "loss": 0.435,
911
  "step": 1470
912
  },
913
  {
914
  "epoch": 1.33,
915
  "learning_rate": 1.3285457809694792e-05,
916
+ "loss": 0.3868,
917
  "step": 1480
918
  },
919
  {
920
  "epoch": 1.34,
921
  "learning_rate": 1.3375224416517057e-05,
922
+ "loss": 0.4303,
923
  "step": 1490
924
  },
925
  {
926
  "epoch": 1.35,
927
  "learning_rate": 1.3464991023339318e-05,
928
+ "loss": 0.432,
929
  "step": 1500
930
  },
931
  {
932
  "epoch": 1.35,
933
  "learning_rate": 1.355475763016158e-05,
934
+ "loss": 0.4206,
935
  "step": 1510
936
  },
937
  {
938
  "epoch": 1.36,
939
  "learning_rate": 1.3644524236983844e-05,
940
+ "loss": 0.4282,
941
  "step": 1520
942
  },
943
  {
944
  "epoch": 1.37,
945
  "learning_rate": 1.3734290843806105e-05,
946
+ "loss": 0.4284,
947
  "step": 1530
948
  },
949
  {
950
  "epoch": 1.38,
951
  "learning_rate": 1.3824057450628366e-05,
952
+ "loss": 0.4096,
953
  "step": 1540
954
  },
955
  {
956
  "epoch": 1.39,
957
  "learning_rate": 1.3913824057450627e-05,
958
+ "loss": 0.4259,
959
  "step": 1550
960
  },
961
  {
962
  "epoch": 1.4,
963
  "learning_rate": 1.4003590664272892e-05,
964
+ "loss": 0.4436,
965
  "step": 1560
966
  },
967
  {
968
  "epoch": 1.41,
969
  "learning_rate": 1.4093357271095153e-05,
970
+ "loss": 0.4453,
971
  "step": 1570
972
  },
973
  {
974
  "epoch": 1.42,
975
  "learning_rate": 1.4183123877917415e-05,
976
+ "loss": 0.4065,
977
  "step": 1580
978
  },
979
  {
980
  "epoch": 1.43,
981
  "learning_rate": 1.4272890484739679e-05,
982
+ "loss": 0.4011,
983
  "step": 1590
984
  },
985
  {
986
  "epoch": 1.44,
987
  "learning_rate": 1.436265709156194e-05,
988
+ "loss": 0.4488,
989
  "step": 1600
990
  },
991
  {
992
  "epoch": 1.44,
993
  "learning_rate": 1.4452423698384202e-05,
994
+ "loss": 0.4122,
995
  "step": 1610
996
  },
997
  {
998
  "epoch": 1.45,
999
  "learning_rate": 1.4542190305206463e-05,
1000
+ "loss": 0.429,
1001
  "step": 1620
1002
  },
1003
  {
1004
  "epoch": 1.46,
1005
  "learning_rate": 1.4631956912028727e-05,
1006
+ "loss": 0.4377,
1007
  "step": 1630
1008
  },
1009
  {
1010
  "epoch": 1.47,
1011
  "learning_rate": 1.4721723518850989e-05,
1012
+ "loss": 0.4186,
1013
  "step": 1640
1014
  },
1015
  {
1016
  "epoch": 1.48,
1017
  "learning_rate": 1.481149012567325e-05,
1018
+ "loss": 0.4063,
1019
  "step": 1650
1020
  },
1021
  {
1022
  "epoch": 1.49,
1023
  "learning_rate": 1.4901256732495511e-05,
1024
+ "loss": 0.4055,
1025
  "step": 1660
1026
  },
1027
  {
1028
  "epoch": 1.5,
1029
  "learning_rate": 1.4991023339317776e-05,
1030
+ "loss": 0.4173,
1031
  "step": 1670
1032
  },
1033
  {
1034
  "epoch": 1.51,
1035
  "learning_rate": 1.5080789946140037e-05,
1036
+ "loss": 0.4361,
1037
  "step": 1680
1038
  },
1039
  {
1040
  "epoch": 1.52,
1041
  "learning_rate": 1.5170556552962298e-05,
1042
+ "loss": 0.4528,
1043
  "step": 1690
1044
  },
1045
  {
1046
  "epoch": 1.53,
1047
  "learning_rate": 1.5260323159784563e-05,
1048
+ "loss": 0.3665,
1049
  "step": 1700
1050
  },
1051
  {
1052
  "epoch": 1.53,
1053
  "learning_rate": 1.5350089766606824e-05,
1054
+ "loss": 0.3913,
1055
  "step": 1710
1056
  },
1057
  {
1058
  "epoch": 1.54,
1059
  "learning_rate": 1.5439856373429085e-05,
1060
+ "loss": 0.4002,
1061
  "step": 1720
1062
  },
1063
  {
1064
  "epoch": 1.55,
1065
  "learning_rate": 1.5529622980251346e-05,
1066
+ "loss": 0.4004,
1067
  "step": 1730
1068
  },
1069
  {
1070
  "epoch": 1.56,
1071
  "learning_rate": 1.561938958707361e-05,
1072
+ "loss": 0.4432,
1073
  "step": 1740
1074
  },
1075
  {
1076
  "epoch": 1.57,
1077
  "learning_rate": 1.5709156193895872e-05,
1078
+ "loss": 0.3743,
1079
  "step": 1750
1080
  },
1081
  {
1082
  "epoch": 1.58,
1083
  "learning_rate": 1.5798922800718133e-05,
1084
+ "loss": 0.3972,
1085
  "step": 1760
1086
  },
1087
  {
1088
  "epoch": 1.59,
1089
  "learning_rate": 1.5888689407540398e-05,
1090
+ "loss": 0.368,
1091
  "step": 1770
1092
  },
1093
  {
1094
  "epoch": 1.6,
1095
  "learning_rate": 1.597845601436266e-05,
1096
+ "loss": 0.4525,
1097
  "step": 1780
1098
  },
1099
  {
1100
  "epoch": 1.61,
1101
  "learning_rate": 1.606822262118492e-05,
1102
+ "loss": 0.3962,
1103
  "step": 1790
1104
  },
1105
  {
1106
  "epoch": 1.61,
1107
  "learning_rate": 1.615798922800718e-05,
1108
+ "loss": 0.3888,
1109
  "step": 1800
1110
  },
1111
  {
1112
  "epoch": 1.62,
1113
  "learning_rate": 1.6247755834829446e-05,
1114
+ "loss": 0.4111,
1115
  "step": 1810
1116
  },
1117
  {
1118
  "epoch": 1.63,
1119
  "learning_rate": 1.6337522441651707e-05,
1120
+ "loss": 0.4084,
1121
  "step": 1820
1122
  },
1123
  {
1124
  "epoch": 1.64,
1125
  "learning_rate": 1.642728904847397e-05,
1126
+ "loss": 0.4181,
1127
  "step": 1830
1128
  },
1129
  {
1130
  "epoch": 1.65,
1131
  "learning_rate": 1.6517055655296233e-05,
1132
+ "loss": 0.4022,
1133
  "step": 1840
1134
  },
1135
  {
1136
  "epoch": 1.66,
1137
  "learning_rate": 1.6606822262118494e-05,
1138
+ "loss": 0.3886,
1139
  "step": 1850
1140
  },
1141
  {
1142
  "epoch": 1.67,
1143
  "learning_rate": 1.6696588868940756e-05,
1144
+ "loss": 0.4092,
1145
  "step": 1860
1146
  },
1147
  {
1148
  "epoch": 1.68,
1149
  "learning_rate": 1.6786355475763017e-05,
1150
+ "loss": 0.3843,
1151
  "step": 1870
1152
  },
1153
  {
1154
  "epoch": 1.69,
1155
  "learning_rate": 1.687612208258528e-05,
1156
+ "loss": 0.4237,
1157
  "step": 1880
1158
  },
1159
  {
1160
  "epoch": 1.7,
1161
  "learning_rate": 1.6965888689407543e-05,
1162
+ "loss": 0.3801,
1163
  "step": 1890
1164
  },
1165
  {
1166
  "epoch": 1.7,
1167
  "learning_rate": 1.7055655296229804e-05,
1168
+ "loss": 0.385,
1169
  "step": 1900
1170
  },
1171
  {
1172
  "epoch": 1.71,
1173
  "learning_rate": 1.7145421903052065e-05,
1174
+ "loss": 0.4452,
1175
  "step": 1910
1176
  },
1177
  {
1178
  "epoch": 1.72,
1179
  "learning_rate": 1.723518850987433e-05,
1180
+ "loss": 0.5147,
1181
  "step": 1920
1182
  },
1183
  {
1184
  "epoch": 1.73,
1185
  "learning_rate": 1.732495511669659e-05,
1186
+ "loss": 0.353,
1187
  "step": 1930
1188
  },
1189
  {
1190
  "epoch": 1.74,
1191
  "learning_rate": 1.7414721723518852e-05,
1192
+ "loss": 0.3816,
1193
  "step": 1940
1194
  },
1195
  {
1196
  "epoch": 1.75,
1197
  "learning_rate": 1.7504488330341113e-05,
1198
+ "loss": 0.361,
1199
  "step": 1950
1200
  },
1201
  {
1202
  "epoch": 1.76,
1203
  "learning_rate": 1.7594254937163378e-05,
1204
+ "loss": 0.3884,
1205
  "step": 1960
1206
  },
1207
  {
1208
  "epoch": 1.77,
1209
  "learning_rate": 1.768402154398564e-05,
1210
+ "loss": 0.3391,
1211
  "step": 1970
1212
  },
1213
  {
1214
  "epoch": 1.78,
1215
  "learning_rate": 1.77737881508079e-05,
1216
+ "loss": 0.4211,
1217
  "step": 1980
1218
  },
1219
  {
1220
  "epoch": 1.79,
1221
  "learning_rate": 1.786355475763016e-05,
1222
+ "loss": 0.3796,
1223
  "step": 1990
1224
  },
1225
  {
1226
  "epoch": 1.79,
1227
  "learning_rate": 1.7953321364452423e-05,
1228
+ "loss": 0.3881,
1229
  "step": 2000
1230
  },
1231
  {
1232
  "epoch": 1.8,
1233
  "learning_rate": 1.8043087971274687e-05,
1234
+ "loss": 0.3904,
1235
  "step": 2010
1236
  },
1237
  {
1238
  "epoch": 1.81,
1239
  "learning_rate": 1.813285457809695e-05,
1240
+ "loss": 0.3845,
1241
  "step": 2020
1242
  },
1243
  {
1244
  "epoch": 1.82,
1245
  "learning_rate": 1.822262118491921e-05,
1246
+ "loss": 0.3721,
1247
  "step": 2030
1248
  },
1249
  {
1250
  "epoch": 1.83,
1251
  "learning_rate": 1.831238779174147e-05,
1252
+ "loss": 0.4141,
1253
  "step": 2040
1254
  },
1255
  {
1256
  "epoch": 1.84,
1257
  "learning_rate": 1.8402154398563732e-05,
1258
+ "loss": 0.3682,
1259
  "step": 2050
1260
  },
1261
  {
1262
  "epoch": 1.85,
1263
  "learning_rate": 1.8491921005385997e-05,
1264
+ "loss": 0.3826,
1265
  "step": 2060
1266
  },
1267
  {
1268
  "epoch": 1.86,
1269
  "learning_rate": 1.8581687612208258e-05,
1270
+ "loss": 0.3588,
1271
  "step": 2070
1272
  },
1273
  {
1274
  "epoch": 1.87,
1275
  "learning_rate": 1.867145421903052e-05,
1276
+ "loss": 0.3663,
1277
  "step": 2080
1278
  },
1279
  {
1280
  "epoch": 1.87,
1281
  "learning_rate": 1.8761220825852784e-05,
1282
+ "loss": 0.4186,
1283
  "step": 2090
1284
  },
1285
  {
1286
  "epoch": 1.88,
1287
  "learning_rate": 1.8850987432675045e-05,
1288
+ "loss": 0.3756,
1289
  "step": 2100
1290
  },
1291
  {
1292
  "epoch": 1.89,
1293
  "learning_rate": 1.8940754039497306e-05,
1294
+ "loss": 0.3655,
1295
  "step": 2110
1296
  },
1297
  {
1298
  "epoch": 1.9,
1299
  "learning_rate": 1.9030520646319568e-05,
1300
+ "loss": 0.4158,
1301
  "step": 2120
1302
  },
1303
  {
1304
  "epoch": 1.91,
1305
  "learning_rate": 1.9120287253141832e-05,
1306
+ "loss": 0.3985,
1307
  "step": 2130
1308
  },
1309
  {
1310
  "epoch": 1.92,
1311
  "learning_rate": 1.9210053859964093e-05,
1312
+ "loss": 0.3955,
1313
  "step": 2140
1314
  },
1315
  {
1316
  "epoch": 1.93,
1317
  "learning_rate": 1.9299820466786355e-05,
1318
+ "loss": 0.3757,
1319
  "step": 2150
1320
  },
1321
  {
1322
  "epoch": 1.94,
1323
  "learning_rate": 1.9389587073608616e-05,
1324
+ "loss": 0.3712,
1325
  "step": 2160
1326
  },
1327
  {
1328
  "epoch": 1.95,
1329
  "learning_rate": 1.947935368043088e-05,
1330
+ "loss": 0.3723,
1331
  "step": 2170
1332
  },
1333
  {
1334
  "epoch": 1.96,
1335
  "learning_rate": 1.9569120287253142e-05,
1336
+ "loss": 0.367,
1337
  "step": 2180
1338
  },
1339
  {
1340
  "epoch": 1.96,
1341
  "learning_rate": 1.9658886894075403e-05,
1342
+ "loss": 0.3659,
1343
  "step": 2190
1344
  },
1345
  {
1346
  "epoch": 1.97,
1347
  "learning_rate": 1.9748653500897668e-05,
1348
+ "loss": 0.3752,
1349
  "step": 2200
1350
  },
1351
  {
1352
  "epoch": 1.98,
1353
  "learning_rate": 1.983842010771993e-05,
1354
+ "loss": 0.318,
1355
  "step": 2210
1356
  },
1357
  {
1358
  "epoch": 1.99,
1359
  "learning_rate": 1.992818671454219e-05,
1360
+ "loss": 0.3696,
1361
  "step": 2220
1362
  },
1363
  {
1364
  "epoch": 2.0,
1365
  "eval_accuracy": {
1366
+ "accuracy": 0.9249271137026239
1367
  },
1368
  "eval_f1": {
1369
+ "f1": 0.9207602119769538
1370
  },
1371
+ "eval_loss": 0.2492757886648178,
1372
  "eval_precision": {
1373
+ "precision": 0.9199949527238288
1374
  },
1375
  "eval_recall": {
1376
+ "recall": 0.9219224853720074
1377
  },
1378
+ "eval_runtime": 166.1808,
1379
+ "eval_samples_per_second": 107.329,
1380
+ "eval_steps_per_second": 6.71,
1381
  "step": 2229
1382
  },
1383
  {
1384
  "epoch": 2.0,
1385
  "learning_rate": 2.001795332136445e-05,
1386
+ "loss": 0.3681,
1387
  "step": 2230
1388
  },
1389
  {
1390
  "epoch": 2.01,
1391
  "learning_rate": 2.0107719928186716e-05,
1392
+ "loss": 0.321,
1393
  "step": 2240
1394
  },
1395
  {
1396
  "epoch": 2.02,
1397
  "learning_rate": 2.0197486535008977e-05,
1398
+ "loss": 0.332,
1399
  "step": 2250
1400
  },
1401
  {
1402
  "epoch": 2.03,
1403
  "learning_rate": 2.0287253141831238e-05,
1404
+ "loss": 0.3343,
1405
  "step": 2260
1406
  },
1407
  {
1408
  "epoch": 2.04,
1409
  "learning_rate": 2.0377019748653503e-05,
1410
+ "loss": 0.3341,
1411
  "step": 2270
1412
  },
1413
  {
1414
  "epoch": 2.05,
1415
  "learning_rate": 2.0466786355475764e-05,
1416
+ "loss": 0.3473,
1417
  "step": 2280
1418
  },
1419
  {
1420
  "epoch": 2.05,
1421
  "learning_rate": 2.0556552962298025e-05,
1422
+ "loss": 0.3479,
1423
  "step": 2290
1424
  },
1425
  {
1426
  "epoch": 2.06,
1427
  "learning_rate": 2.0646319569120286e-05,
1428
+ "loss": 0.3235,
1429
  "step": 2300
1430
  },
1431
  {
1432
  "epoch": 2.07,
1433
  "learning_rate": 2.073608617594255e-05,
1434
+ "loss": 0.3455,
1435
  "step": 2310
1436
  },
1437
  {
1438
  "epoch": 2.08,
1439
  "learning_rate": 2.0825852782764812e-05,
1440
+ "loss": 0.3675,
1441
  "step": 2320
1442
  },
1443
  {
1444
  "epoch": 2.09,
1445
  "learning_rate": 2.0915619389587073e-05,
1446
+ "loss": 0.3267,
1447
  "step": 2330
1448
  },
1449
  {
1450
  "epoch": 2.1,
1451
  "learning_rate": 2.1005385996409338e-05,
1452
+ "loss": 0.3588,
1453
  "step": 2340
1454
  },
1455
  {
1456
  "epoch": 2.11,
1457
  "learning_rate": 2.10951526032316e-05,
1458
+ "loss": 0.3762,
1459
  "step": 2350
1460
  },
1461
  {
1462
  "epoch": 2.12,
1463
  "learning_rate": 2.118491921005386e-05,
1464
+ "loss": 0.3474,
1465
  "step": 2360
1466
  },
1467
  {
1468
  "epoch": 2.13,
1469
  "learning_rate": 2.1274685816876122e-05,
1470
+ "loss": 0.3614,
1471
  "step": 2370
1472
  },
1473
  {
1474
  "epoch": 2.14,
1475
  "learning_rate": 2.1364452423698386e-05,
1476
+ "loss": 0.3611,
1477
  "step": 2380
1478
  },
1479
  {
1480
  "epoch": 2.14,
1481
  "learning_rate": 2.1454219030520648e-05,
1482
+ "loss": 0.4051,
1483
  "step": 2390
1484
  },
1485
  {
1486
  "epoch": 2.15,
1487
  "learning_rate": 2.154398563734291e-05,
1488
+ "loss": 0.3314,
1489
  "step": 2400
1490
  },
1491
  {
1492
  "epoch": 2.16,
1493
  "learning_rate": 2.163375224416517e-05,
1494
+ "loss": 0.3427,
1495
  "step": 2410
1496
  },
1497
  {
1498
  "epoch": 2.17,
1499
  "learning_rate": 2.1723518850987435e-05,
1500
+ "loss": 0.2886,
1501
  "step": 2420
1502
  },
1503
  {
1504
  "epoch": 2.18,
1505
  "learning_rate": 2.1813285457809696e-05,
1506
+ "loss": 0.3262,
1507
  "step": 2430
1508
  },
1509
  {
1510
  "epoch": 2.19,
1511
  "learning_rate": 2.1903052064631957e-05,
1512
+ "loss": 0.3516,
1513
  "step": 2440
1514
  },
1515
  {
1516
  "epoch": 2.2,
1517
  "learning_rate": 2.199281867145422e-05,
1518
+ "loss": 0.3744,
1519
  "step": 2450
1520
  },
1521
  {
1522
  "epoch": 2.21,
1523
  "learning_rate": 2.2082585278276483e-05,
1524
+ "loss": 0.3545,
1525
  "step": 2460
1526
  },
1527
  {
1528
  "epoch": 2.22,
1529
  "learning_rate": 2.2172351885098744e-05,
1530
+ "loss": 0.2993,
1531
  "step": 2470
1532
  },
1533
  {
1534
  "epoch": 2.22,
1535
  "learning_rate": 2.2262118491921005e-05,
1536
+ "loss": 0.3412,
1537
  "step": 2480
1538
  },
1539
  {
1540
  "epoch": 2.23,
1541
  "learning_rate": 2.235188509874327e-05,
1542
+ "loss": 0.3823,
1543
  "step": 2490
1544
  },
1545
  {
1546
  "epoch": 2.24,
1547
  "learning_rate": 2.244165170556553e-05,
1548
+ "loss": 0.3206,
1549
  "step": 2500
1550
  },
1551
  {
1552
  "epoch": 2.25,
1553
  "learning_rate": 2.2531418312387792e-05,
1554
+ "loss": 0.2969,
1555
  "step": 2510
1556
  },
1557
  {
1558
  "epoch": 2.26,
1559
  "learning_rate": 2.2621184919210057e-05,
1560
+ "loss": 0.3306,
1561
  "step": 2520
1562
  },
1563
  {
1564
  "epoch": 2.27,
1565
  "learning_rate": 2.2710951526032318e-05,
1566
+ "loss": 0.3641,
1567
  "step": 2530
1568
  },
1569
  {
1570
  "epoch": 2.28,
1571
  "learning_rate": 2.280071813285458e-05,
1572
+ "loss": 0.3588,
1573
  "step": 2540
1574
  },
1575
  {
1576
  "epoch": 2.29,
1577
  "learning_rate": 2.289048473967684e-05,
1578
+ "loss": 0.3632,
1579
  "step": 2550
1580
  },
1581
  {
1582
  "epoch": 2.3,
1583
  "learning_rate": 2.2980251346499105e-05,
1584
+ "loss": 0.3072,
1585
  "step": 2560
1586
  },
1587
  {
1588
  "epoch": 2.31,
1589
  "learning_rate": 2.3070017953321366e-05,
1590
+ "loss": 0.3606,
1591
  "step": 2570
1592
  },
1593
  {
1594
  "epoch": 2.31,
1595
  "learning_rate": 2.3159784560143628e-05,
1596
+ "loss": 0.4114,
1597
  "step": 2580
1598
  },
1599
  {
1600
  "epoch": 2.32,
1601
  "learning_rate": 2.3249551166965892e-05,
1602
+ "loss": 0.282,
1603
  "step": 2590
1604
  },
1605
  {
1606
  "epoch": 2.33,
1607
  "learning_rate": 2.3339317773788153e-05,
1608
+ "loss": 0.3336,
1609
  "step": 2600
1610
  },
1611
  {
1612
  "epoch": 2.34,
1613
  "learning_rate": 2.3429084380610415e-05,
1614
+ "loss": 0.3406,
1615
  "step": 2610
1616
  },
1617
  {
1618
  "epoch": 2.35,
1619
  "learning_rate": 2.3518850987432676e-05,
1620
+ "loss": 0.3433,
1621
  "step": 2620
1622
  },
1623
  {
1624
  "epoch": 2.36,
1625
  "learning_rate": 2.360861759425494e-05,
1626
+ "loss": 0.3169,
1627
  "step": 2630
1628
  },
1629
  {
1630
  "epoch": 2.37,
1631
  "learning_rate": 2.3698384201077202e-05,
1632
+ "loss": 0.3855,
1633
  "step": 2640
1634
  },
1635
  {
1636
  "epoch": 2.38,
1637
  "learning_rate": 2.3788150807899463e-05,
1638
+ "loss": 0.3036,
1639
  "step": 2650
1640
  },
1641
  {
1642
  "epoch": 2.39,
1643
  "learning_rate": 2.3877917414721724e-05,
1644
+ "loss": 0.3076,
1645
  "step": 2660
1646
  },
1647
  {
1648
  "epoch": 2.4,
1649
  "learning_rate": 2.396768402154399e-05,
1650
+ "loss": 0.3052,
1651
  "step": 2670
1652
  },
1653
  {
1654
  "epoch": 2.4,
1655
  "learning_rate": 2.405745062836625e-05,
1656
+ "loss": 0.3253,
1657
  "step": 2680
1658
  },
1659
  {
1660
  "epoch": 2.41,
1661
  "learning_rate": 2.414721723518851e-05,
1662
+ "loss": 0.3235,
1663
  "step": 2690
1664
  },
1665
  {
1666
  "epoch": 2.42,
1667
  "learning_rate": 2.4236983842010776e-05,
1668
+ "loss": 0.2835,
1669
  "step": 2700
1670
  },
1671
  {
1672
  "epoch": 2.43,
1673
  "learning_rate": 2.4326750448833037e-05,
1674
+ "loss": 0.2824,
1675
  "step": 2710
1676
  },
1677
  {
1678
  "epoch": 2.44,
1679
  "learning_rate": 2.4416517055655298e-05,
1680
+ "loss": 0.3183,
1681
  "step": 2720
1682
  },
1683
  {
1684
  "epoch": 2.45,
1685
  "learning_rate": 2.450628366247756e-05,
1686
+ "loss": 0.3281,
1687
  "step": 2730
1688
  },
1689
  {
1690
  "epoch": 2.46,
1691
  "learning_rate": 2.4596050269299824e-05,
1692
+ "loss": 0.2939,
1693
  "step": 2740
1694
  },
1695
  {
1696
  "epoch": 2.47,
1697
  "learning_rate": 2.4685816876122085e-05,
1698
+ "loss": 0.3679,
1699
  "step": 2750
1700
  },
1701
  {
1702
  "epoch": 2.48,
1703
  "learning_rate": 2.4775583482944346e-05,
1704
+ "loss": 0.3165,
1705
  "step": 2760
1706
  },
1707
  {
1708
  "epoch": 2.48,
1709
  "learning_rate": 2.4865350089766608e-05,
1710
+ "loss": 0.3146,
1711
  "step": 2770
1712
  },
1713
  {
1714
  "epoch": 2.49,
1715
  "learning_rate": 2.495511669658887e-05,
1716
+ "loss": 0.2981,
1717
  "step": 2780
1718
  },
1719
  {
1720
  "epoch": 2.5,
1721
  "learning_rate": 2.5044883303411134e-05,
1722
+ "loss": 0.3099,
1723
  "step": 2790
1724
  },
1725
  {
1726
  "epoch": 2.51,
1727
  "learning_rate": 2.5134649910233395e-05,
1728
+ "loss": 0.267,
1729
  "step": 2800
1730
  },
1731
  {
1732
  "epoch": 2.52,
1733
  "learning_rate": 2.5224416517055656e-05,
1734
+ "loss": 0.3317,
1735
  "step": 2810
1736
  },
1737
  {
1738
  "epoch": 2.53,
1739
  "learning_rate": 2.5314183123877917e-05,
1740
+ "loss": 0.3241,
1741
  "step": 2820
1742
  },
1743
  {
1744
  "epoch": 2.54,
1745
  "learning_rate": 2.5403949730700182e-05,
1746
+ "loss": 0.371,
1747
  "step": 2830
1748
  },
1749
  {
1750
  "epoch": 2.55,
1751
  "learning_rate": 2.5493716337522443e-05,
1752
+ "loss": 0.3431,
1753
  "step": 2840
1754
  },
1755
  {
1756
  "epoch": 2.56,
1757
  "learning_rate": 2.5583482944344704e-05,
1758
+ "loss": 0.3218,
1759
  "step": 2850
1760
  },
1761
  {
1762
  "epoch": 2.57,
1763
  "learning_rate": 2.5673249551166965e-05,
1764
+ "loss": 0.3222,
1765
  "step": 2860
1766
  },
1767
  {
1768
  "epoch": 2.57,
1769
  "learning_rate": 2.5763016157989227e-05,
1770
+ "loss": 0.3176,
1771
  "step": 2870
1772
  },
1773
  {
1774
  "epoch": 2.58,
1775
  "learning_rate": 2.585278276481149e-05,
1776
+ "loss": 0.3256,
1777
  "step": 2880
1778
  },
1779
  {
1780
  "epoch": 2.59,
1781
  "learning_rate": 2.5942549371633752e-05,
1782
+ "loss": 0.2971,
1783
  "step": 2890
1784
  },
1785
  {
1786
  "epoch": 2.6,
1787
  "learning_rate": 2.6032315978456017e-05,
1788
+ "loss": 0.253,
1789
  "step": 2900
1790
  },
1791
  {
1792
  "epoch": 2.61,
1793
  "learning_rate": 2.6122082585278278e-05,
1794
+ "loss": 0.3261,
1795
  "step": 2910
1796
  },
1797
  {
1798
  "epoch": 2.62,
1799
  "learning_rate": 2.621184919210054e-05,
1800
+ "loss": 0.3325,
1801
  "step": 2920
1802
  },
1803
  {
1804
  "epoch": 2.63,
1805
  "learning_rate": 2.63016157989228e-05,
1806
+ "loss": 0.3005,
1807
  "step": 2930
1808
  },
1809
  {
1810
  "epoch": 2.64,
1811
  "learning_rate": 2.6391382405745062e-05,
1812
+ "loss": 0.2897,
1813
  "step": 2940
1814
  },
1815
  {
1816
  "epoch": 2.65,
1817
  "learning_rate": 2.6481149012567323e-05,
1818
+ "loss": 0.3249,
1819
  "step": 2950
1820
  },
1821
  {
1822
  "epoch": 2.66,
1823
  "learning_rate": 2.6570915619389584e-05,
1824
+ "loss": 0.3647,
1825
  "step": 2960
1826
  },
1827
  {
1828
  "epoch": 2.66,
1829
  "learning_rate": 2.6660682226211852e-05,
1830
+ "loss": 0.3034,
1831
  "step": 2970
1832
  },
1833
  {
1834
  "epoch": 2.67,
1835
  "learning_rate": 2.6750448833034114e-05,
1836
+ "loss": 0.2735,
1837
  "step": 2980
1838
  },
1839
  {
1840
  "epoch": 2.68,
1841
  "learning_rate": 2.6840215439856375e-05,
1842
+ "loss": 0.3122,
1843
  "step": 2990
1844
  },
1845
  {
1846
  "epoch": 2.69,
1847
  "learning_rate": 2.6929982046678636e-05,
1848
+ "loss": 0.2622,
1849
  "step": 3000
1850
  },
1851
  {
1852
  "epoch": 2.7,
1853
  "learning_rate": 2.7019748653500897e-05,
1854
+ "loss": 0.322,
1855
  "step": 3010
1856
  },
1857
  {
1858
  "epoch": 2.71,
1859
  "learning_rate": 2.710951526032316e-05,
1860
+ "loss": 0.2671,
1861
  "step": 3020
1862
  },
1863
  {
1864
  "epoch": 2.72,
1865
  "learning_rate": 2.719928186714542e-05,
1866
+ "loss": 0.3392,
1867
  "step": 3030
1868
  },
1869
  {
1870
  "epoch": 2.73,
1871
  "learning_rate": 2.7289048473967688e-05,
1872
+ "loss": 0.2507,
1873
  "step": 3040
1874
  },
1875
  {
1876
  "epoch": 2.74,
1877
  "learning_rate": 2.737881508078995e-05,
1878
+ "loss": 0.294,
1879
  "step": 3050
1880
  },
1881
  {
1882
  "epoch": 2.75,
1883
  "learning_rate": 2.746858168761221e-05,
1884
+ "loss": 0.3119,
1885
  "step": 3060
1886
  },
1887
  {
1888
  "epoch": 2.75,
1889
  "learning_rate": 2.755834829443447e-05,
1890
+ "loss": 0.2956,
1891
  "step": 3070
1892
  },
1893
  {
1894
  "epoch": 2.76,
1895
  "learning_rate": 2.7648114901256732e-05,
1896
+ "loss": 0.2744,
1897
  "step": 3080
1898
  },
1899
  {
1900
  "epoch": 2.77,
1901
  "learning_rate": 2.7737881508078994e-05,
1902
+ "loss": 0.279,
1903
  "step": 3090
1904
  },
1905
  {
1906
  "epoch": 2.78,
1907
  "learning_rate": 2.7827648114901255e-05,
1908
+ "loss": 0.379,
1909
  "step": 3100
1910
  },
1911
  {
1912
  "epoch": 2.79,
1913
  "learning_rate": 2.7917414721723523e-05,
1914
+ "loss": 0.2667,
1915
  "step": 3110
1916
  },
1917
  {
1918
  "epoch": 2.8,
1919
  "learning_rate": 2.8007181328545784e-05,
1920
+ "loss": 0.2732,
1921
  "step": 3120
1922
  },
1923
  {
1924
  "epoch": 2.81,
1925
  "learning_rate": 2.8096947935368045e-05,
1926
+ "loss": 0.311,
1927
  "step": 3130
1928
  },
1929
  {
1930
  "epoch": 2.82,
1931
  "learning_rate": 2.8186714542190307e-05,
1932
+ "loss": 0.3404,
1933
  "step": 3140
1934
  },
1935
  {
1936
  "epoch": 2.83,
1937
  "learning_rate": 2.8276481149012568e-05,
1938
+ "loss": 0.2479,
1939
  "step": 3150
1940
  },
1941
  {
1942
  "epoch": 2.83,
1943
  "learning_rate": 2.836624775583483e-05,
1944
+ "loss": 0.3057,
1945
  "step": 3160
1946
  },
1947
  {
1948
  "epoch": 2.84,
1949
  "learning_rate": 2.845601436265709e-05,
1950
+ "loss": 0.3075,
1951
  "step": 3170
1952
  },
1953
  {
1954
  "epoch": 2.85,
1955
  "learning_rate": 2.8545780969479358e-05,
1956
+ "loss": 0.3008,
1957
  "step": 3180
1958
  },
1959
  {
1960
  "epoch": 2.86,
1961
  "learning_rate": 2.863554757630162e-05,
1962
+ "loss": 0.2372,
1963
  "step": 3190
1964
  },
1965
  {
1966
  "epoch": 2.87,
1967
  "learning_rate": 2.872531418312388e-05,
1968
+ "loss": 0.2617,
1969
  "step": 3200
1970
  },
1971
  {
1972
  "epoch": 2.88,
1973
  "learning_rate": 2.8815080789946142e-05,
1974
+ "loss": 0.3,
1975
  "step": 3210
1976
  },
1977
  {
1978
  "epoch": 2.89,
1979
  "learning_rate": 2.8904847396768403e-05,
1980
+ "loss": 0.2749,
1981
  "step": 3220
1982
  },
1983
  {
1984
  "epoch": 2.9,
1985
  "learning_rate": 2.8994614003590664e-05,
1986
+ "loss": 0.2742,
1987
  "step": 3230
1988
  },
1989
  {
1990
  "epoch": 2.91,
1991
  "learning_rate": 2.9084380610412926e-05,
1992
+ "loss": 0.2844,
1993
  "step": 3240
1994
  },
1995
  {
1996
  "epoch": 2.92,
1997
  "learning_rate": 2.9174147217235194e-05,
1998
+ "loss": 0.2909,
1999
  "step": 3250
2000
  },
2001
  {
2002
  "epoch": 2.92,
2003
  "learning_rate": 2.9263913824057455e-05,
2004
+ "loss": 0.3045,
2005
  "step": 3260
2006
  },
2007
  {
2008
  "epoch": 2.93,
2009
  "learning_rate": 2.9353680430879716e-05,
2010
+ "loss": 0.2875,
2011
  "step": 3270
2012
  },
2013
  {
2014
  "epoch": 2.94,
2015
  "learning_rate": 2.9443447037701977e-05,
2016
+ "loss": 0.2963,
2017
  "step": 3280
2018
  },
2019
  {
2020
  "epoch": 2.95,
2021
  "learning_rate": 2.953321364452424e-05,
2022
+ "loss": 0.314,
2023
  "step": 3290
2024
  },
2025
  {
2026
  "epoch": 2.96,
2027
  "learning_rate": 2.96229802513465e-05,
2028
+ "loss": 0.3005,
2029
  "step": 3300
2030
  },
2031
  {
2032
  "epoch": 2.97,
2033
  "learning_rate": 2.971274685816876e-05,
2034
+ "loss": 0.3127,
2035
  "step": 3310
2036
  },
2037
  {
2038
  "epoch": 2.98,
2039
  "learning_rate": 2.9802513464991022e-05,
2040
+ "loss": 0.3113,
2041
  "step": 3320
2042
  },
2043
  {
2044
  "epoch": 2.99,
2045
  "learning_rate": 2.989228007181329e-05,
2046
+ "loss": 0.2886,
2047
  "step": 3330
2048
  },
2049
  {
2050
  "epoch": 3.0,
2051
  "learning_rate": 2.998204667863555e-05,
2052
+ "loss": 0.321,
2053
  "step": 3340
2054
  },
2055
  {
2056
  "epoch": 3.0,
2057
  "eval_accuracy": {
2058
+ "accuracy": 0.9410742318905584
2059
  },
2060
  "eval_f1": {
2061
+ "f1": 0.9378351696354423
2062
  },
2063
+ "eval_loss": 0.18490658700466156,
2064
  "eval_precision": {
2065
+ "precision": 0.9376663800198108
2066
  },
2067
  "eval_recall": {
2068
+ "recall": 0.9381803360170522
2069
  },
2070
+ "eval_runtime": 165.2584,
2071
+ "eval_samples_per_second": 107.928,
2072
+ "eval_steps_per_second": 6.747,
2073
  "step": 3344
2074
  },
2075
  {
2076
  "epoch": 3.01,
2077
  "learning_rate": 3.0071813285457812e-05,
2078
+ "loss": 0.3252,
2079
  "step": 3350
2080
  },
2081
  {
2082
  "epoch": 3.01,
2083
  "learning_rate": 3.0161579892280074e-05,
2084
+ "loss": 0.2945,
2085
  "step": 3360
2086
  },
2087
  {
2088
  "epoch": 3.02,
2089
  "learning_rate": 3.0251346499102335e-05,
2090
+ "loss": 0.2584,
2091
  "step": 3370
2092
  },
2093
  {
2094
  "epoch": 3.03,
2095
  "learning_rate": 3.0341113105924596e-05,
2096
+ "loss": 0.2878,
2097
  "step": 3380
2098
  },
2099
  {
2100
  "epoch": 3.04,
2101
  "learning_rate": 3.0430879712746857e-05,
2102
+ "loss": 0.2618,
2103
  "step": 3390
2104
  },
2105
  {
2106
  "epoch": 3.05,
2107
  "learning_rate": 3.0520646319569125e-05,
2108
+ "loss": 0.3356,
2109
  "step": 3400
2110
  },
2111
  {
2112
  "epoch": 3.06,
2113
  "learning_rate": 3.061041292639138e-05,
2114
+ "loss": 0.2706,
2115
  "step": 3410
2116
  },
2117
  {
2118
  "epoch": 3.07,
2119
  "learning_rate": 3.070017953321365e-05,
2120
+ "loss": 0.2821,
2121
  "step": 3420
2122
  },
2123
  {
2124
  "epoch": 3.08,
2125
  "learning_rate": 3.0789946140035906e-05,
2126
+ "loss": 0.2634,
2127
  "step": 3430
2128
  },
2129
  {
2130
  "epoch": 3.09,
2131
  "learning_rate": 3.087971274685817e-05,
2132
+ "loss": 0.2927,
2133
  "step": 3440
2134
  },
2135
  {
2136
  "epoch": 3.09,
2137
  "learning_rate": 3.096947935368043e-05,
2138
+ "loss": 0.2406,
2139
  "step": 3450
2140
  },
2141
  {
2142
  "epoch": 3.1,
2143
  "learning_rate": 3.105924596050269e-05,
2144
+ "loss": 0.2871,
2145
  "step": 3460
2146
  },
2147
  {
2148
  "epoch": 3.11,
2149
  "learning_rate": 3.114901256732496e-05,
2150
+ "loss": 0.2389,
2151
  "step": 3470
2152
  },
2153
  {
2154
  "epoch": 3.12,
2155
  "learning_rate": 3.123877917414722e-05,
2156
+ "loss": 0.2782,
2157
  "step": 3480
2158
  },
2159
  {
2160
  "epoch": 3.13,
2161
  "learning_rate": 3.132854578096948e-05,
2162
+ "loss": 0.2864,
2163
  "step": 3490
2164
  },
2165
  {
2166
  "epoch": 3.14,
2167
  "learning_rate": 3.1418312387791744e-05,
2168
+ "loss": 0.29,
2169
  "step": 3500
2170
  },
2171
  {
2172
  "epoch": 3.15,
2173
  "learning_rate": 3.1508078994614e-05,
2174
+ "loss": 0.2089,
2175
  "step": 3510
2176
  },
2177
  {
2178
  "epoch": 3.16,
2179
  "learning_rate": 3.159784560143627e-05,
2180
+ "loss": 0.2716,
2181
  "step": 3520
2182
  },
2183
  {
2184
  "epoch": 3.17,
2185
  "learning_rate": 3.1687612208258525e-05,
2186
+ "loss": 0.2981,
2187
  "step": 3530
2188
  },
2189
  {
2190
  "epoch": 3.18,
2191
  "learning_rate": 3.1777378815080796e-05,
2192
+ "loss": 0.2265,
2193
  "step": 3540
2194
  },
2195
  {
2196
  "epoch": 3.18,
2197
  "learning_rate": 3.1867145421903054e-05,
2198
+ "loss": 0.1997,
2199
  "step": 3550
2200
  },
2201
  {
2202
  "epoch": 3.19,
2203
  "learning_rate": 3.195691202872532e-05,
2204
+ "loss": 0.2707,
2205
  "step": 3560
2206
  },
2207
  {
2208
  "epoch": 3.2,
2209
  "learning_rate": 3.2046678635547576e-05,
2210
+ "loss": 0.2917,
2211
  "step": 3570
2212
  },
2213
  {
2214
  "epoch": 3.21,
2215
  "learning_rate": 3.213644524236984e-05,
2216
+ "loss": 0.2966,
2217
  "step": 3580
2218
  },
2219
  {
2220
  "epoch": 3.22,
2221
  "learning_rate": 3.22262118491921e-05,
2222
+ "loss": 0.265,
2223
  "step": 3590
2224
  },
2225
  {
2226
  "epoch": 3.23,
2227
  "learning_rate": 3.231597845601436e-05,
2228
+ "loss": 0.2386,
2229
  "step": 3600
2230
  },
2231
  {
2232
  "epoch": 3.24,
2233
  "learning_rate": 3.240574506283663e-05,
2234
+ "loss": 0.2613,
2235
  "step": 3610
2236
  },
2237
  {
2238
  "epoch": 3.25,
2239
  "learning_rate": 3.249551166965889e-05,
2240
+ "loss": 0.2737,
2241
  "step": 3620
2242
  },
2243
  {
2244
  "epoch": 3.26,
2245
  "learning_rate": 3.258527827648115e-05,
2246
+ "loss": 0.2965,
2247
  "step": 3630
2248
  },
2249
  {
2250
  "epoch": 3.27,
2251
  "learning_rate": 3.2675044883303415e-05,
2252
+ "loss": 0.2683,
2253
  "step": 3640
2254
  },
2255
  {
2256
  "epoch": 3.27,
2257
  "learning_rate": 3.276481149012567e-05,
2258
+ "loss": 0.2379,
2259
  "step": 3650
2260
  },
2261
  {
2262
  "epoch": 3.28,
2263
  "learning_rate": 3.285457809694794e-05,
2264
+ "loss": 0.3282,
2265
  "step": 3660
2266
  },
2267
  {
2268
  "epoch": 3.29,
2269
  "learning_rate": 3.2944344703770195e-05,
2270
+ "loss": 0.2589,
2271
  "step": 3670
2272
  },
2273
  {
2274
  "epoch": 3.3,
2275
  "learning_rate": 3.3034111310592466e-05,
2276
+ "loss": 0.2955,
2277
  "step": 3680
2278
  },
2279
  {
2280
  "epoch": 3.31,
2281
  "learning_rate": 3.3123877917414724e-05,
2282
+ "loss": 0.2584,
2283
  "step": 3690
2284
  },
2285
  {
2286
  "epoch": 3.32,
2287
  "learning_rate": 3.321364452423699e-05,
2288
+ "loss": 0.2643,
2289
  "step": 3700
2290
  },
2291
  {
2292
  "epoch": 3.33,
2293
  "learning_rate": 3.330341113105925e-05,
2294
+ "loss": 0.2987,
2295
  "step": 3710
2296
  },
2297
  {
2298
  "epoch": 3.34,
2299
  "learning_rate": 3.339317773788151e-05,
2300
+ "loss": 0.3,
2301
  "step": 3720
2302
  },
2303
  {
2304
  "epoch": 3.35,
2305
  "learning_rate": 3.348294434470377e-05,
2306
+ "loss": 0.2488,
2307
  "step": 3730
2308
  },
2309
  {
2310
  "epoch": 3.36,
2311
  "learning_rate": 3.3572710951526034e-05,
2312
+ "loss": 0.252,
2313
  "step": 3740
2314
  },
2315
  {
2316
  "epoch": 3.36,
2317
  "learning_rate": 3.36624775583483e-05,
2318
+ "loss": 0.286,
2319
  "step": 3750
2320
  },
2321
  {
2322
  "epoch": 3.37,
2323
  "learning_rate": 3.375224416517056e-05,
2324
+ "loss": 0.2801,
2325
  "step": 3760
2326
  },
2327
  {
2328
  "epoch": 3.38,
2329
  "learning_rate": 3.384201077199282e-05,
2330
+ "loss": 0.2463,
2331
  "step": 3770
2332
  },
2333
  {
2334
  "epoch": 3.39,
2335
  "learning_rate": 3.3931777378815085e-05,
2336
+ "loss": 0.2824,
2337
  "step": 3780
2338
  },
2339
  {
2340
  "epoch": 3.4,
2341
  "learning_rate": 3.402154398563734e-05,
2342
+ "loss": 0.2847,
2343
  "step": 3790
2344
  },
2345
  {
2346
  "epoch": 3.41,
2347
  "learning_rate": 3.411131059245961e-05,
2348
+ "loss": 0.3061,
2349
  "step": 3800
2350
  },
2351
  {
2352
  "epoch": 3.42,
2353
  "learning_rate": 3.4201077199281866e-05,
2354
+ "loss": 0.2799,
2355
  "step": 3810
2356
  },
2357
  {
2358
  "epoch": 3.43,
2359
  "learning_rate": 3.429084380610413e-05,
2360
+ "loss": 0.2532,
2361
  "step": 3820
2362
  },
2363
  {
2364
  "epoch": 3.44,
2365
  "learning_rate": 3.4380610412926395e-05,
2366
+ "loss": 0.2761,
2367
  "step": 3830
2368
  },
2369
  {
2370
  "epoch": 3.44,
2371
  "learning_rate": 3.447037701974866e-05,
2372
+ "loss": 0.3087,
2373
  "step": 3840
2374
  },
2375
  {
2376
  "epoch": 3.45,
2377
  "learning_rate": 3.456014362657092e-05,
2378
+ "loss": 0.2841,
2379
  "step": 3850
2380
  },
2381
  {
2382
  "epoch": 3.46,
2383
  "learning_rate": 3.464991023339318e-05,
2384
+ "loss": 0.2191,
2385
  "step": 3860
2386
  },
2387
  {
2388
  "epoch": 3.47,
2389
  "learning_rate": 3.473967684021544e-05,
2390
+ "loss": 0.328,
2391
  "step": 3870
2392
  },
2393
  {
2394
  "epoch": 3.48,
2395
  "learning_rate": 3.4829443447037704e-05,
2396
+ "loss": 0.2377,
2397
  "step": 3880
2398
  },
2399
  {
2400
  "epoch": 3.49,
2401
  "learning_rate": 3.491921005385996e-05,
2402
+ "loss": 0.2844,
2403
  "step": 3890
2404
  },
2405
  {
2406
  "epoch": 3.5,
2407
  "learning_rate": 3.500897666068223e-05,
2408
+ "loss": 0.2944,
2409
  "step": 3900
2410
  },
2411
  {
2412
  "epoch": 3.51,
2413
  "learning_rate": 3.509874326750449e-05,
2414
+ "loss": 0.2371,
2415
  "step": 3910
2416
  },
2417
  {
2418
  "epoch": 3.52,
2419
  "learning_rate": 3.5188509874326756e-05,
2420
+ "loss": 0.2552,
2421
  "step": 3920
2422
  },
2423
  {
2424
  "epoch": 3.53,
2425
  "learning_rate": 3.5278276481149014e-05,
2426
+ "loss": 0.2492,
2427
  "step": 3930
2428
  },
2429
  {
2430
  "epoch": 3.53,
2431
  "learning_rate": 3.536804308797128e-05,
2432
+ "loss": 0.253,
2433
  "step": 3940
2434
  },
2435
  {
2436
  "epoch": 3.54,
2437
  "learning_rate": 3.5457809694793536e-05,
2438
+ "loss": 0.2794,
2439
  "step": 3950
2440
  },
2441
  {
2442
  "epoch": 3.55,
2443
  "learning_rate": 3.55475763016158e-05,
2444
+ "loss": 0.2253,
2445
  "step": 3960
2446
  },
2447
  {
2448
  "epoch": 3.56,
2449
  "learning_rate": 3.5637342908438065e-05,
2450
+ "loss": 0.2894,
2451
  "step": 3970
2452
  },
2453
  {
2454
  "epoch": 3.57,
2455
  "learning_rate": 3.572710951526032e-05,
2456
+ "loss": 0.2076,
2457
  "step": 3980
2458
  },
2459
  {
2460
  "epoch": 3.58,
2461
  "learning_rate": 3.581687612208259e-05,
2462
+ "loss": 0.2241,
2463
  "step": 3990
2464
  },
2465
  {
2466
  "epoch": 3.59,
2467
  "learning_rate": 3.5906642728904846e-05,
2468
+ "loss": 0.2547,
2469
  "step": 4000
2470
  },
2471
  {
2472
  "epoch": 3.6,
2473
  "learning_rate": 3.599640933572711e-05,
2474
+ "loss": 0.2471,
2475
  "step": 4010
2476
  },
2477
  {
2478
  "epoch": 3.61,
2479
  "learning_rate": 3.6086175942549375e-05,
2480
+ "loss": 0.2585,
2481
  "step": 4020
2482
  },
2483
  {
2484
  "epoch": 3.62,
2485
  "learning_rate": 3.617594254937163e-05,
2486
+ "loss": 0.2617,
2487
  "step": 4030
2488
  },
2489
  {
2490
  "epoch": 3.62,
2491
  "learning_rate": 3.62657091561939e-05,
2492
+ "loss": 0.2724,
2493
  "step": 4040
2494
  },
2495
  {
2496
  "epoch": 3.63,
2497
  "learning_rate": 3.635547576301616e-05,
2498
+ "loss": 0.2353,
2499
  "step": 4050
2500
  },
2501
  {
2502
  "epoch": 3.64,
2503
  "learning_rate": 3.644524236983842e-05,
2504
+ "loss": 0.2288,
2505
  "step": 4060
2506
  },
2507
  {
2508
  "epoch": 3.65,
2509
  "learning_rate": 3.6535008976660684e-05,
2510
+ "loss": 0.2431,
2511
  "step": 4070
2512
  },
2513
  {
2514
  "epoch": 3.66,
2515
  "learning_rate": 3.662477558348294e-05,
2516
+ "loss": 0.2693,
2517
  "step": 4080
2518
  },
2519
  {
2520
  "epoch": 3.67,
2521
  "learning_rate": 3.671454219030521e-05,
2522
+ "loss": 0.2903,
2523
  "step": 4090
2524
  },
2525
  {
2526
  "epoch": 3.68,
2527
  "learning_rate": 3.6804308797127465e-05,
2528
+ "loss": 0.2658,
2529
  "step": 4100
2530
  },
2531
  {
2532
  "epoch": 3.69,
2533
  "learning_rate": 3.6894075403949736e-05,
2534
+ "loss": 0.2175,
2535
  "step": 4110
2536
  },
2537
  {
2538
  "epoch": 3.7,
2539
  "learning_rate": 3.6983842010771994e-05,
2540
+ "loss": 0.3072,
2541
  "step": 4120
2542
  },
2543
  {
2544
  "epoch": 3.7,
2545
  "learning_rate": 3.707360861759426e-05,
2546
+ "loss": 0.305,
2547
  "step": 4130
2548
  },
2549
  {
2550
  "epoch": 3.71,
2551
  "learning_rate": 3.7163375224416516e-05,
2552
+ "loss": 0.2475,
2553
  "step": 4140
2554
  },
2555
  {
2556
  "epoch": 3.72,
2557
  "learning_rate": 3.725314183123878e-05,
2558
+ "loss": 0.2682,
2559
  "step": 4150
2560
  },
2561
  {
2562
  "epoch": 3.73,
2563
  "learning_rate": 3.734290843806104e-05,
2564
+ "loss": 0.2593,
2565
  "step": 4160
2566
  },
2567
  {
2568
  "epoch": 3.74,
2569
  "learning_rate": 3.74326750448833e-05,
2570
+ "loss": 0.3293,
2571
  "step": 4170
2572
  },
2573
  {
2574
  "epoch": 3.75,
2575
  "learning_rate": 3.752244165170557e-05,
2576
+ "loss": 0.2422,
2577
  "step": 4180
2578
  },
2579
  {
2580
  "epoch": 3.76,
2581
  "learning_rate": 3.761220825852783e-05,
2582
+ "loss": 0.2018,
2583
  "step": 4190
2584
  },
2585
  {
2586
  "epoch": 3.77,
2587
  "learning_rate": 3.770197486535009e-05,
2588
+ "loss": 0.2802,
2589
  "step": 4200
2590
  },
2591
  {
2592
  "epoch": 3.78,
2593
  "learning_rate": 3.7791741472172355e-05,
2594
+ "loss": 0.2298,
2595
  "step": 4210
2596
  },
2597
  {
2598
  "epoch": 3.79,
2599
  "learning_rate": 3.788150807899461e-05,
2600
+ "loss": 0.2512,
2601
  "step": 4220
2602
  },
2603
  {
2604
  "epoch": 3.79,
2605
  "learning_rate": 3.797127468581688e-05,
2606
+ "loss": 0.2503,
2607
  "step": 4230
2608
  },
2609
  {
2610
  "epoch": 3.8,
2611
  "learning_rate": 3.8061041292639135e-05,
2612
+ "loss": 0.2244,
2613
  "step": 4240
2614
  },
2615
  {
2616
  "epoch": 3.81,
2617
  "learning_rate": 3.815080789946141e-05,
2618
+ "loss": 0.2456,
2619
  "step": 4250
2620
  },
2621
  {
2622
  "epoch": 3.82,
2623
  "learning_rate": 3.8240574506283664e-05,
2624
+ "loss": 0.2085,
2625
  "step": 4260
2626
  },
2627
  {
2628
  "epoch": 3.83,
2629
  "learning_rate": 3.833034111310593e-05,
2630
+ "loss": 0.2367,
2631
  "step": 4270
2632
  },
2633
  {
2634
  "epoch": 3.84,
2635
  "learning_rate": 3.842010771992819e-05,
2636
+ "loss": 0.2174,
2637
  "step": 4280
2638
  },
2639
  {
2640
  "epoch": 3.85,
2641
  "learning_rate": 3.850987432675045e-05,
2642
+ "loss": 0.2256,
2643
  "step": 4290
2644
  },
2645
  {
2646
  "epoch": 3.86,
2647
  "learning_rate": 3.859964093357271e-05,
2648
+ "loss": 0.2344,
2649
  "step": 4300
2650
  },
2651
  {
2652
  "epoch": 3.87,
2653
  "learning_rate": 3.8689407540394974e-05,
2654
+ "loss": 0.2107,
2655
  "step": 4310
2656
  },
2657
  {
2658
  "epoch": 3.88,
2659
  "learning_rate": 3.877917414721723e-05,
2660
+ "loss": 0.2797,
2661
  "step": 4320
2662
  },
2663
  {
2664
  "epoch": 3.88,
2665
  "learning_rate": 3.88689407540395e-05,
2666
+ "loss": 0.2568,
2667
  "step": 4330
2668
  },
2669
  {
2670
  "epoch": 3.89,
2671
  "learning_rate": 3.895870736086176e-05,
2672
+ "loss": 0.2312,
2673
  "step": 4340
2674
  },
2675
  {
2676
  "epoch": 3.9,
2677
  "learning_rate": 3.9048473967684026e-05,
2678
+ "loss": 0.2269,
2679
  "step": 4350
2680
  },
2681
  {
2682
  "epoch": 3.91,
2683
  "learning_rate": 3.9138240574506283e-05,
2684
+ "loss": 0.2412,
2685
  "step": 4360
2686
  },
2687
  {
2688
  "epoch": 3.92,
2689
  "learning_rate": 3.922800718132855e-05,
2690
+ "loss": 0.2626,
2691
  "step": 4370
2692
  },
2693
  {
2694
  "epoch": 3.93,
2695
  "learning_rate": 3.9317773788150806e-05,
2696
+ "loss": 0.2995,
2697
  "step": 4380
2698
  },
2699
  {
2700
  "epoch": 3.94,
2701
  "learning_rate": 3.940754039497307e-05,
2702
+ "loss": 0.2435,
2703
  "step": 4390
2704
  },
2705
  {
2706
  "epoch": 3.95,
2707
  "learning_rate": 3.9497307001795335e-05,
2708
+ "loss": 0.2317,
2709
  "step": 4400
2710
  },
2711
  {
2712
  "epoch": 3.96,
2713
  "learning_rate": 3.95870736086176e-05,
2714
+ "loss": 0.1932,
2715
  "step": 4410
2716
  },
2717
  {
2718
  "epoch": 3.97,
2719
  "learning_rate": 3.967684021543986e-05,
2720
+ "loss": 0.2498,
2721
  "step": 4420
2722
  },
2723
  {
2724
  "epoch": 3.97,
2725
  "learning_rate": 3.976660682226212e-05,
2726
+ "loss": 0.2792,
2727
  "step": 4430
2728
  },
2729
  {
2730
  "epoch": 3.98,
2731
  "learning_rate": 3.985637342908438e-05,
2732
+ "loss": 0.2554,
2733
  "step": 4440
2734
  },
2735
  {
2736
  "epoch": 3.99,
2737
  "learning_rate": 3.9946140035906645e-05,
2738
+ "loss": 0.3403,
2739
  "step": 4450
2740
  },
2741
  {
2742
  "epoch": 4.0,
2743
  "eval_accuracy": {
2744
+ "accuracy": 0.9382148463781117
2745
  },
2746
  "eval_f1": {
2747
+ "f1": 0.9358183038090213
2748
  },
2749
+ "eval_loss": 0.18279214203357697,
2750
  "eval_precision": {
2751
+ "precision": 0.9351989305945833
2752
  },
2753
  "eval_recall": {
2754
+ "recall": 0.9374254756058774
2755
  },
2756
+ "eval_runtime": 165.3018,
2757
+ "eval_samples_per_second": 107.9,
2758
+ "eval_steps_per_second": 6.745,
2759
  "step": 4459
2760
  },
2761
  {
2762
  "epoch": 4.0,
2763
  "learning_rate": 4.00359066427289e-05,
2764
+ "loss": 0.2936,
2765
  "step": 4460
2766
  },
2767
  {
2768
  "epoch": 4.01,
2769
  "learning_rate": 4.0125673249551174e-05,
2770
+ "loss": 0.2723,
2771
  "step": 4470
2772
  },
2773
  {
2774
  "epoch": 4.02,
2775
  "learning_rate": 4.021543985637343e-05,
2776
+ "loss": 0.2307,
2777
  "step": 4480
2778
  },
2779
  {
2780
  "epoch": 4.03,
2781
  "learning_rate": 4.0305206463195696e-05,
2782
+ "loss": 0.2567,
2783
  "step": 4490
2784
  },
2785
  {
2786
  "epoch": 4.04,
2787
  "learning_rate": 4.0394973070017954e-05,
2788
+ "loss": 0.2273,
2789
  "step": 4500
2790
  },
2791
  {
2792
  "epoch": 4.05,
2793
  "learning_rate": 4.048473967684022e-05,
2794
+ "loss": 0.2638,
2795
  "step": 4510
2796
  },
2797
  {
2798
  "epoch": 4.05,
2799
  "learning_rate": 4.0574506283662476e-05,
2800
+ "loss": 0.2625,
2801
  "step": 4520
2802
  },
2803
  {
2804
  "epoch": 4.06,
2805
  "learning_rate": 4.066427289048474e-05,
2806
+ "loss": 0.2432,
2807
  "step": 4530
2808
  },
2809
  {
2810
  "epoch": 4.07,
2811
  "learning_rate": 4.0754039497307006e-05,
2812
+ "loss": 0.2345,
2813
  "step": 4540
2814
  },
2815
  {
2816
  "epoch": 4.08,
2817
  "learning_rate": 4.084380610412927e-05,
2818
+ "loss": 0.2272,
2819
  "step": 4550
2820
  },
2821
  {
2822
  "epoch": 4.09,
2823
  "learning_rate": 4.093357271095153e-05,
2824
+ "loss": 0.2217,
2825
  "step": 4560
2826
  },
2827
  {
2828
  "epoch": 4.1,
2829
  "learning_rate": 4.102333931777379e-05,
2830
+ "loss": 0.2195,
2831
  "step": 4570
2832
  },
2833
  {
2834
  "epoch": 4.11,
2835
  "learning_rate": 4.111310592459605e-05,
2836
+ "loss": 0.2637,
2837
  "step": 4580
2838
  },
2839
  {
2840
  "epoch": 4.12,
2841
  "learning_rate": 4.1202872531418315e-05,
2842
+ "loss": 0.2668,
2843
  "step": 4590
2844
  },
2845
  {
2846
  "epoch": 4.13,
2847
  "learning_rate": 4.129263913824057e-05,
2848
+ "loss": 0.2742,
2849
  "step": 4600
2850
  },
2851
  {
2852
  "epoch": 4.14,
2853
  "learning_rate": 4.1382405745062844e-05,
2854
+ "loss": 0.2128,
2855
  "step": 4610
2856
  },
2857
  {
2858
  "epoch": 4.14,
2859
  "learning_rate": 4.14721723518851e-05,
2860
+ "loss": 0.241,
2861
  "step": 4620
2862
  },
2863
  {
2864
  "epoch": 4.15,
2865
  "learning_rate": 4.156193895870737e-05,
2866
+ "loss": 0.2332,
2867
  "step": 4630
2868
  },
2869
  {
2870
  "epoch": 4.16,
2871
  "learning_rate": 4.1651705565529625e-05,
2872
+ "loss": 0.2359,
2873
  "step": 4640
2874
  },
2875
  {
2876
  "epoch": 4.17,
2877
  "learning_rate": 4.174147217235189e-05,
2878
+ "loss": 0.2219,
2879
  "step": 4650
2880
  },
2881
  {
2882
  "epoch": 4.18,
2883
  "learning_rate": 4.183123877917415e-05,
2884
+ "loss": 0.2325,
2885
  "step": 4660
2886
  },
2887
  {
2888
  "epoch": 4.19,
2889
  "learning_rate": 4.192100538599641e-05,
2890
+ "loss": 0.2627,
2891
  "step": 4670
2892
  },
2893
  {
2894
  "epoch": 4.2,
2895
  "learning_rate": 4.2010771992818676e-05,
2896
+ "loss": 0.196,
2897
  "step": 4680
2898
  },
2899
  {
2900
  "epoch": 4.21,
2901
  "learning_rate": 4.2100538599640934e-05,
2902
+ "loss": 0.234,
2903
  "step": 4690
2904
  },
2905
  {
2906
  "epoch": 4.22,
2907
  "learning_rate": 4.21903052064632e-05,
2908
+ "loss": 0.1998,
2909
  "step": 4700
2910
  },
2911
  {
2912
  "epoch": 4.23,
2913
  "learning_rate": 4.228007181328546e-05,
2914
+ "loss": 0.2579,
2915
  "step": 4710
2916
  },
2917
  {
2918
  "epoch": 4.23,
2919
  "learning_rate": 4.236983842010772e-05,
2920
+ "loss": 0.2678,
2921
  "step": 4720
2922
  },
2923
  {
2924
  "epoch": 4.24,
2925
  "learning_rate": 4.2459605026929986e-05,
2926
+ "loss": 0.2431,
2927
  "step": 4730
2928
  },
2929
  {
2930
  "epoch": 4.25,
2931
  "learning_rate": 4.2549371633752244e-05,
2932
+ "loss": 0.2274,
2933
  "step": 4740
2934
  },
2935
  {
2936
  "epoch": 4.26,
2937
  "learning_rate": 4.263913824057451e-05,
2938
+ "loss": 0.2325,
2939
  "step": 4750
2940
  },
2941
  {
2942
  "epoch": 4.27,
2943
  "learning_rate": 4.272890484739677e-05,
2944
+ "loss": 0.2121,
2945
  "step": 4760
2946
  },
2947
  {
2948
  "epoch": 4.28,
2949
  "learning_rate": 4.281867145421903e-05,
2950
+ "loss": 0.2466,
2951
  "step": 4770
2952
  },
2953
  {
2954
  "epoch": 4.29,
2955
  "learning_rate": 4.2908438061041295e-05,
2956
+ "loss": 0.2331,
2957
  "step": 4780
2958
  },
2959
  {
2960
  "epoch": 4.3,
2961
  "learning_rate": 4.299820466786356e-05,
2962
+ "loss": 0.221,
2963
  "step": 4790
2964
  },
2965
  {
2966
  "epoch": 4.31,
2967
  "learning_rate": 4.308797127468582e-05,
2968
+ "loss": 0.2563,
2969
  "step": 4800
2970
  },
2971
  {
2972
  "epoch": 4.31,
2973
  "learning_rate": 4.317773788150808e-05,
2974
+ "loss": 0.2564,
2975
  "step": 4810
2976
  },
2977
  {
2978
  "epoch": 4.32,
2979
  "learning_rate": 4.326750448833034e-05,
2980
+ "loss": 0.2369,
2981
  "step": 4820
2982
  },
2983
  {
2984
  "epoch": 4.33,
2985
  "learning_rate": 4.3357271095152605e-05,
2986
+ "loss": 0.1929,
2987
  "step": 4830
2988
  },
2989
  {
2990
  "epoch": 4.34,
2991
  "learning_rate": 4.344703770197487e-05,
2992
+ "loss": 0.2639,
2993
  "step": 4840
2994
  },
2995
  {
2996
  "epoch": 4.35,
2997
  "learning_rate": 4.353680430879713e-05,
2998
+ "loss": 0.232,
2999
  "step": 4850
3000
  },
3001
  {
3002
  "epoch": 4.36,
3003
  "learning_rate": 4.362657091561939e-05,
3004
+ "loss": 0.2453,
3005
  "step": 4860
3006
  },
3007
  {
3008
  "epoch": 4.37,
3009
  "learning_rate": 4.371633752244165e-05,
3010
+ "loss": 0.2145,
3011
  "step": 4870
3012
  },
3013
  {
3014
  "epoch": 4.38,
3015
  "learning_rate": 4.3806104129263914e-05,
3016
+ "loss": 0.2249,
3017
  "step": 4880
3018
  },
3019
  {
3020
  "epoch": 4.39,
3021
  "learning_rate": 4.389587073608618e-05,
3022
+ "loss": 0.2861,
3023
  "step": 4890
3024
  },
3025
  {
3026
  "epoch": 4.4,
3027
  "learning_rate": 4.398563734290844e-05,
3028
+ "loss": 0.2452,
3029
  "step": 4900
3030
  },
3031
  {
3032
  "epoch": 4.4,
3033
  "learning_rate": 4.40754039497307e-05,
3034
+ "loss": 0.2512,
3035
  "step": 4910
3036
  },
3037
  {
3038
  "epoch": 4.41,
3039
  "learning_rate": 4.4165170556552966e-05,
3040
+ "loss": 0.2373,
3041
  "step": 4920
3042
  },
3043
  {
3044
  "epoch": 4.42,
3045
  "learning_rate": 4.4254937163375224e-05,
3046
+ "loss": 0.2262,
3047
  "step": 4930
3048
  },
3049
  {
3050
  "epoch": 4.43,
3051
  "learning_rate": 4.434470377019749e-05,
3052
+ "loss": 0.2243,
3053
  "step": 4940
3054
  },
3055
  {
3056
  "epoch": 4.44,
3057
  "learning_rate": 4.4434470377019746e-05,
3058
+ "loss": 0.2228,
3059
  "step": 4950
3060
  },
3061
  {
3062
  "epoch": 4.45,
3063
  "learning_rate": 4.452423698384201e-05,
3064
+ "loss": 0.2331,
3065
  "step": 4960
3066
  },
3067
  {
3068
  "epoch": 4.46,
3069
  "learning_rate": 4.4614003590664275e-05,
3070
+ "loss": 0.2706,
3071
  "step": 4970
3072
  },
3073
  {
3074
  "epoch": 4.47,
3075
  "learning_rate": 4.470377019748654e-05,
3076
+ "loss": 0.2326,
3077
  "step": 4980
3078
  },
3079
  {
3080
  "epoch": 4.48,
3081
  "learning_rate": 4.47935368043088e-05,
3082
+ "loss": 0.3095,
3083
  "step": 4990
3084
  },
3085
  {
3086
  "epoch": 4.49,
3087
  "learning_rate": 4.488330341113106e-05,
3088
+ "loss": 0.2106,
3089
  "step": 5000
3090
  },
3091
  {
3092
  "epoch": 4.49,
3093
  "learning_rate": 4.497307001795332e-05,
3094
+ "loss": 0.2494,
3095
  "step": 5010
3096
  },
3097
  {
3098
  "epoch": 4.5,
3099
  "learning_rate": 4.5062836624775585e-05,
3100
+ "loss": 0.234,
3101
  "step": 5020
3102
  },
3103
  {
3104
  "epoch": 4.51,
3105
  "learning_rate": 4.515260323159784e-05,
3106
+ "loss": 0.283,
3107
  "step": 5030
3108
  },
3109
  {
3110
  "epoch": 4.52,
3111
  "learning_rate": 4.5242369838420114e-05,
3112
+ "loss": 0.2626,
3113
  "step": 5040
3114
  },
3115
  {
3116
  "epoch": 4.53,
3117
  "learning_rate": 4.533213644524237e-05,
3118
+ "loss": 0.2352,
3119
  "step": 5050
3120
  },
3121
  {
3122
  "epoch": 4.54,
3123
  "learning_rate": 4.5421903052064636e-05,
3124
+ "loss": 0.2748,
3125
  "step": 5060
3126
  },
3127
  {
3128
  "epoch": 4.55,
3129
  "learning_rate": 4.5511669658886894e-05,
3130
+ "loss": 0.2849,
3131
  "step": 5070
3132
  },
3133
  {
3134
  "epoch": 4.56,
3135
  "learning_rate": 4.560143626570916e-05,
3136
+ "loss": 0.2296,
3137
  "step": 5080
3138
  },
3139
  {
3140
  "epoch": 4.57,
3141
  "learning_rate": 4.5691202872531417e-05,
3142
+ "loss": 0.2231,
3143
  "step": 5090
3144
  },
3145
  {
3146
  "epoch": 4.58,
3147
  "learning_rate": 4.578096947935368e-05,
3148
+ "loss": 0.2526,
3149
  "step": 5100
3150
  },
3151
  {
3152
  "epoch": 4.58,
3153
  "learning_rate": 4.5870736086175946e-05,
3154
+ "loss": 0.2441,
3155
  "step": 5110
3156
  },
3157
  {
3158
  "epoch": 4.59,
3159
  "learning_rate": 4.596050269299821e-05,
3160
+ "loss": 0.236,
3161
  "step": 5120
3162
  },
3163
  {
3164
  "epoch": 4.6,
3165
  "learning_rate": 4.605026929982047e-05,
3166
+ "loss": 0.2485,
3167
  "step": 5130
3168
  },
3169
  {
3170
  "epoch": 4.61,
3171
  "learning_rate": 4.614003590664273e-05,
3172
+ "loss": 0.1969,
3173
  "step": 5140
3174
  },
3175
  {
3176
  "epoch": 4.62,
3177
  "learning_rate": 4.622980251346499e-05,
3178
+ "loss": 0.2454,
3179
  "step": 5150
3180
  },
3181
  {
3182
  "epoch": 4.63,
3183
  "learning_rate": 4.6319569120287255e-05,
3184
+ "loss": 0.2486,
3185
  "step": 5160
3186
  },
3187
  {
3188
  "epoch": 4.64,
3189
  "learning_rate": 4.640933572710951e-05,
3190
+ "loss": 0.2304,
3191
  "step": 5170
3192
  },
3193
  {
3194
  "epoch": 4.65,
3195
  "learning_rate": 4.6499102333931784e-05,
3196
+ "loss": 0.2321,
3197
  "step": 5180
3198
  },
3199
  {
3200
  "epoch": 4.66,
3201
  "learning_rate": 4.658886894075404e-05,
3202
+ "loss": 0.2205,
3203
  "step": 5190
3204
  },
3205
  {
3206
  "epoch": 4.66,
3207
  "learning_rate": 4.667863554757631e-05,
3208
+ "loss": 0.2107,
3209
  "step": 5200
3210
  },
3211
  {
3212
  "epoch": 4.67,
3213
  "learning_rate": 4.6768402154398565e-05,
3214
+ "loss": 0.2215,
3215
  "step": 5210
3216
  },
3217
  {
3218
  "epoch": 4.68,
3219
  "learning_rate": 4.685816876122083e-05,
3220
+ "loss": 0.222,
3221
  "step": 5220
3222
  },
3223
  {
3224
  "epoch": 4.69,
3225
  "learning_rate": 4.694793536804309e-05,
3226
+ "loss": 0.2271,
3227
  "step": 5230
3228
  },
3229
  {
3230
  "epoch": 4.7,
3231
  "learning_rate": 4.703770197486535e-05,
3232
+ "loss": 0.2342,
3233
  "step": 5240
3234
  },
3235
  {
3236
  "epoch": 4.71,
3237
  "learning_rate": 4.7127468581687616e-05,
3238
+ "loss": 0.2478,
3239
  "step": 5250
3240
  },
3241
  {
3242
  "epoch": 4.72,
3243
  "learning_rate": 4.721723518850988e-05,
3244
+ "loss": 0.2691,
3245
  "step": 5260
3246
  },
3247
  {
3248
  "epoch": 4.73,
3249
  "learning_rate": 4.730700179533214e-05,
3250
+ "loss": 0.2059,
3251
  "step": 5270
3252
  },
3253
  {
3254
  "epoch": 4.74,
3255
  "learning_rate": 4.7396768402154403e-05,
3256
+ "loss": 0.2085,
3257
  "step": 5280
3258
  },
3259
  {
3260
  "epoch": 4.75,
3261
  "learning_rate": 4.748653500897666e-05,
3262
+ "loss": 0.1888,
3263
  "step": 5290
3264
  },
3265
  {
3266
  "epoch": 4.75,
3267
  "learning_rate": 4.7576301615798926e-05,
3268
+ "loss": 0.2933,
3269
  "step": 5300
3270
  },
3271
  {
3272
  "epoch": 4.76,
3273
  "learning_rate": 4.7666068222621184e-05,
3274
+ "loss": 0.2519,
3275
  "step": 5310
3276
  },
3277
  {
3278
  "epoch": 4.77,
3279
  "learning_rate": 4.775583482944345e-05,
3280
+ "loss": 0.2142,
3281
  "step": 5320
3282
  },
3283
  {
3284
  "epoch": 4.78,
3285
  "learning_rate": 4.784560143626571e-05,
3286
+ "loss": 0.1972,
3287
  "step": 5330
3288
  },
3289
  {
3290
  "epoch": 4.79,
3291
  "learning_rate": 4.793536804308798e-05,
3292
+ "loss": 0.2134,
3293
  "step": 5340
3294
  },
3295
  {
3296
  "epoch": 4.8,
3297
  "learning_rate": 4.8025134649910235e-05,
3298
+ "loss": 0.2924,
3299
  "step": 5350
3300
  },
3301
  {
3302
  "epoch": 4.81,
3303
  "learning_rate": 4.81149012567325e-05,
3304
+ "loss": 0.2207,
3305
  "step": 5360
3306
  },
3307
  {
3308
  "epoch": 4.82,
3309
  "learning_rate": 4.820466786355476e-05,
3310
+ "loss": 0.2369,
3311
  "step": 5370
3312
  },
3313
  {
3314
  "epoch": 4.83,
3315
  "learning_rate": 4.829443447037702e-05,
3316
+ "loss": 0.242,
3317
  "step": 5380
3318
  },
3319
  {
3320
  "epoch": 4.84,
3321
  "learning_rate": 4.838420107719928e-05,
3322
+ "loss": 0.2023,
3323
  "step": 5390
3324
  },
3325
  {
3326
  "epoch": 4.84,
3327
  "learning_rate": 4.847396768402155e-05,
3328
+ "loss": 0.2413,
3329
  "step": 5400
3330
  },
3331
  {
3332
  "epoch": 4.85,
3333
  "learning_rate": 4.856373429084381e-05,
3334
+ "loss": 0.2757,
3335
  "step": 5410
3336
  },
3337
  {
3338
  "epoch": 4.86,
3339
  "learning_rate": 4.8653500897666074e-05,
3340
+ "loss": 0.2481,
3341
  "step": 5420
3342
  },
3343
  {
3344
  "epoch": 4.87,
3345
  "learning_rate": 4.874326750448833e-05,
3346
+ "loss": 0.2487,
3347
  "step": 5430
3348
  },
3349
  {
3350
  "epoch": 4.88,
3351
  "learning_rate": 4.8833034111310596e-05,
3352
+ "loss": 0.1747,
3353
  "step": 5440
3354
  },
3355
  {
3356
  "epoch": 4.89,
3357
  "learning_rate": 4.8922800718132854e-05,
3358
+ "loss": 0.2537,
3359
  "step": 5450
3360
  },
3361
  {
3362
  "epoch": 4.9,
3363
  "learning_rate": 4.901256732495512e-05,
3364
+ "loss": 0.2722,
3365
  "step": 5460
3366
  },
3367
  {
3368
  "epoch": 4.91,
3369
  "learning_rate": 4.9102333931777383e-05,
3370
+ "loss": 0.2225,
3371
  "step": 5470
3372
  },
3373
  {
3374
  "epoch": 4.92,
3375
  "learning_rate": 4.919210053859965e-05,
3376
+ "loss": 0.1949,
3377
  "step": 5480
3378
  },
3379
  {
3380
  "epoch": 4.92,
3381
  "learning_rate": 4.9281867145421906e-05,
3382
+ "loss": 0.2416,
3383
  "step": 5490
3384
  },
3385
  {
3386
  "epoch": 4.93,
3387
  "learning_rate": 4.937163375224417e-05,
3388
+ "loss": 0.2197,
3389
  "step": 5500
3390
  },
3391
  {
3392
  "epoch": 4.94,
3393
  "learning_rate": 4.946140035906643e-05,
3394
+ "loss": 0.2939,
3395
  "step": 5510
3396
  },
3397
  {
3398
  "epoch": 4.95,
3399
  "learning_rate": 4.955116696588869e-05,
3400
+ "loss": 0.2786,
3401
  "step": 5520
3402
  },
3403
  {
3404
  "epoch": 4.96,
3405
  "learning_rate": 4.964093357271095e-05,
3406
+ "loss": 0.2187,
3407
  "step": 5530
3408
  },
3409
  {
3410
  "epoch": 4.97,
3411
  "learning_rate": 4.9730700179533215e-05,
3412
+ "loss": 0.2518,
3413
  "step": 5540
3414
  },
3415
  {
3416
  "epoch": 4.98,
3417
  "learning_rate": 4.982046678635548e-05,
3418
+ "loss": 0.205,
3419
  "step": 5550
3420
  },
3421
  {
3422
  "epoch": 4.99,
3423
  "learning_rate": 4.991023339317774e-05,
3424
+ "loss": 0.2277,
3425
  "step": 5560
3426
  },
3427
  {
3428
  "epoch": 5.0,
3429
  "learning_rate": 5e-05,
3430
+ "loss": 0.2399,
3431
  "step": 5570
3432
  },
3433
  {
3434
  "epoch": 5.0,
3435
  "eval_accuracy": {
3436
+ "accuracy": 0.9539694998878673
3437
  },
3438
  "eval_f1": {
3439
+ "f1": 0.9516195088904471
3440
  },
3441
+ "eval_loss": 0.1283472627401352,
3442
  "eval_precision": {
3443
+ "precision": 0.9525864748624857
3444
  },
3445
  "eval_recall": {
3446
+ "recall": 0.9506985041313745
3447
  },
3448
+ "eval_runtime": 164.7789,
3449
+ "eval_samples_per_second": 108.242,
3450
+ "eval_steps_per_second": 6.767,
3451
  "step": 5573
3452
  }
3453
  ],
tmp-checkpoint-5573/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3394b871720811033add862f1e1383ca3790e6ab55972b19ed900a5d6b59c978
3
  size 4792
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c55fb57ba4dc2c9b2c813516c4846a21f1047b19b6cf48a97b9224ced523b9b
3
  size 4792